# Getting Started with pandas

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
obj = pd.Series([4,7,-5,3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [3]:
obj.array

<PandasArray>
[4, 7, -5, 3]
Length: 4, dtype: int64

In [4]:
obj.index

RangeIndex(start=0, stop=4, step=1)

pandas.core.series.Series

In [6]:
# Build a Series with explicit string labels, then walk it label by label.
obj2 = pd.Series([4, 7, -5, 3], index=['c', 'd', 'a', 'b'])
# .items() yields (label, value) pairs directly, which avoids one obj2[label]
# lookup per iteration.
for label, value in obj2.items():
    print(label, value)

c 4
d 7
a -5
b 3


In [7]:
obj2['c']

4

In [8]:
obj2[['d','a']]

d    7
a   -5
dtype: int64

In [9]:
obj2[obj2>=4]

c    4
d    7
dtype: int64

In [10]:
obj2 * 2

c     8
d    14
a   -10
b     6
dtype: int64

In [11]:
# numpy is imported in the notebook's first cell. Applying a NumPy ufunc to a
# Series returns a Series that keeps the original index labels.
np.exp(obj2)

c      54.598150
d    1096.633158
a       0.006738
b      20.085537
dtype: float64

In [12]:
kk = np.exp(obj2)
print(type(kk))

<class 'pandas.core.series.Series'>


In [13]:
'd' in obj2

True

In [14]:
# State -> population mapping. Building a Series from a dict uses the keys as
# the index, in insertion order.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
obj3 = pd.Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [15]:
obj3.index

Index(['Ohio', 'Texas', 'Oregon', 'Utah'], dtype='object')

In [16]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [17]:
obj4.isna()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [18]:
obj4.notna()

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [19]:
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [20]:
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [21]:
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [22]:
# Label the Series values and its index; both names appear in the repr.
# NOTE(review): the two labels look swapped -- the values are population
# counts and the index holds state names, so 'population' / 'state' would
# describe them better. Confirm before renaming.
obj4.name = 'City'
obj4.index.name = 'Population'

In [23]:
obj4

Population
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: City, dtype: float64

In [24]:
obj = pd.Series([4,7,-5,3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [25]:
obj.index = ['Bob','Steve','Jeff','Ryan']

In [26]:
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

## DataFrame

In [27]:
import pandas as pd

In [28]:
# Column-oriented input: every key becomes a column and each list supplies
# that column's values, row by row.
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
frame = pd.DataFrame(data=data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [29]:
frame.head(2)

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7


In [30]:
pd.DataFrame(data, columns=['year','state','pop']) # in this order

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [31]:
frame2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
                           index=['one', 'two', 'three', 'four', 'five', 'six'])
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [32]:
frame2['year']

one      2000
two      2001
three    2002
four     2001
five     2002
six      2003
Name: year, dtype: int64

In [33]:
frame2.dtypes

year       int64
state     object
pop      float64
debt      object
dtype: object

In [34]:
frame2['pop']

one      1.5
two      1.7
three    3.6
four     2.4
five     2.9
six      3.2
Name: pop, dtype: float64

In [35]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [36]:
frame2['state']

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [37]:
frame2.state

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [38]:
frame2['pop']

one      1.5
two      1.7
three    3.6
four     2.4
five     2.9
six      3.2
Name: pop, dtype: float64

In [39]:
# Why does this not return the 'pop' column? DataFrame defines its own pop()
# method, so attribute access resolves to that bound method. Use frame2['pop']
# to get the column.
frame2.pop # why?

<bound method DataFrame.pop of        year   state  pop debt
one    2000    Ohio  1.5  NaN
two    2001    Ohio  1.7  NaN
three  2002    Ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
five   2002  Nevada  2.9  NaN
six    2003  Nevada  3.2  NaN>

frame2

In [40]:
print(frame2)
frame2

       year   state  pop debt
one    2000    Ohio  1.5  NaN
two    2001    Ohio  1.7  NaN
three  2002    Ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
five   2002  Nevada  2.9  NaN
six    2003  Nevada  3.2  NaN


Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [41]:
frame2.state

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [42]:
frame2.year

one      2000
two      2001
three    2002
four     2001
five     2002
six      2003
Name: year, dtype: int64

In [43]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [44]:
# A Series can carry a name; here it is supplied at construction time.
kk = pd.Series([1, 2, 3, 4], name='kk1')
kk

0    1
1    2
2    3
3    4
Name: kk1, dtype: int64

In [45]:
frame2['debt'] = 19.69
frame2.loc['three']

year      2002
state     Ohio
pop        3.6
debt     19.69
Name: three, dtype: object

In [46]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,19.69
two,2001,Ohio,1.7,19.69
three,2002,Ohio,3.6,19.69
four,2001,Nevada,2.4,19.69
five,2002,Nevada,2.9,19.69
six,2003,Nevada,3.2,19.69


In [47]:
# Assign through frame2['debt'] rather than attribute syntax: brackets work
# for any column name and can also create new columns, whereas
# `frame2.debt = ...` only updates a column that already exists.
# The array supplies one value per row (frame2 has six rows).
frame2['debt'] = np.arange(6.)
frame2


Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,1.0
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,3.0
five,2002,Nevada,2.9,4.0
six,2003,Nevada,3.2,5.0


In [48]:
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [49]:
frame2['easter'] = frame2.state == 'Ohio'

In [50]:
frame2

Unnamed: 0,year,state,pop,debt,easter
one,2000,Ohio,1.5,,True
two,2001,Ohio,1.7,-1.2,True
three,2002,Ohio,3.6,,True
four,2001,Nevada,2.4,-1.5,False
five,2002,Nevada,2.9,-1.7,False
six,2003,Nevada,3.2,,False


In [51]:
del frame2['easter']

In [52]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [53]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [54]:
# Nested mapping: outer keys are states, inner keys are years.
pop = dict(
    Ohio={2000: 1.5, 2001: 1.7, 2002: 3.6},
    Nevada={2001: 2.4, 2002: 2.9},
)
pop

{'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}, 'Nevada': {2001: 2.4, 2002: 2.9}}

In [55]:
frame3 = pd.DataFrame(pop)
frame3.index

Int64Index([2000, 2001, 2002], dtype='int64')

In [56]:
frame3.T[2002]

Ohio      3.6
Nevada    2.9
Name: 2002, dtype: float64

In [57]:
pop

{'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}, 'Nevada': {2001: 2.4, 2002: 2.9}}

In [58]:
frame4 = pd.DataFrame(pop, index=[2000, 2001, 2002])

In [59]:
frame4.dtypes

Ohio      float64
Nevada    float64
dtype: object

In [60]:
frame4.index

Int64Index([2000, 2001, 2002], dtype='int64')

In [61]:
display(frame3)

Unnamed: 0,Ohio,Nevada
2000,1.5,
2001,1.7,2.4
2002,3.6,2.9


In [62]:
 frame3.index

Int64Index([2000, 2001, 2002], dtype='int64')

In [63]:
frame3.index.name = 'year'
frame3.index

Int64Index([2000, 2001, 2002], dtype='int64', name='year')

In [64]:
frame3

Unnamed: 0_level_0,Ohio,Nevada
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,1.5,
2001,1.7,2.4
2002,3.6,2.9


In [65]:
frame3.columns.name = "state"
frame3.columns

Index(['Ohio', 'Nevada'], dtype='object', name='state')

In [66]:
frame3.to_numpy()

array([[1.5, nan],
       [1.7, 2.4],
       [3.6, 2.9]])

In [67]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [68]:
frame2.to_numpy()

array([[2000, 'Ohio', 1.5, nan],
       [2001, 'Ohio', 1.7, -1.2],
       [2002, 'Ohio', 3.6, nan],
       [2001, 'Nevada', 2.4, -1.5],
       [2002, 'Nevada', 2.9, -1.7],
       [2003, 'Nevada', 3.2, nan]], dtype=object)

## Index Objects

In [69]:
obj = pd.Series(range(3), index=['a', 'b', 'c'])

index = obj.index
index

Index(['a', 'b', 'c'], dtype='object')

In [70]:
# index[2] = "d"  # raises an error: Index objects are immutable

In [71]:
labels = pd.Index(np.arange(3))
labels

Int64Index([0, 1, 2], dtype='int64')

In [72]:
obj2 = pd.Series([2.4,5.6,1.5], index=labels)
obj2

0    2.4
1    5.6
2    1.5
dtype: float64

In [73]:
obj2.index is labels

True

In [74]:
frame3

state,Ohio,Nevada
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,1.5,
2001,1.7,2.4
2002,3.6,2.9


In [75]:
frame3.columns

Index(['Ohio', 'Nevada'], dtype='object', name='state')

In [76]:
'Ohio' in frame3.columns

True

In [77]:
2002 in frame3.index

True

In [78]:
2003 in frame3.index

False

In [79]:
'2002' in frame3.index

False

In [80]:
2000+2 in frame3.index

True

In [81]:
dup_labels = pd.Index(['foo', 'foo', 'bar', 'bar'])
dup_labels

Index(['foo', 'foo', 'bar', 'bar'], dtype='object')

# Essential Functionality

## Reindexing

In [82]:
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [83]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [84]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
print(obj3)

0      blue
2    purple
4    yellow
dtype: object


In [85]:
obj3.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [86]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
                     index=['a', 'c', 'd'],
                     columns=['Ohio', 'Texas', 'California'])

In [87]:
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [88]:
states = ['Texas', 'Utah', 'California']
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [89]:
frame.loc[['a', 'c', 'd'], ['Texas', 'California']]

Unnamed: 0,Texas,California
a,1,2
c,4,5
d,7,8


## Dropping Entries from an Axis

In [90]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [91]:
obj.name = "numeros"

In [92]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
Name: numeros, dtype: float64

In [93]:
obj.drop('e')

a    0.0
b    1.0
c    2.0
d    3.0
Name: numeros, dtype: float64

In [94]:
obj.drop(['d','e'])

a    0.0
b    1.0
c    2.0
Name: numeros, dtype: float64

In [95]:
# 4x4 frame holding 0..15 in row-major order, with state names as row labels.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)

In [96]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [97]:
data.drop("Utah")

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
New York,12,13,14,15


In [98]:
data.drop(["New York", "Utah"])

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [99]:
data.drop(["New York", "Colorado"], axis="index")

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Utah,8,9,10,11


In [100]:
data.drop(["New York", "Colorado"], axis=0)

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Utah,8,9,10,11


In [101]:
data.drop('two', axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [102]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
Name: numeros, dtype: float64

In [103]:
# inplace=True modifies obj itself instead of returning a new Series, so this
# cell shows no output and the 'c' entry is gone from obj afterwards.
# NOTE(review): `obj = obj.drop('c')` is usually preferred -- confirm.
obj.drop('c', inplace=True)

In [104]:
obj

a    0.0
b    1.0
d    3.0
e    4.0
Name: numeros, dtype: float64

## Indexing, Selection, and Filtering

In [105]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj['b']

1.0

In [106]:
# Select by position explicitly with .iloc. A bare obj[1] on a string-labelled
# Series relies on a positional fallback that newer pandas releases deprecate.
obj.iloc[1]

1.0

In [107]:
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [108]:
obj[['b', 'a', 'd']]

b    1.0
a    0.0
d    3.0
dtype: float64

In [109]:
# Positional selection of several elements: use .iloc rather than integer
# keys in plain brackets, which are ambiguous on a labelled Series.
obj.iloc[[1, 3]]

b    1.0
d    3.0
dtype: float64

In [110]:
obj[obj>=2]

c    2.0
d    3.0
dtype: float64

In [111]:
obj.loc[['b', 'a', 'd']] # for labels, better use loc

b    1.0
a    0.0
d    3.0
dtype: float64

In [112]:
obj1 = pd.Series([1, 2, 3], index=[2, 0, 1])
obj1

2    1
0    2
1    3
dtype: int64

In [113]:
obj2 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
obj2

a    1
b    2
c    3
dtype: int64

In [114]:
obj1[[0, 1, 2]] # labels: obj1 has an integer index, so the keys are matched as labels

0    2
1    3
2    1
dtype: int64

In [115]:
# NOTE(review): newer pandas releases deprecate this positional fallback for
# plain brackets -- confirm against the pandas version in use.
obj2[[0, 1, 2]] # position: obj2 has string labels, so integer keys are taken as positions

a    1
b    2
c    3
dtype: int64

In [116]:
obj1.loc[[0, 1, 2]] # label-based

0    2
1    3
2    1
dtype: int64

In [117]:
obj1.iloc[[0, 1, 2]] # position-based

2    1
0    2
1    3
dtype: int64

In [118]:
obj2.iloc[[0,1,2]] # position-based

a    1
b    2
c    3
dtype: int64

In [119]:
obj2[[0,1,2]] # position-based (string index, integer keys)

a    1
b    2
c    3
dtype: int64

In [120]:
# obj2.loc[[0, 1, 2]]  # fails: .loc is label-based and obj2's labels are not integers

In [121]:
obj2.loc[['a','b','c']] # label-based

a    1
b    2
c    3
dtype: int64

In [122]:
obj2

a    1
b    2
c    3
dtype: int64

In [123]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [124]:
# Label-based slice. Note: the end label 'c' is included in the result.
# Note: the slice goes directly inside the brackets, not wrapped in a list.
obj.loc['b':'c'] # labels; end point inclusive

b    1.0
c    2.0
dtype: float64

In [125]:
obj2.loc['b':'c'] 

b    2
c    3
dtype: int64

In [126]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [127]:
obj.loc['b':'c'] = 55
obj

a     0.0
b    55.0
c    55.0
d     3.0
dtype: float64

In [128]:
# Rebuild the 4x4 example frame (values 0..15) and display it.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [129]:
data["two"]

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [130]:
data[["three","two"]]

Unnamed: 0,three,two
Ohio,2,1
Colorado,6,5
Utah,10,9
New York,14,13


In [131]:
data[2:]

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [132]:
data[data["three"]>5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [133]:
data[data["three"]<=5]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3


In [134]:
data[:]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [135]:
data<5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [136]:
data[data<5] = 666
data

Unnamed: 0,one,two,three,four
Ohio,666,666,666,666
Colorado,666,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


###  Selection on DataFrame with loc and iloc

In [137]:
data

Unnamed: 0,one,two,three,four
Ohio,666,666,666,666
Colorado,666,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [138]:
type(data.loc['Colorado', ['four','two']])

pandas.core.series.Series

In [139]:
type(data)

pandas.core.frame.DataFrame

In [140]:
type(data.loc['Colorado'])

pandas.core.series.Series

In [141]:
type(data.loc['Colorado':])

pandas.core.frame.DataFrame

In [142]:
data.loc['Colorado':]

Unnamed: 0,one,two,three,four
Colorado,666,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [143]:
data.loc['Colorado', ['four','two']]

four    7
two     5
Name: Colorado, dtype: int64

In [144]:
data.iloc[1, [3,1]] # orden

four    7
two     5
Name: Colorado, dtype: int64

In [145]:
data.iloc[2] # orden

one       8
two       9
three    10
four     11
Name: Utah, dtype: int64

In [146]:
data.iloc[[1, 2], [3, 0, 1]] # orden

Unnamed: 0,four,one,two
Colorado,7,666,5
Utah,11,8,9


In [147]:
data

Unnamed: 0,one,two,three,four
Ohio,666,666,666,666
Colorado,666,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [148]:
data.loc[:'Utah', 'two']

Ohio        666
Colorado      5
Utah          9
Name: two, dtype: int64

In [149]:
data.iloc[:, :3]

Unnamed: 0,one,two,three
Ohio,666,666,666
Colorado,666,5,6
Utah,8,9,10
New York,12,13,14


In [150]:
# Rows whose 'three' value exceeds 9, restricted to the first three columns,
# selected in a single .loc call instead of two chained indexing steps.
data.loc[data['three'] > 9, data.columns[:3]]

Unnamed: 0,one,two,three
Ohio,666,666,666
Utah,8,9,10
New York,12,13,14


In [151]:
data.iloc[:, :3]

Unnamed: 0,one,two,three
Ohio,666,666,666
Colorado,666,5,6
Utah,8,9,10
New York,12,13,14


## Integer Indexing Pitfalls

In [152]:
ser = pd.Series(np.arange(3.))
ser

0    0.0
1    1.0
2    2.0
dtype: float64

In [153]:
# ser[-1] # Error: ser has an integer index (0, 1, 2), so -1 is treated as a
# label, and there is no label -1.
# Fetch the last label from the index instead, then select by that label.
i = ser.index[-1]
print(ser[i])
# Slicing from 1 onward returns the remaining two elements.
print(ser[1:])

2.0
1    1.0
2    2.0
dtype: float64


In [154]:
# With string labels an integer key can only mean a position, so say so
# explicitly with .iloc. Plain ser2[-1] depends on a positional fallback that
# newer pandas releases deprecate.
ser2 = pd.Series(np.arange(3.), index=['a', 'b', 'c'])
ser2.iloc[-1]

2.0

## Arithmetic and Data Alignment

In [155]:
# Two Series with partially overlapping labels. Adding them aligns on the
# union of both indexes and yields NaN where a label is missing on either side.
s1 = pd.Series(data=[7.3, -2.5, 3.4, 1.5], index=list('acde'))
s2 = pd.Series(data=[-2.1, 3.6, -1.5, 4, 3.1], index=list('acefg'))
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [156]:
# 3x3 float frame with columns b, c, d and three row labels.
df1 = pd.DataFrame(
    data=np.arange(9.).reshape((3, 3)),
    index=['Texitas', 'Ohio', 'Colorado'],
    columns=['b', 'c', 'd'],
)
df1

Unnamed: 0,b,c,d
Texitas,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [157]:
# 4x3 float frame with columns b, d, e and four row labels.
df2 = pd.DataFrame(
    data=np.arange(12.).reshape((4, 3)),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [158]:
df1 + df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,6.0,,9.0,
Oregon,,,,
Texas,,,,
Texitas,,,,
Utah,,,,


In [159]:
display(df1)
display(df2)
# fill_value=1000 stands in for a value that is missing in one of the two
# frames; cells missing in both frames are still NaN in the result.
df1.add(df2, fill_value=1000) # the missing operand is treated as 1000

Unnamed: 0,b,c,d
Texitas,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


Unnamed: 0,b,c,d,e
Colorado,1006.0,1007.0,1008.0,
Ohio,6.0,1004.0,9.0,1005.0
Oregon,1009.0,,1010.0,1011.0
Texas,1006.0,,1007.0,1008.0
Texitas,1000.0,1001.0,1002.0,
Utah,1000.0,,1001.0,1002.0


In [160]:
df1.add(df2, fill_value=0) # the missing operand is treated as 0

Unnamed: 0,b,c,d,e
Colorado,6.0,7.0,8.0,
Ohio,6.0,4.0,9.0,5.0
Oregon,9.0,,10.0,11.0
Texas,6.0,,7.0,8.0
Texitas,0.0,1.0,2.0,
Utah,0.0,,1.0,2.0


In [161]:
df3 = pd.DataFrame(np.arange(4.).reshape(2,2), columns=list('uy'), index=[ 'Tex1', 'Tex2'])
df3

Unnamed: 0,u,y
Tex1,0.0,1.0
Tex2,2.0,3.0


In [162]:
df1 + df3

Unnamed: 0,b,c,d,u,y
Colorado,,,,,
Ohio,,,,,
Tex1,,,,,
Tex2,,,,,
Texitas,,,,,


### Arithmetic methods with fill values

In [163]:
df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [164]:
df2 = pd.DataFrame(np.arange(20.).reshape((4,5)), columns=list('abcde'))
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [165]:
df2.loc[1, 'b'] = np.nan
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [166]:
df1 + df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [167]:
display(df1)
display(df2)
display(df1.add(df2, fill_value=0))

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,5.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [168]:
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [169]:
1/df1

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [170]:
df1.rdiv(2)

Unnamed: 0,a,b,c,d
0,inf,2.0,1.0,0.666667
1,0.5,0.4,0.333333,0.285714
2,0.25,0.222222,0.2,0.181818


In [171]:
display(df1)
display(df2)
display(df1.reindex(columns=df2.columns, fill_value=0))
display(df1)

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,0
1,4.0,5.0,6.0,7.0,0
2,8.0,9.0,10.0,11.0,0


Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [172]:
df2.reindex(columns=df1.columns, fill_value=0)

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,5.0,,7.0,8.0
2,10.0,11.0,12.0,13.0
3,15.0,16.0,17.0,18.0


In [173]:
display(df1)
display(df2)

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [174]:
df1.add(df2)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


### Operations between DataFrame and Series

In [175]:
arr = np.arange(12.).reshape((3,4))
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [176]:
arr[0]

array([0., 1., 2., 3.])

In [177]:
arr - arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [178]:
# 4x3 float frame (values 0..11) with state names as row labels.
frame = pd.DataFrame(
    data=np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [179]:
series = frame.iloc[0]
series

b    0.0
d    1.0
e    2.0
Name: Utah, dtype: float64

In [180]:
frame - series

Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


In [181]:
series2 = pd.Series(range(3), index=['b','e','f'])
frame + series2

Unnamed: 0,b,d,e,f
Utah,0.0,,3.0,
Ohio,3.0,,6.0,
Texas,6.0,,9.0,
Oregon,9.0,,12.0,


In [182]:
series3 = frame['d']
series3

Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64

In [183]:
display(frame)
display(series3)

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


Utah       1.0
Ohio       4.0
Texas      7.0
Oregon    10.0
Name: d, dtype: float64

In [184]:
frame.sub(series3, axis='index')

Unnamed: 0,b,d,e
Utah,-1.0,0.0,1.0
Ohio,-1.0,0.0,1.0
Texas,-1.0,0.0,1.0
Oregon,-1.0,0.0,1.0


## Function Application and Mapping