In [1]:
import pandas as pd
import numpy as np

In [2]:
# Reindexing
# Start from a Series whose labels are deliberately out of alphabetical order.
series = pd.Series(
    data=[2.6, 3.2, 6.7, 8.9],
    index=list('bdca'),
)
series

b    2.6
d    3.2
c    6.7
a    8.9
dtype: float64

In [3]:
# Reindexing to a new set of labels reorders the existing values; a label that
# was not in the original index ('e') is introduced as NaN.
series1 = series.reindex(list('abcde'))
series1

a    8.9
b    2.6
c    6.7
d    3.2
e    NaN
dtype: float64

In [4]:
# reindex accepts a fill method and other keyword arguments.
# Caveat: method='ffill' compares the original and requested indexes only; it
# does not look at the values. The new label 'f' copies the value of the
# preceding original label 'e', which is already NaN in series1, so both 'e'
# and 'f' stay NaN.
series2 = series1.reindex(index=['a', 'b', 'c', 'd', 'e', 'f'], method='ffill')
series2

a    8.9
b    2.6
c    6.7
d    3.2
e    NaN
f    NaN
dtype: float64

In [5]:
# Dropping labels is easy with .drop(), which returns a new object.
# Only the last expression in the cell is displayed.
series.drop('d')
series1.drop(['a', 'b'])

c    6.7
d    3.2
e    NaN
dtype: float64

In [6]:
# DataFrames offer the same flexibility along both axes.
# Build a small 3x3 frame with labelled rows and columns.
df = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['cat', 'dog', 'frog'],
    columns=['one', 'two', 'three'],
)
df

Unnamed: 0,one,two,three
cat,0,1,2
dog,3,4,5
frog,6,7,8


In [7]:
# Drop by column label (equivalent to passing the labels with axis=1).
df.drop(columns=['one', 'three'])

Unnamed: 0,two
cat,1
dog,4
frog,7


In [8]:
# Drop by row label (equivalent to passing the labels with axis=0).
df.drop(index=['cat', 'frog'])

Unnamed: 0,one,two,three
dog,3,4,5


In [9]:
# Indexing, etc. Only the last expression's result is displayed by the cell.
df[df > 1]            # boolean frame used as a mask
df.columns            # the column labels
df.one                # attribute access to a single column
df['one']             # the same column via bracket notation
df[['one', 'three']]  # a list of labels selects several columns as a frame

Unnamed: 0,one,three
cat,0,2
dog,3,5
frog,6,8


In [10]:
# Slicing nuance in pandas: integer (positional) slices behave like ordinary
# Python slices and EXCLUDE the end-point, so 1:2 returns only the row at
# position 1. It is label-based slices (e.g. df.loc['cat':'dog']) that
# include the end-point.
# .iloc makes it explicit that the slice is positional.
df.iloc[1:2]

Unnamed: 0,one,two,three
dog,3,4,5


In [11]:
# Basic querying with a boolean mask: keep the values of column 'one' that are
# greater than 1. A single .loc call selects rows and column together, which
# avoids the chained-indexing form df['one'][mask].
df.loc[df['one'] > 1, 'one']

dog     3
frog    6
Name: one, dtype: int32

In [12]:
# loc and iloc
# Handy for carving out slices by axis label (loc) or by integer position (iloc).
# Only the last expression's result is displayed by the cell.
df.loc['dog', ['one', 'three']]  # label-based: row 'dog', columns 'one' and 'three'
df
df.iloc[1, [0, 2]]               # position-based: row 1, columns 0 and 2 (the same cells)

one      3
three    5
Name: dog, dtype: int32

In [19]:
# Arithmetic and data alignment
#
# Example: two Series whose indexes do not match. s2 lists its labels in a
# different order and carries an extra label ('e').
s1 = pd.Series(data=[1.1, 2.2, 3.3, 4.4], index=list('abcd'))
s2 = pd.Series(data=[2.3, 3.3, 4.3, 5.3, 6.3], index=list('bcdae'))
s1

a    1.1
b    2.2
c    3.3
d    4.4
dtype: float64

In [18]:
# Show the second operand; note its label order (b, c, d, a, e) differs from s1.
s2

b    2.3
c    3.3
d    4.3
a    5.3
e    6.3
dtype: float64

In [21]:
s1 + s2
# Values are aligned by index label, not by position. The result's index is
# the union of both indexes; a label found in only one Series ('e') yields NaN.

a    6.4
b    4.5
c    6.6
d    8.7
e    NaN
dtype: float64

In [22]:
# For DataFrames, alignment is performed on both the rows and the columns.
df1 = pd.DataFrame(
    data=np.arange(25).reshape(5, 5),
    index=['ca', 'or', 'wa', 'id', 'nv'],
    columns='one two three four five'.split(),
)
df1

Unnamed: 0,one,two,three,four,five
ca,0,1,2,3,4
or,5,6,7,8,9
wa,10,11,12,13,14
id,15,16,17,18,19
nv,20,21,22,23,24


In [25]:
# A smaller frame that shares only some row and column labels with df1.
df2 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['ca', 'or', 'fl'],
    columns='one two three'.split(),
)
df2

Unnamed: 0,one,two,three
ca,0,1,2
or,3,4,5
fl,6,7,8


In [24]:
# Only labels present in both frames (rows 'ca'/'or', columns 'one'/'two'/'three')
# get a value; every other cell in the union of the two indexes is NaN.
df1 + df2

Unnamed: 0,five,four,one,three,two
ca,,,0.0,4.0,2.0
fl,,,,,
id,,,,,
nv,,,,,
or,,,8.0,12.0,10.0
wa,,,,,


In [27]:
# Side note: the built-in list() splits a string into its characters, which is
# a handy way to build label lists.
list('abcdef')[2]

'c'

In [30]:
# Arithmetic methods with a fill value for mismatched indexes.
# fill_value stands in for a value that is missing from ONE of the frames
# before the addition; cells missing from both frames (e.g. 'fl'/'five') are
# still NaN. It must be a number: the original passed the string '0', which
# is not a valid numeric operand.
df1.add(df2, fill_value=0)

# Addition is commutative, so swapping the operands gives the same values.
df2.add(df1, fill_value=0)

Unnamed: 0,five,four,one,three,two
ca,4.0,3.0,0.0,4.0,2.0
fl,,,6.0,8.0,7.0
id,19.0,18.0,15.0,17.0,16.0
nv,24.0,23.0,20.0,22.0,21.0
or,9.0,8.0,8.0,12.0,10.0
wa,14.0,13.0,10.0,12.0,11.0


In [31]:
# Arithmetic with a scalar is as easy as in NumPy: the scalar is applied to
# every element. df1 contains a 0 (row 'ca', column 'one'), so that cell is inf.
2 / df1

Unnamed: 0,one,two,three,four,five
ca,inf,2.0,1.0,0.666667,0.5
or,0.4,0.333333,0.285714,0.25,0.222222
wa,0.2,0.181818,0.166667,0.153846,0.142857
id,0.133333,0.125,0.117647,0.111111,0.105263
nv,0.1,0.095238,0.090909,0.086957,0.083333


In [32]:
# Method form of element-wise division: every value of df1 divided by 2.
df1.div(2)

Unnamed: 0,one,two,three,four,five
ca,0.0,0.5,1.0,1.5,2.0
or,2.5,3.0,3.5,4.0,4.5
wa,5.0,5.5,6.0,6.5,7.0
id,7.5,8.0,8.5,9.0,9.5
nv,10.0,10.5,11.0,11.5,12.0


In [34]:
# Reversed division: computes 2 / df1, i.e. the same result as the operator
# expression `2 / df1`.
df1.rdiv(2)

Unnamed: 0,one,two,three,four,five
ca,inf,2.0,1.0,0.666667,0.5
or,0.4,0.333333,0.285714,0.25,0.222222
wa,0.2,0.181818,0.166667,0.153846,0.142857
id,0.133333,0.125,0.117647,0.111111,0.105263
nv,0.1,0.095238,0.090909,0.086957,0.083333


In [38]:
# Broadcasting
# A 4x3 float array holding 0.0 through 11.0.
arr = np.arange(12, dtype=float).reshape(4, 3)
arr

array([[ 0.,  1.,  2.],
       [ 3.,  4.,  5.],
       [ 6.,  7.,  8.],
       [ 9., 10., 11.]])

In [40]:
# The scalar at row 1, column 1.
arr[1, 1]

4.0

In [41]:
# The scalar is broadcast across the whole array: it is subtracted from every element.
arr - arr[1, 1]

array([[-4., -3., -2.],
       [-1.,  0.,  1.],
       [ 2.,  3.,  4.],
       [ 5.,  6.,  7.]])

In [48]:
# Broadcasting works similarly when a Series is combined with a DataFrame:
# the Series index is aligned with the frame's columns and the operation is
# repeated for every row.
series5 = df2.iloc[1]  # the 'or' row: one=3, two=4, three=5

df2 - series5

# Method form of the same subtraction. `other` is a required argument, so the
# previous bare `df2.sub()` raised a TypeError.
df2.sub(series5, axis='columns')

Unnamed: 0,one,two,three
ca,-3,-3,-3
or,0,0,0
fl,3,3,3


In [58]:
# Applying functions to frames
# Seed NumPy's global generator so the random frame is identical on every run
# (the outputs saved in this notebook predate the seed; re-run to refresh them).
np.random.seed(42)
df4 = pd.DataFrame(np.random.randn(4, 3), columns=list('abc'), index=['ca', 'or', 'fl', 'id'])
np.abs(df4)  # NumPy ufuncs apply element-wise and return a new frame; df4 itself is unchanged

df4

Unnamed: 0,a,b,c
ca,2.240881,0.744829,-1.051347
or,0.706591,0.384304,0.596618
fl,-0.729973,1.194693,0.562884
id,0.716896,-0.899241,0.140564


In [59]:
def func(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    return x.max() - x.min()


# apply() with the default axis calls func once per column, producing one
# value for each of 'a', 'b' and 'c'.
df4.apply(func)

a    2.970854
b    2.093934
c    1.647964
dtype: float64

In [60]:
# Functions can also be applied element-wise with the applymap method.
# NOTE(review): pandas 2.1 deprecates DataFrame.applymap in favour of
# DataFrame.map; switch once the minimum supported pandas version allows it.
def func2(x):
    """Return ``x`` doubled."""
    return x * 2


df4.applymap(func2)

Unnamed: 0,a,b,c
ca,4.481761,1.489657,-2.102693
or,1.413181,0.768607,1.193235
fl,-1.459946,2.389386,1.125768
id,1.433792,-1.798482,0.281128
