# Pandas

![gif](imgs/P003.gif)

## Imports

In [1]:
import pandas as pd
import numpy as np

## Arithmetic operations

### series

In [2]:
# Two integer Series whose labels only partly overlap ('a', 'c', 'e' are shared).
s1 = pd.Series({'a': 7, 'c': -2, 'd': 4, 'e': 2})
s2 = pd.Series({'a': -2, 'c': 3, 'e': -1, 'f': 4, 'g': 5})

s1

a    7
c   -2
d    4
e    2
dtype: int64

In [3]:
# Show the second operand so its labels can be compared with s1.
s2

a   -2
c    3
e   -1
f    4
g    5
dtype: int64

### +

In [4]:
# Addition aligns on index labels; labels present in only one Series
# ('d', 'f', 'g') produce NaN, and the result becomes float64.
s1 + s2

a    5.0
c    1.0
d    NaN
e    1.0
f    NaN
g    NaN
dtype: float64

### dataframe

In [5]:
# df1 has rows A01-A03; df2 has the same columns plus one extra row, A04.
df1 = pd.DataFrame(
    np.arange(9, dtype=float).reshape(3, 3),
    index=['A01', 'A02', 'A03'],
    columns=['b', 'c', 'd'],
)

df2 = pd.DataFrame(
    np.arange(12, dtype=float).reshape(4, 3),
    index=['A01', 'A02', 'A03', 'A04'],
    columns=['b', 'c', 'd'],
)

df1

Unnamed: 0,b,c,d
A01,0.0,1.0,2.0
A02,3.0,4.0,5.0
A03,6.0,7.0,8.0


In [6]:
# Show the second frame; note the extra row A04.
df2

Unnamed: 0,b,c,d
A01,0.0,1.0,2.0
A02,3.0,4.0,5.0
A03,6.0,7.0,8.0
A04,9.0,10.0,11.0


In [7]:
# Frames align on both row and column labels; row A04 exists only in df2,
# so every cell of that row is NaN in the sum.
df1 + df2

Unnamed: 0,b,c,d
A01,0.0,2.0,4.0
A02,6.0,8.0,10.0
A03,12.0,14.0,16.0
A04,,,


## Filling missing values in arithmetic operations

In [8]:
# df1 is 3x4 (columns a-d); df2 is 4x5 (columns a-e), so df2 has one extra
# row and one extra column. Both use the default integer row index.
df1 = pd.DataFrame(
    np.arange(12, dtype=float).reshape(3, 4),
    columns=['a', 'b', 'c', 'd'],
)

df2 = pd.DataFrame(
    np.arange(20, dtype=float).reshape(4, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)

df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [9]:
# Show the larger frame: it has row 3 and column 'e', which df1 lacks.
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [10]:
# Plain addition: column 'e' and row 3 exist only in df2, so they come out as NaN.
df1 + df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,11.0,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [11]:
# With fill_value=0, a cell missing from one frame is treated as 0 before adding,
# so cells that exist only in df2 keep their df2 value instead of becoming NaN.
df1.add(df2, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,11.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


### -  /  *

In [12]:
# Subtraction with fill_value=0: cells present only in df2 become 0 - value.
df1.sub(df2, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,0.0,0.0,0.0,-4.0
1,-1.0,-1.0,-1.0,-1.0,-9.0
2,-2.0,-2.0,-2.0,-2.0,-14.0
3,-15.0,-16.0,-17.0,-18.0,-19.0


In [13]:
# Division with fill_value=0: cells present only in df2 become 0 / value = 0.
# Cell (0, 'a') is still NaN because both frames hold 0 there (0 / 0);
# fill_value does not apply to it since the cell exists in both frames.
df1.div(df2, fill_value=0) 

Unnamed: 0,a,b,c,d,e
0,,1.0,1.0,1.0,0.0
1,0.8,0.833333,0.857143,0.875,0.0
2,0.8,0.818182,0.833333,0.846154,0.0
3,0.0,0.0,0.0,0.0,0.0


In [14]:
# Multiplication uses fill_value=1 (the multiplicative identity), so cells
# present only in df2 keep their df2 value.
df1.mul(df2, fill_value=1)

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,4.0,9.0,4.0
1,20.0,30.0,42.0,56.0,9.0
2,80.0,99.0,120.0,143.0,14.0
3,15.0,16.0,17.0,18.0,19.0


## Operations between DataFrame & Series

In [15]:
# 3x4 float array holding 0..11, used to illustrate NumPy broadcasting.
arr = np.arange(12, dtype=float).reshape(3, 4)
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [16]:
# First row of the array (a 1-D array of length 4).
arr[0]

array([0., 1., 2., 3.])

In [17]:
# Broadcasting: the first row is subtracted from every row of the array.
arr - arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [18]:
# 4x3 float frame (values 0..11) with rows A-D and columns b, d, e.
df = pd.DataFrame(
    np.arange(12, dtype=float).reshape(4, 3),
    index=['A', 'B', 'C', 'D'],
    columns=['b', 'd', 'e'],
)

df

Unnamed: 0,b,d,e
A,0.0,1.0,2.0
B,3.0,4.0,5.0
C,6.0,7.0,8.0
D,9.0,10.0,11.0


In [19]:
# Row 'A' as a Series whose index is the frame's column labels.
# Note: this rebinds s1, which held a different Series earlier in the notebook.
s1 = df.loc['A']
s1

b    0.0
d    1.0
e    2.0
Name: A, dtype: float64

In [20]:
# The Series index matches the frame's columns, so s1 is subtracted from every row.
df - s1

Unnamed: 0,b,d,e
A,0.0,0.0,0.0
B,3.0,3.0,3.0
C,6.0,6.0,6.0
D,9.0,9.0,9.0


In [21]:
# Column 'e' as a Series whose index is the frame's row labels (rebinds s2).
s2 = df['e']
s2

A     2.0
B     5.0
C     8.0
D    11.0
Name: e, dtype: float64

In [22]:
# Pitfall: by default the Series index (A-D) is matched against the frame's
# columns (b, d, e). Nothing overlaps, so the result is the union of both
# label sets as columns, filled entirely with NaN.
df - s2

Unnamed: 0,A,B,C,D,b,d,e
A,,,,,,,
B,,,,,,,
C,,,,,,,
D,,,,,,,


In [23]:
# axis=0 matches the Series index against the row labels instead, so column 'e'
# is subtracted from every column.
df.sub(s2, axis=0)

Unnamed: 0,b,d,e
A,-2.0,-1.0,0.0
B,-2.0,-1.0,0.0
C,-2.0,-1.0,0.0
D,-2.0,-1.0,0.0


In [24]:
# Same column-wise alignment as subtraction: row 'A' is added to every row.
df + s1

Unnamed: 0,b,d,e
A,0.0,2.0,4.0
B,3.0,5.0,7.0
C,6.0,8.0,10.0
D,9.0,11.0,13.0


## Function application and mapping

In [25]:
# 4x3 frame of standard-normal samples. No seed is set in this cell, so the
# values (and every output derived from them below) change on each run.
df = pd.DataFrame(
    np.random.randn(4, 3),
    index=['A', 'B', 'C', 'D'],
    columns=['b', 'd', 'e'],
)

df

Unnamed: 0,b,d,e
A,-0.431593,-1.563081,-1.154791
B,-1.063756,0.900567,-0.026436
C,-0.25066,0.096788,-1.94939
D,-1.317537,0.115563,0.167


### simple function

In [26]:
# NumPy ufuncs work element-wise on a DataFrame and return a DataFrame
# with the same labels.
np.abs(df)

Unnamed: 0,b,d,e
A,0.431593,1.563081,1.154791
B,1.063756,0.900567,0.026436
C,0.25066,0.096788,1.94939
D,1.317537,0.115563,0.167


### lambda

In [27]:
# f computes the range (max - min) of whatever Series it receives.
f = lambda x: x.max() - x.min()

# By default apply passes each column to f, giving one value per column.
df.apply(f)

b    1.066877
d    2.463648
e    2.116390
dtype: float64

In [28]:
# axis=1 passes each row to f instead, giving one value per row label.
df.apply(f, axis=1)

A    1.131488
B    1.964323
C    2.046178
D    1.484537
dtype: float64

### function

In [29]:
def f(x):
    """Return the minimum and maximum of ``x`` as a Series labelled 'min' and 'max'."""
    extremes = {'min': x.min(), 'max': x.max()}
    return pd.Series(extremes)

# When f returns a Series, apply assembles a DataFrame: one column per input
# column, with f's labels ('min', 'max') as the rows.
df.apply(f)

Unnamed: 0,b,d,e
min,-1.317537,-1.563081,-1.94939
max,-0.25066,0.900567,0.167


In [30]:
# Row-wise variant: one output row per input row, with 'min' and 'max' as columns.
df.apply(f, axis=1)

Unnamed: 0,min,max
A,-1.563081,-0.431593
B,-1.063756,0.900567
C,-1.94939,0.096788
D,-1.317537,0.167


### to each element

In [31]:
# Rebinds f to a scalar formatter: renders a number as a string with two decimals.
f = lambda x: '%.2f' % x

In [32]:
# Apply f to every individual cell of the frame.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# use map when this pandas version provides it and fall back to applymap otherwise.
(df.map if hasattr(pd.DataFrame, 'map') else df.applymap)(f)

Unnamed: 0,b,d,e
A,-0.43,-1.56,-1.15
B,-1.06,0.9,-0.03
C,-0.25,0.1,-1.95
D,-1.32,0.12,0.17


## Sorting and ranking

### sort_index

### series

In [33]:
# Four standard-normal samples under deliberately unsorted labels.
# No seed is set in this cell, so the values change on each run.
s = pd.Series(
    np.random.randn(4),
    index=['d', 'a', 'b', 'c'],
)
s

d   -0.483212
a   -2.071156
b   -0.675523
c   -0.339184
dtype: float64

In [34]:
# Returns a new Series ordered by label (a, b, c, d).
s.sort_index()

a   -2.071156
b   -0.675523
c   -0.339184
d   -0.483212
dtype: float64

In [35]:
# Returns a new Series ordered by value, smallest first.
s.sort_values()

a   -2.071156
b   -0.675523
d   -0.483212
c   -0.339184
dtype: float64

In [36]:
# Same, but largest value first.
s.sort_values(ascending=False)

c   -0.339184
d   -0.483212
b   -0.675523
a   -2.071156
dtype: float64

In [37]:
# Adds a missing value under a new label 'e' (this modifies s in place),
# then sorts: the NaN entry is placed at the end of the result.
s['e'] = np.nan
s.sort_values()

a   -2.071156
b   -0.675523
d   -0.483212
c   -0.339184
e         NaN
dtype: float64

### dataframe

In [38]:
# 2x4 integer frame whose row and column labels are both out of order.
df = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    columns=['d', 'a', 'b', 'c'],
    index=['three', 'one'],
)

df

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [39]:
# Default axis: rows are reordered by row label ('one' before 'three').
df.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [40]:
# axis=1 reorders the columns by label (a, b, c, d); row order is unchanged.
df.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [41]:
# Chain both calls to sort the columns first and then the rows.
df.sort_index(axis=1).sort_index(axis=0)

Unnamed: 0,a,b,c,d
one,5,6,7,4
three,1,2,3,0


In [42]:
# Columns in descending label order (d, c, b, a).
df.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [43]:
# Sort rows by the values in column 'c'. The rows are already in ascending
# order of 'c', so the result looks identical to df.
df.sort_values(by='c')

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [44]:
# Sort rows by column 'a', using column 'c' to break ties.
df.sort_values(by=['a', 'c'])

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


### ranking

### series

In [45]:
# Integer Series containing ties (7 twice, 4 twice) to show how ranking handles them.
s = pd.Series([7, -5, 7, 4, 2, 0 , 4])
s

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [46]:
# Default ranking: 1 is the smallest value, and tied values share the mean of
# the ranks they span (the two 7s both get 6.5, the two 4s both get 4.5).
s.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [47]:
# method='first' breaks ties by order of appearance: the first 7 gets 6, the second 7.
s.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [48]:
# Descending ranking (largest value ranks first); method='max' gives every
# member of a tie the highest rank in its group (both 7s get 2, both 4s get 4).
s.rank(ascending=False, method='max')

0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

### dataframe

In [49]:
# 5x4 integer frame (values 0..19) with rows A-E and columns a-d.
df = pd.DataFrame(
    np.arange(20).reshape(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['a', 'b', 'c', 'd'],
)
df

Unnamed: 0,a,b,c,d
A,0,1,2,3
B,4,5,6,7
C,8,9,10,11
D,12,13,14,15
E,16,17,18,19


In [50]:
# axis=1 ranks the values within each row. Every row increases from left to
# right, so each row ranks as 1, 2, 3, 4.
df.rank(axis=1)

Unnamed: 0,a,b,c,d
A,1.0,2.0,3.0,4.0
B,1.0,2.0,3.0,4.0
C,1.0,2.0,3.0,4.0
D,1.0,2.0,3.0,4.0
E,1.0,2.0,3.0,4.0
