# Pandas

![gif](imgs/P002.gif)

## Import

In [11]:
import pandas as pd
import numpy as np

## Base functionality

### reindex

In [4]:
# Series whose labels are not in alphabetical order.
s = pd.Series(data=[4, 5, 6, 7], index=list('dabc'))
s

d    4
a    5
b    6
c    7
dtype: int64

In [5]:
# Conform s to a new label set; 'e' and 'f' do not exist in s, so they
# come back as NaN and the result is float64.
sr = s.reindex(index=list('abcdef'))
sr

a    5.0
b    6.0
c    7.0
d    4.0
e    NaN
f    NaN
dtype: float64

In [6]:
# Same reindex, but missing labels get 0 instead of NaN, so the
# integer dtype is preserved.
srn = s.reindex(index=list('abcdef'), fill_value=0)
srn

a    5
b    6
c    7
d    4
e    0
f    0
dtype: int64

### reindex with a fill method (ffill / bfill)

In [8]:
# Series on a sparse integer index (labels 0, 2 and 5 only).
s = pd.Series(data=['Andy', 'Abla', 'Bash'], index=[0, 2, 5])
# Forward fill: each missing label takes the value of the closest preceding label.
s.reindex(index=range(6), method='ffill')

0    Andy
1    Andy
2    Abla
3    Abla
4    Abla
5    Bash
dtype: object

In [10]:
# Backward fill: each missing label takes the value of the closest following label.
s.reindex(index=range(6), method='bfill')

0    Andy
1    Abla
2    Abla
3    Bash
4    Bash
5    Bash
dtype: object

### Changing rows and columns with reindex

In [21]:
# 3x3 frame holding 0..8; row label 'b' and column label 'B' are absent.
df = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('acd'),
    columns=list('ACD'),
)
df

Unnamed: 0,A,C,D
a,0,1,2
c,3,4,5
d,6,7,8


In [22]:
# Reindex the rows only: the new row 'b' is filled with NaN,
# which turns the values into floats.
df.reindex(index=list('abcd'))

Unnamed: 0,A,C,D
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [23]:
# Reindex rows and columns in one call; the new row 'b' and the new
# column 'B' are filled with NaN.
cols = list('ABCD')
df.reindex(index=list('abcd'), columns=cols)

Unnamed: 0,A,B,C,D
a,0.0,,1.0,2.0
b,,,,
c,3.0,,4.0,5.0
d,6.0,,7.0,8.0


## Removing elements from axis

### series

In [25]:
# Integers 0..4 labelled 'a'..'e'.
s = pd.Series(data=np.arange(5), index=list('abcde'))
s

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [26]:
# drop returns a new Series without label 'c'; s itself is left unchanged.
s2 = s.drop(labels='c')
s2

a    0
b    1
d    3
e    4
dtype: int32

In [27]:
# Several labels can be dropped at once by passing a list (rebinds s2).
s2 = s.drop(labels=['a', 'c'])
s2

b    1
d    3
e    4
dtype: int32

### dataframe

In [28]:
# 4x4 frame holding 0..15 with rows 'A'..'D' and named columns.
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('ABCD'),
    columns=['one', 'two', 'three', 'four'],
)
df

Unnamed: 0,one,two,three,four
A,0,1,2,3
B,4,5,6,7
C,8,9,10,11
D,12,13,14,15


In [29]:
# Remove the row labelled 'A'; df itself is not modified.
df2 = df.drop(index='A')
df2

Unnamed: 0,one,two,three,four
B,4,5,6,7
C,8,9,10,11
D,12,13,14,15


In [33]:
# Remove the column labelled 'one' (rebinds df2).
df2 = df.drop(columns='one')
df2

Unnamed: 0,two,three,four
A,1,2,3
B,5,6,7
C,9,10,11
D,13,14,15


## Indexing, selection, and filtering

### series

In [37]:
# Floats 0.0..3.0 labelled 'a'..'d'.
s = pd.Series(data=np.arange(4.0), index=list('abcd'))
s

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [38]:
# Look up a single value by its index label.
s['b']

1.0

In [39]:
# Positional lookup of the second element. Use .iloc explicitly: on a
# label-indexed Series, plain s[1] relies on the deprecated fallback that
# treats an integer key as a position.
s.iloc[1]

1.0

In [40]:
# Label-based slice: both endpoints ('b' and 'c') are included.
s['b':'c']

b    1.0
c    2.0
dtype: float64

In [41]:
# Integer slice: positional, and the end position is excluded,
# so only the element at position 1 is returned.
s[1:2]

b    1.0
dtype: float64

### dataframe

In [42]:
# 4x4 frame holding 0..15; row labels are lower-case, column labels upper-case.
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['aa', 'bb', 'cc', 'dd'],
    columns=['AA', 'BB', 'CC', 'DD'],
)
df

Unnamed: 0,AA,BB,CC,DD
aa,0,1,2,3
bb,4,5,6,7
cc,8,9,10,11
dd,12,13,14,15


In [45]:
# Indexing a DataFrame with a single label selects that column as a Series.
df['CC']

aa     2
bb     6
cc    10
dd    14
Name: CC, dtype: int32

In [47]:
# 'cc' is a row label, not a column: df[label] looks the label up among the
# columns, so this fails with KeyError. Use df.loc['cc'] to select the row.
df['cc']

KeyError: 'cc'

In [48]:
# A slice inside [] selects rows; here, the first two rows by position.
df[:2]

Unnamed: 0,AA,BB,CC,DD
aa,0,1,2,3
bb,4,5,6,7


In [49]:
# Boolean filtering: keep only the rows whose 'CC' value is greater than 6.
df[df['CC'] > 6]

Unnamed: 0,AA,BB,CC,DD
cc,8,9,10,11
dd,12,13,14,15


In [51]:
# Element-wise comparison: yields a boolean DataFrame of the same shape.
df < 8

Unnamed: 0,AA,BB,CC,DD
aa,True,True,True,True
bb,True,True,True,True
cc,False,False,False,False
dd,False,False,False,False


In [55]:
# Masked assignment: every cell smaller than 8 is set to 0.
# This modifies df in place.
df[df < 8] = 0
df

Unnamed: 0,AA,BB,CC,DD
aa,0,0,0,0
bb,0,0,0,0
cc,8,9,10,11
dd,12,13,14,15


In [60]:
# .loc with a boolean row mask and a label slice for columns:
# rows where 'CC' > 5, columns from the first one up to and including 'CC'.
df.loc[df['CC'] > 5, :'CC']

Unnamed: 0,AA,BB,CC
cc,8,9,10
dd,12,13,14
