# Pandas

![gif](imgs/P001.gif)

## Import

In [1]:
import pandas as pd
import numpy as np

## Series

In [2]:
# Build a Series from a plain list; no labels are given, so pandas numbers the rows itself.
s = pd.Series(data=[4, 2, -3, 5])
s

0    4
1    2
2   -3
3    5
dtype: int64

In [3]:
# The Series' data as a NumPy array; the index labels are not part of it.
s.values

array([ 4,  2, -3,  5], dtype=int64)

In [4]:
# No index was passed at construction, so the Series carries a default RangeIndex.
s.index

RangeIndex(start=0, stop=4, step=1)

### custom index

In [5]:
# Pair each value with an explicit label; the label order given here is kept as-is (not sorted).
s = pd.Series(data=[1, 3, 5, 7], index=list('abdc'))
s

a    1
b    3
d    5
c    7
dtype: int64

In [6]:
# With explicit labels the index is a generic Index holding them in the order supplied.
s.index

Index(['a', 'b', 'd', 'c'], dtype='object')

### indexing

In [7]:
# Look up a single value by its index label.
s['a']

1

In [8]:
# Lookup goes by label, not position: 'c' is the last label in this Series.
s['c']

7

In [9]:
# Assigning through a label overwrites that entry of the Series in place.
s['b'] = 10
s

a     1
b    10
d     5
c     7
dtype: int64

In [10]:
# A list of labels selects several entries; the result follows the requested order,
# not the order in which the labels are stored.
s[['a', 'b', 'c', 'd']]

a     1
b    10
c     7
d     5
dtype: int64

### filtering & math operations

In [11]:
# Boolean mask: keep only the entries whose value is greater than 1; labels are preserved.
s[s > 1]

b    10
d     5
c     7
dtype: int64

In [12]:
# Scalar arithmetic is applied element-wise and the index is carried over unchanged.
s * 2

a     2
b    20
d    10
c    14
dtype: int64

In [13]:
# A NumPy function applied to a Series returns a Series with the same labels (float64 here).
np.exp(s)

a        2.718282
b    22026.465795
d      148.413159
c     1096.633158
dtype: float64

In [14]:
# Element-wise power; the result stays an integer Series.
s ** 3

a       1
b    1000
d     125
c     343
dtype: int64

### dict-like operations

In [15]:
# Membership is tested against the index labels, like the keys of a dict.
'a' in s

True

In [16]:
# 'k' is not one of the labels, so the test is False.
'k' in s

False

### dict → series

In [17]:
# A dict maps directly onto a Series: keys become the index labels, values become the data.
dict_data = dict(Andy=2, Alba=13, Bash=4, Ivan=10)
s1 = pd.Series(data=dict_data)
s1

Andy     2
Alba    13
Bash     4
Ivan    10
dtype: int64

In [18]:
# Passing index= together with a dict picks values by label, in the order of `names`:
# 'SpiderMan' has no entry in dict_data and becomes NaN (so the dtype turns into float64),
# while 'Bash' is left out because it is not listed in `names`.
names = ['Andy', 'Alba', 'SpiderMan', 'Ivan']
s2 = pd.Series(dict_data, index=names)
s2

Andy          2.0
Alba         13.0
SpiderMan     NaN
Ivan         10.0
dtype: float64

In [19]:
# Element-wise missing-value mask: True where s2 holds NaN.
pd.isnull(s2)

Andy         False
Alba         False
SpiderMan     True
Ivan         False
dtype: bool

In [20]:
# The inverse mask: True where a value is present.
pd.notnull(s2)

Andy          True
Alba          True
SpiderMan    False
Ivan          True
dtype: bool

In [21]:
# Method form of the same check; it yields the same mask as pd.isnull(s2).
s2.isnull()

Andy         False
Alba         False
SpiderMan     True
Ivan         False
dtype: bool

In [22]:
# Addition aligns the operands on their index labels. A label that lacks a valid value on
# either side ('Bash' is absent from s2; 'SpiderMan' is NaN in s2 and absent from s1)
# comes out as NaN, and the result is indexed by the sorted union of the labels.
# Note: this rebinds `s`, replacing the Series used in the earlier cells.
s = s1 + s2
s

Alba         26.0
Andy          4.0
Bash          NaN
Ivan         20.0
SpiderMan     NaN
dtype: float64

### name

In [23]:
# Give the Series and its index a name; both show up in the repr below.
s.name = 'scores'
s.index.name = 'names'

s

names
Alba         26.0
Andy          4.0
Bash          NaN
Ivan         20.0
SpiderMan     NaN
Name: scores, dtype: float64

## DataFrame

In [24]:
# Column-oriented input: every key becomes a column and its list supplies that column's rows.
dict_data = {
    'Andy': [1, 2, 3, 4],
    'Alba': [13, 12, 1, 44],
    'Bash': [93, 1, 12, 3],
    'Ivan': [9, 12, 6, 19],
}
df = pd.DataFrame(data=dict_data)
df

Unnamed: 0,Andy,Alba,Bash,Ivan
0,1,13,93,9
1,2,12,1,12
2,3,1,12,6
3,4,44,3,19


In [25]:
# Relabel both axes in place by assigning new label lists; the cell values stay the same.
df.columns = ['A', 'Al', 'B', 'I']
df.index = ['fi', 's', 't', 'f']
df

Unnamed: 0,A,Al,B,I
fi,1,13,93,9
s,2,12,1,12
t,3,1,12,6
f,4,44,3,19


In [26]:
# Bracket access with a column label returns that column as a Series named after it.
df['Al']

fi    13
s     12
t      1
f     44
Name: Al, dtype: int64

In [27]:
# Attribute access returns the same column as df['Al'].
df.Al

fi    13
s     12
t      1
f     44
Name: Al, dtype: int64

In [28]:
# .loc selects a row by its index label; the result is a Series indexed by the column names.
df.loc['fi']

A      1
Al    13
B     93
I      9
Name: fi, dtype: int64

### new column

In [29]:
# Assigning a scalar to a new column label creates the column and repeats the value in every row.
df['new'] = 14.99
df

Unnamed: 0,A,Al,B,I,new
fi,1,13,93,9,14.99
s,2,12,1,12,14.99
t,3,1,12,6,14.99
f,4,44,3,19,14.99


In [30]:
# Overwrite the existing 'new' column with 0..3, one value per row.
# Bracket assignment is used on purpose: attribute-style assignment (df.new = ...) only
# updates a column that already exists; otherwise it silently attaches a plain Python
# attribute to the DataFrame instead of creating a column.
df['new'] = np.arange(4)
df

Unnamed: 0,A,Al,B,I,new
fi,1,13,93,9,0
s,2,12,1,12,1
t,3,1,12,6,2
f,4,44,3,19,3


In [31]:
# A labelled Series whose labels only partly overlap the DataFrame's row labels
# ('r' here versus 'f' there).
s = pd.Series(
    data=[1, 4, 8, 13],
    index=['fi', 's', 't', 'r'],
)
s

fi     1
s      4
t      8
r     13
dtype: int64

In [32]:
# Assigning a Series aligns it on the DataFrame's row labels: row 'f' has no match in `s`
# and becomes NaN, while the extra label 'r' in `s` is dropped.
df['ser'] = s
df

Unnamed: 0,A,Al,B,I,new,ser
fi,1,13,93,9,0,1.0
s,2,12,1,12,1,4.0
t,3,1,12,6,2,8.0
f,4,44,3,19,3,


In [33]:
# Store an element-wise comparison as a new boolean column: True where column 'B' exceeds 10.
df['O'] = (df['B'] > 10)
df

Unnamed: 0,A,Al,B,I,new,ser,O
fi,1,13,93,9,0,1.0,True
s,2,12,1,12,1,4.0,False
t,3,1,12,6,2,8.0,True
f,4,44,3,19,3,,False


### delete column

In [34]:
# del removes the column from the DataFrame in place.
del df['O']
df

Unnamed: 0,A,Al,B,I,new,ser
fi,1,13,93,9,0,1.0
s,2,12,1,12,1,4.0
t,3,1,12,6,2,8.0
f,4,44,3,19,3,


### dict of dict

In [35]:
# Nested dicts: outer keys become the columns, inner keys become the row labels.
d2 = {
    'Shrek': {2000: 1, 2001: 2, 2003: 3},
    'Matrix': {2000: 20, 2001: 2, 2003: -22},
}
df = pd.DataFrame(data=d2)
df

Unnamed: 0,Shrek,Matrix
2000,1,20
2001,2,2
2003,3,-22


### transpose (T)

In [36]:
# Transpose: rows and columns swap, so the films become the row labels and the years the columns.
df.T

Unnamed: 0,2000,2001,2003
Shrek,1,2,3
Matrix,20,2,-22


### names

In [37]:
# Name the column axis and the row axis; both names appear in the rendered table header.
df.columns.name = 'Film'
df.index.name = 'Year'

df

Film,Shrek,Matrix
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,1,20
2001,2,2
2003,3,-22


### values & ndarray

In [38]:
# The frame's data as a 2-D NumPy array, one inner row per DataFrame row, without any labels.
df.values

array([[  1,  20],
       [  2,   2],
       [  3, -22]], dtype=int64)

In [39]:
# Confirm that .values hands back a plain numpy.ndarray.
type(df.values)

numpy.ndarray

## Indexes

In [40]:
# Small labelled Series (values 0, 1, 2) used to inspect its Index object in the next cells.
s = pd.Series(data=range(3), index=list('abc'))
s

a    0
b    1
c    2
dtype: int64

In [41]:
# Keep a reference to the Series' Index object so it can be examined on its own.
index = s.index
index

Index(['a', 'b', 'c'], dtype='object')

In [42]:
# An Index supports positional slicing; the slice is itself an Index.
index[1:]

Index(['b', 'c'], dtype='object')

In [43]:
# An Index is immutable: item assignment raises TypeError. The error is caught and
# reported so that the notebook keeps running past this demonstration.
try: 
    index[0] = 'e'
except TypeError:
    print('TypeError')

TypeError


In [44]:
# Create the Index object up front, then pass it to the Series constructor explicitly.
index = pd.Index(data=np.arange(3))
s = pd.Series(data=[1, -2, 0], index=index)
s

0    1
1   -2
2    0
dtype: int64

In [45]:
# Identity check: does the Series hold the very same Index object that was passed in?
# (True in the recorded output.)
s.index is index

True