## Pandas Basics

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's legacy global RNG once, before the first random draw, so that
# every np.random.randn call in this notebook produces the same numbers on
# each Restart & Run All. (Outputs saved before the seed was added will be
# replaced by the seeded values on the next run.)
np.random.seed(0)

# A Series is a one-dimensional labelled array: five standard-normal draws
# labelled 'a'..'e'.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [3]:
# No index supplied: pandas labels the five random values 0..4.
s1 = pd.Series(data=np.random.randn(5))
s1

0   -1.800327
1   -0.143397
2   -2.224598
3   -0.689831
4    0.239333
dtype: float64

In [4]:
# Display the labelled Series created earlier (labels 'a'..'e').
s

a   -1.150614
b    0.501104
c   -0.063302
d    1.177150
e    0.253202
dtype: float64

In [5]:
# The index attribute holds the labels of the Series.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [6]:
# Another Series without an explicit index: labels default to 0..4.
# The result is only displayed, not stored in a variable.
pd.Series(np.random.randn(5))

0   -0.050197
1    0.219848
2   -0.352544
3    0.309273
4   -0.246001
dtype: float64

In [7]:
# Plain dict used below to build Series; its keys become the labels.
d = dict(a=0.0, b=1.0, c=2.0)

In [8]:
# Building a Series from a dict: the dict keys become the index labels.
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [9]:
# With an explicit index the values are looked up by label and returned in
# that order; 'd' is not a key of the dict, so its value is NaN.
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [10]:
# A scalar value is repeated for every label in the given index.
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [11]:
# Positional access: .iloc[0] is the first element whatever its label is.
# Plain s[0] on this string-labelled Series only works through the deprecated
# integer-position fallback of Series.__getitem__ (FutureWarning since
# pandas 2.1; integer keys are to be treated as labels in a future version).
s.iloc[0]

-1.1506138536215527

In [12]:
# First three elements by position, labels preserved.
s.iloc[:3]

a   -1.150614
b    0.501104
c   -0.063302
dtype: float64

In [13]:
# Label-based access: fetch the value stored under label 'a'.
s.loc['a']

-1.1506138536215527

In [14]:
# Overwrite the value stored under the existing label 'e' (mutates s in place).
s.loc['e'] = 12.0

In [15]:
# Display s again: the entry labelled 'e' now holds the assigned value 12.0.
s

a    -1.150614
b     0.501104
c    -0.063302
d     1.177150
e    12.000000
dtype: float64

In [16]:
# dict-style lookup by label. Per the pandas documentation, Series.get returns
# a default (None unless one is passed) for a missing label instead of raising.
s.get('a')

-1.1506138536215527

In [17]:
# Two independent Series of five random values each, both with the default
# 0..4 index; ts1 is drawn first, then ts2.
ts1, ts2 = (pd.Series(np.random.randn(5)) for _ in range(2))

In [18]:
# Map column names to the two Series; this dict is the input for a DataFrame.
d = dict(col1=ts1, col2=ts2)
d

{'col1': 0   -0.141645
 1    0.650063
 2   -0.306290
 3   -0.427817
 4   -0.346812
 dtype: float64, 'col2': 0    0.744263
 1   -0.488051
 2    0.850870
 3    0.837364
 4    2.085140
 dtype: float64}

In [19]:
# DataFrame from a dict of Series: one column per dict key.
df1 = pd.DataFrame(d)
df1

Unnamed: 0,col1,col2
0,-0.141645,0.744263
1,0.650063,-0.488051
2,-0.30629,0.85087
3,-0.427817,0.837364
4,-0.346812,2.08514


In [20]:
# DataFrame from a 10x5 array of random values; rows and columns both get
# default integer labels.
df2 = pd.DataFrame(data=np.random.randn(10, 5))
df2

Unnamed: 0,0,1,2,3,4
0,-0.118335,-1.221341,-0.976293,-0.033465,0.402835
1,-0.412112,-0.328003,-1.825015,1.33627,0.39271
2,-0.517438,0.965971,2.156696,1.211731,-1.771843
3,0.526951,0.588232,0.304368,0.464204,0.38829
4,-0.402569,-1.264164,-2.168525,-0.380741,-2.599544
5,0.217967,0.345994,-0.561747,0.44767,0.185703
6,2.023058,0.680341,-0.496836,-0.6935,-0.439701
7,-0.038313,0.429497,0.752536,-0.938364,-0.701727
8,-0.624622,-2.312508,1.995539,0.765122,0.446109
9,0.915602,0.779905,0.719861,-0.321852,-0.289653


In [21]:
# Same 10x5 construction, but with columns named 'a'..'e' instead of the
# default integer labels.
df3 = pd.DataFrame(data=np.random.randn(10, 5), columns=list('abcde'))
df3

Unnamed: 0,a,b,c,d,e
0,-0.730268,-0.586965,-1.535005,0.107623,1.397197
1,-0.677471,-0.428168,-0.357829,-0.102251,0.146786
2,-0.716443,-1.161026,-0.404509,0.960871,0.49108
3,-1.10079,-0.03772,0.905004,-0.980259,0.479523
4,1.181858,-0.996612,-0.373315,-0.706841,1.029946
5,0.549711,0.732233,-0.201548,-0.192338,0.385879
6,-0.071747,-0.57556,0.387641,-1.026616,-2.149041
7,2.01805,0.062585,0.845093,0.874446,0.811008
8,0.948529,0.293358,1.282776,1.243882,1.925139
9,0.119668,-0.761931,0.609768,-0.247673,0.665511


In [22]:
# Two Series with different label sets: 'one' has no entry for label 'd'.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
}
# The DataFrame aligns both columns on the union of their labels; the missing
# ('d', 'one') cell is filled with NaN.
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [23]:
# Passing index= keeps only the listed row labels, in the given order.
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [24]:
# Row labels of df.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [25]:
# Column labels of df.
df.columns

Index(['one', 'two'], dtype='object')

In [26]:
# Whether any of the row labels is NaN (none are here).
df.index.hasnans

False

In [27]:
# Load the long-format sample data (columns: date, variable, value).
# The path is relative, so data1.csv must be in the notebook's working directory.
# NOTE(review): 'date' is read without parse_dates, so it is presumably kept
# as plain strings — confirm before doing any date arithmetic on it.
dfc = pd.read_csv('data1.csv')
dfc

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059
3,2000-01-03,B,-1.135632
4,2000-01-04,B,1.212112
5,2000-01-05,B,-0.173215
6,2000-01-03,C,0.119209
7,2000-01-04,C,-1.044236
8,2000-01-05,C,-0.861849
9,2000-01-03,D,-2.104569


In [28]:
# Boolean-mask row selection: keep only the rows whose 'variable' equals 'A'.
dfc.loc[dfc['variable'].eq('A')]

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059


In [29]:
# Reshape long -> wide: one row per date, one column per variable, with the
# cells taken from the 'value' column.
dfc.pivot(index='date', columns='variable', values='value')

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.469112,-1.135632,0.119209,-2.104569
2000-01-04,-0.282863,1.212112,-1.044236,-0.494929
2000-01-05,-1.509059,-0.173215,-0.861849,1.071804


In [30]:
# Summary statistics (count, mean, std, min, quartiles, max); only the
# numeric 'value' column is summarised here.
dfc.describe()

Unnamed: 0,value
count,12.0
mean,-0.39451
std,1.007649
min,-2.104569
25%,-1.067085
50%,-0.388896
75%,0.206685
max,1.212112
