# Pandas

## Pandas Series

In [264]:
import numpy as np
import pandas as pd

In [265]:
# Build a Series from a plain list; pandas supplies a default 0..n-1 integer index.
pd.Series([1,2,3,4])

0    1
1    2
2    3
3    4
dtype: int64

In [266]:
# Keep the Series in a variable, then check which class pandas created.
series = pd.Series(data=[1, 2, 3, 4])
type(series)

pandas.core.series.Series

In [267]:
series.axes  # list holding the row-axis index of the Series

[RangeIndex(start=0, stop=4, step=1)]

In [268]:
series.dtype  # data type of the stored values

dtype('int64')

In [269]:
series.empty  # True only when the Series holds no elements

False

In [270]:
series.ndim  # number of dimensions (1 for this Series)

1

In [271]:
series.size  # total number of elements

4

In [272]:
series.values  # the underlying data as a NumPy array

array([1, 2, 3, 4], dtype=int64)

In [273]:
series[:4]  # slice: the first four elements

0    1
1    2
2    3
3    4
dtype: int64

In [274]:
series.head(2)  # first two elements

0    1
1    2
dtype: int64

In [275]:
series.tail(2)  # last two elements

2    3
3    4
dtype: int64

In [276]:
# Create a one-dimensional NumPy array of integers and display it.
a = np.array([1, 2, 3, 55, 77, 888])
a

array([  1,   2,   3,  55,  77, 888])

In [277]:
# A Series can wrap a NumPy array directly; the default integer index is added.
series = pd.Series(data=a)
series

0      1
1      2
2      3
3     55
4     77
5    888
dtype: int32

In [278]:
series.index  # the index labels of the Series

RangeIndex(start=0, stop=6, step=1)

In [279]:
# Explicit index labels; mixing ints with a float makes the whole Series float64.
pd.Series([1,5,0.8,34], index=[1,3,5,7])

1     1.0
3     5.0
5     0.8
7    34.0
dtype: float64

In [280]:
# Series labelled with the letters 'a'..'g' instead of the default integer index.
series = pd.Series(
    [1, 2, 3, 66, 88, 9, 0.3],
    index=list('abcdefg'),
)

In [281]:
series['g']  # look up a single value by its label

0.3

In [282]:
# Small dict with string keys and integer values.
dictionary = dict(reg=10, log=11, cart=12)
dictionary

{'reg': 10, 'log': 11, 'cart': 12}

In [283]:
# Dict keys become the index labels, dict values become the data.
series = pd.Series(data=dictionary)

In [284]:
series['reg']  # value stored under the 'reg' label

10

In [285]:
series['log':'cart']  # label slice: both endpoints are included

log     11
cart    12
dtype: int64

In [286]:
# Stack two Series end to end; the original labels are kept, so they repeat.
pd.concat([series,series])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [287]:
# Series.append was removed in pandas 2.0; pd.concat is the supported replacement.
# It returns the same stacked result and leaves `series` unmodified.
pd.concat([series, series])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [288]:
series  # still three elements: the concatenations returned new objects

reg     10
log     11
cart    12
dtype: int64

## Index Operations

In [289]:
# Fresh integer Series with the default 0..6 index for the indexing examples.
series = pd.Series(data=[1, 2, 3, 4, 5, 66, 77])
series

0     1
1     2
2     3
3     4
4     5
5    66
6    77
dtype: int64

In [290]:
series.keys()  # keys is a method: call it to get the index (bare `series.keys` only echoes the bound method)

RangeIndex(start=0, stop=7, step=1)

In [291]:
list(series.items())  # (index label, value) pairs

[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 66), (6, 77)]

In [292]:
series.values  # the data without the index, as a NumPy array

array([ 1,  2,  3,  4,  5, 66, 77], dtype=int64)

In [293]:
# `in` tests membership against the index labels, not the stored values.
5 in series

True

In [294]:
series[[4,5]]  # a list of labels selects several elements at once

4     5
5    66
dtype: int64

In [295]:
# Rebuild the string-labelled Series from the dict for the label-slicing examples.
series = pd.Series(data=dictionary)
series

reg     10
log     11
cart    12
dtype: int64

In [296]:
series['reg':'cart']  # label slice from first to last label, endpoints included

reg     10
log     11
cart    12
dtype: int64

In [297]:
# Boolean mask: each comparison needs its own parentheses because & binds tighter than > and <.
series[(series>10) & (series<12)]

log    11
dtype: int64

In [298]:
# String Series whose integer labels (1, 3, 5) differ from the positions (0, 1, 2).
data = pd.Series(list('abc'), index=[1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [299]:
data[1] # label lookup: 1 is an index label here; data[0] would raise a KeyError (no label 0)

'a'

In [300]:
data[0:3]  # an integer slice with [] is positional: positions 0, 1, 2 (labels 1, 3, 5)

1    a
3    b
5    c
dtype: object

In [301]:
data.loc[5]  # .loc looks up by label: the element labelled 5

'c'

In [302]:
data.loc[0:4] # label-based slice: keeps the labels that fall between 0 and 4

1    a
3    b
dtype: object

In [303]:
data.iloc[0] # position-based lookup: the first element, whatever its label is

'a'

## Pandas DataFrame

In [304]:
# Plain Python list of integers, displayed for reference.
l = [1, 4, 577, 343]
l

[1, 4, 577, 343]

In [305]:
# Single-column DataFrame: the list supplies the rows of the 'values' column.
data = pd.DataFrame({'values': l})
type(data)

pandas.core.frame.DataFrame

In [306]:
data.axes  # [row index, column index]

[RangeIndex(start=0, stop=4, step=1), Index(['values'], dtype='object')]

In [307]:
data.shape  # (number of rows, number of columns)

(4, 1)

In [308]:
data.ndim  # number of dimensions (2 for this DataFrame)

2

In [309]:
data.values  # the data as a 2-D NumPy array, without index or column labels

array([[  1],
       [  4],
       [577],
       [343]], dtype=int64)

In [310]:
data.head(2)  # first two rows

Unnamed: 0,values
0,1
1,4


In [311]:
data.tail(2)  # last two rows

Unnamed: 0,values
2,577
3,343


In [312]:
# One-dimensional NumPy array; the cell shows its type.
a = np.array([1, 2, 3, 4, 566])
type(a)

numpy.ndarray

In [313]:
# A 1-D array becomes a single column; `columns` supplies its name.
pd.DataFrame(a,columns=['values'])

Unnamed: 0,values
0,1
1,2
2,3
3,4
4,566


In [314]:
# The integers 1..9 arranged as a 3x3 matrix, filled row by row.
v = np.arange(1, 10).reshape((3, 3))
v

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [315]:
# Each column of the 2-D array becomes a named DataFrame column.
data = pd.DataFrame(data=v, columns=['v1', 'v2', 'v3'])
data

Unnamed: 0,v1,v2,v3
0,1,2,3
1,4,5,6
2,7,8,9


In [316]:
# Rename every column at once by assigning a new sequence of labels.
data.columns = ['value1', 'value2', 'value3']
data

Unnamed: 0,value1,value2,value3
0,1,2,3
1,4,5,6
2,7,8,9


In [317]:
# Same matrix, now with explicit row labels as well as column names.
data = pd.DataFrame(
    data=v,
    index=list('abc'),
    columns=['v1', 'v2', 'v3'],
)
data

Unnamed: 0,v1,v2,v3
a,1,2,3
b,4,5,6
c,7,8,9


In [318]:
# A Series can be promoted to a one-column DataFrame; `columns` names that column.
pd.DataFrame(pd.Series([1,2,3,4]),columns=['variable'])

Unnamed: 0,variable
0,1
1,2
2,3
3,4


In [319]:
# Two separate Series objects holding the same four integers.
a = pd.Series(data=[1, 2, 3, 4])
b = pd.Series(data=[1, 2, 3, 4])

In [320]:
# Dict of Series: each key becomes a column name, each Series its column.
pd.DataFrame({'variable1':a,
              'variable2':b})

Unnamed: 0,variable1,variable2
0,1,1
1,2,2
2,3,3
3,4,4


In [321]:
# Nested dict: each outer key maps to an inner dict keyed by 'RMSE', 'MSE', 'SSE'.
dict_ = {
    'reg': {'RMSE': 101, 'MSE': 111, 'SSE': 121},
    'log': {'RMSE': 102, 'MSE': 112, 'SSE': 122},
    'cart': {'RMSE': 103, 'MSE': 113, 'SSE': 123},
}

In [322]:
# Nested dict: outer keys become the columns, inner keys become the row index.
pd.DataFrame(dict_)

Unnamed: 0,reg,log,cart
RMSE,101,102,103
MSE,111,112,113
SSE,121,122,123


In [323]:
# Three columns of five random integers each, drawn from 0-9.
# NOTE(review): no random seed is set, so a rerun will not reproduce the values
# shown in the outputs; consider seeding NumPy before this cell.
s1 = np.random.randint(10,size=5)
s2 = np.random.randint(10,size=5)
s3 = np.random.randint(10,size=5)
df = pd.DataFrame({'var1':s1,'var2':s2,'var3':s3})
df

Unnamed: 0,var1,var2,var3
0,2,8,1
1,0,4,7
2,9,1,5
3,9,5,2
4,6,6,1


In [324]:
df[0:1]  # an integer slice with [] selects rows by position: here only the first row

Unnamed: 0,var1,var2,var3
0,2,8,1


In [325]:
# Replace the default integer row labels with the letters 'a'..'e'.
df.index = list('abcde')

In [326]:
df['c':'e']  # label slice over the rows: both 'c' and 'e' are included

Unnamed: 0,var1,var2,var3
c,9,1,5
d,9,5,2
e,6,6,1


In [327]:
# Remove row 'a' by rebinding `df` rather than mutating it in place.
# errors='ignore' keeps the cell safe to rerun: once 'a' is gone, a second
# run is a no-op instead of a KeyError.
df = df.drop(index='a', errors='ignore')

In [328]:
df  # row 'a' is no longer present

Unnamed: 0,var1,var2,var3
b,0,4,7
c,9,1,5
d,9,5,2
e,6,6,1


In [329]:
l = ['b','c']  # row labels passed to df.drop in a later cell

In [330]:
l  # echo the list of labels

['b', 'c']

In [331]:
# Drop several rows at once; without inplace=True this returns a new frame and df keeps rows 'b' and 'c'.
df.drop(l,axis=0)

Unnamed: 0,var1,var2,var3
d,9,5,2
e,6,6,1


In [332]:
'var1' in df  # for a DataFrame, `in` checks the column labels

True

In [333]:
l = ['var1','var2','var7']  # candidate column names; df has no 'var7' column

In [334]:
# Print, one per line, whether each candidate name is a column of df.
for column_name in l:
    print(column_name in df)

True
True
False


In [335]:
# Compare the two columns by content. `is` only checks object identity, which is
# False for any two distinct Series and says nothing about their values.
df['var1'].equals(df['var2'])

False

In [336]:
df['var1'] # dictionary-style selection: returns the column as a Series

b    0
c    9
d    9
e    6
Name: var1, dtype: int32

In [337]:
df.var1 # attribute-style selection of the same column

b    0
c    9
d    9
e    6
Name: var1, dtype: int32

In [338]:
df[['var1','var2']]  # a list of column names returns a DataFrame with just those columns

Unnamed: 0,var1,var2
b,0,4
c,9,1
d,9,5
e,6,6


In [339]:
# Same selection as with an inline list, but the column names come from a variable.
l = ['var1','var2']
df[l]

Unnamed: 0,var1,var2
b,0,4
c,9,1
d,9,5
e,6,6


In [340]:
# Assigning to a new key adds a column; the division is element-wise, row by row.
# NOTE(review): var2 holds random integers from 0-9, so a 0 there would produce
# inf (or NaN for 0/0) rather than raising an error.
df['added_column'] = df['var1'] / df['var2']
df

Unnamed: 0,var1,var2,var3,added_column
b,0,4,7,0.0
c,9,1,5,9.0
d,9,5,2,1.8
e,6,6,1,1.0


In [341]:
# Remove row 'c' by rebinding `df` rather than mutating it in place.
# errors='ignore' keeps the cell safe to rerun: once 'c' is gone, a second
# run is a no-op instead of a KeyError.
df = df.drop(index='c', errors='ignore')

In [343]:
df  # row 'c' is no longer present

Unnamed: 0,var1,var2,var3,added_column
b,0,4,7,0.0
d,9,5,2,1.8
e,6,6,1,1.0


In [345]:
# Remove the 'var1' column by rebinding `df` rather than mutating it in place.
# errors='ignore' keeps the cell safe to rerun: once the column is gone, a
# second run is a no-op instead of a KeyError.
df = df.drop(columns='var1', errors='ignore')

In [346]:
df  # the 'var1' column is no longer present

Unnamed: 0,var2,var3,added_column
b,4,7,0.0
d,5,2,1.8
e,6,1,1.0
