# Pandas

## Pandas Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
pd.Series([1,2,3,4])

0    1
1    2
2    3
3    4
dtype: int64

In [3]:
series = pd.Series([1,2,3,4])
type(series)

pandas.core.series.Series

In [4]:
series.axes

[RangeIndex(start=0, stop=4, step=1)]

In [5]:
series.dtype

dtype('int64')

In [6]:
series.empty

False

In [7]:
series.ndim

1

In [8]:
series.size

4

In [9]:
series.values

array([1, 2, 3, 4], dtype=int64)

In [10]:
series[:4]

0    1
1    2
2    3
3    4
dtype: int64

In [11]:
series.head(2)

0    1
1    2
dtype: int64

In [12]:
series.tail(2)

2    3
3    4
dtype: int64

In [13]:
a=np.array([1,2,3,55,77,888])
a

array([  1,   2,   3,  55,  77, 888])

In [14]:
series = pd.Series(a)
series

0      1
1      2
2      3
3     55
4     77
5    888
dtype: int32

In [15]:
series.index

RangeIndex(start=0, stop=6, step=1)

In [16]:
pd.Series([1,5,0.8,34], index=[1,3,5,7])

1     1.0
3     5.0
5     0.8
7    34.0
dtype: float64

In [17]:
series = pd.Series([1,2,3,66,88,9,0.3], index=['a','b','c','d','e','f','g'])

In [18]:
series['g']

0.3

In [19]:
dictionary = {'reg':10, 'log':11, 'cart':12}
dictionary

{'reg': 10, 'log': 11, 'cart': 12}

In [20]:
series =pd.Series(dictionary)

In [21]:
series['reg']

10

In [22]:
series['log':'cart']

log     11
cart    12
dtype: int64

In [23]:
pd.concat([series,series])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [24]:
series.append(series)

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [25]:
series

reg     10
log     11
cart    12
dtype: int64

## Index Operations

In [26]:
series = pd.Series([1,2,3,4,5,66,77])
series

0     1
1     2
2     3
3     4
4     5
5    66
6    77
dtype: int64

In [27]:
series.keys

<bound method Series.keys of 0     1
1     2
2     3
3     4
4     5
5    66
6    77
dtype: int64>

In [28]:
list(series.items())

[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 66), (6, 77)]

In [29]:
series.values

array([ 1,  2,  3,  4,  5, 66, 77], dtype=int64)

In [30]:
5 in series

True

In [31]:
series[[4,5]]

4     5
5    66
dtype: int64

In [32]:
series =pd.Series(dictionary)
series

reg     10
log     11
cart    12
dtype: int64

In [33]:
series['reg':'cart']

reg     10
log     11
cart    12
dtype: int64

In [34]:
series[(series>10) & (series<12)]

log    11
dtype: int64

In [35]:
data = pd.Series(['a','b','c'], index = [1,3,5])
data

1    a
3    b
5    c
dtype: object

In [36]:
data[1] # data[0] will gives error

'a'

In [37]:
data[0:3]

1    a
3    b
5    c
dtype: object

In [38]:
data.loc[5]

'c'

In [39]:
data.loc[0:4] # label based indexing location

1    a
3    b
dtype: object

In [40]:
data.iloc[0] # positional indexing, iloc , catch by resetting index

'a'

## Pandas Dataframe

In [41]:
l = [1,4,577,343]
l

[1, 4, 577, 343]

In [42]:
data = pd.DataFrame(l, columns=['values'])
type(data)

pandas.core.frame.DataFrame

In [43]:
data.axes

[RangeIndex(start=0, stop=4, step=1), Index(['values'], dtype='object')]

In [44]:
data.shape

(4, 1)

In [45]:
data.ndim

2

In [46]:
data.values

array([[  1],
       [  4],
       [577],
       [343]], dtype=int64)

In [47]:
data.head(2)

Unnamed: 0,values
0,1
1,4


In [48]:
data.tail(2)

Unnamed: 0,values
2,577
3,343


In [49]:
a=np.array([1,2,3,4,566])
type(a)

numpy.ndarray

In [50]:
pd.DataFrame(a,columns=['values'])

Unnamed: 0,values
0,1
1,2
2,3
3,4
4,566


In [51]:
v = np.arange(1,10).reshape(3,3)
v

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [52]:
data=pd.DataFrame(v,columns=['v1','v2','v3'])
data

Unnamed: 0,v1,v2,v3
0,1,2,3
1,4,5,6
2,7,8,9


In [53]:
data.columns=('value1','value2','value3')
data

Unnamed: 0,value1,value2,value3
0,1,2,3
1,4,5,6
2,7,8,9


In [54]:
data=pd.DataFrame(v,columns=['v1','v2','v3'], index=['a','b','c'])
data

Unnamed: 0,v1,v2,v3
a,1,2,3
b,4,5,6
c,7,8,9


In [55]:
pd.DataFrame(pd.Series([1,2,3,4]),columns=['variable'])

Unnamed: 0,variable
0,1
1,2
2,3
3,4


In [56]:
a = pd.Series([1,2,3,4])
b = pd.Series([1,2,3,4])

In [57]:
pd.DataFrame({'variable1':a,
              'variable2':b})

Unnamed: 0,variable1,variable2
0,1,1
1,2,2
2,3,3
3,4,4


In [58]:
dict_ = {
    'reg':{'RMSE':101,
           'MSE':111,
           'SSE':121},
    'log':{'RMSE':102,
           'MSE':112,
           'SSE':122},
    'cart':{'RMSE':103,
           'MSE':113,
           'SSE':123},
}

In [59]:
pd.DataFrame(dict_)

Unnamed: 0,reg,log,cart
RMSE,101,102,103
MSE,111,112,113
SSE,121,122,123


In [60]:
s1 = np.random.randint(10,size=5)
s2 = np.random.randint(10,size=5)
s3 = np.random.randint(10,size=5)
df = pd.DataFrame({'var1':s1,'var2':s2,'var3':s3})
df

Unnamed: 0,var1,var2,var3
0,6,1,7
1,3,3,5
2,2,2,6
3,5,3,1
4,8,4,4


In [61]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,6,1,7


In [62]:
df.index=['a','b','c','d','e']

In [63]:
df['c':'e']

Unnamed: 0,var1,var2,var3
c,2,2,6
d,5,3,1
e,8,4,4


In [64]:
df.drop('a', axis=0, inplace=True)

In [65]:
df

Unnamed: 0,var1,var2,var3
b,3,3,5
c,2,2,6
d,5,3,1
e,8,4,4


In [66]:
l = ['b','c']

In [67]:
l

['b', 'c']

In [68]:
df.drop(l,axis=0)

Unnamed: 0,var1,var2,var3
d,5,3,1
e,8,4,4


In [69]:
'var1' in df

True

In [70]:
l = ['var1','var2','var7']

In [71]:
for i in l:
    print(i in df)

True
True
False


In [72]:
df['var1'] is df['var2']

False

In [73]:
df['var1'] # dictionary type choice

b    3
c    2
d    5
e    8
Name: var1, dtype: int32

In [74]:
df.var1 # attribute type choice

b    3
c    2
d    5
e    8
Name: var1, dtype: int32

In [75]:
df[['var1','var2']]

Unnamed: 0,var1,var2
b,3,3
c,2,2
d,5,3
e,8,4


In [76]:
l = ['var1','var2']
df[l]

Unnamed: 0,var1,var2
b,3,3
c,2,2
d,5,3
e,8,4


In [77]:
df['added_column'] = df['var1'] / df['var2']
df

Unnamed: 0,var1,var2,var3,added_column
b,3,3,5,1.0
c,2,2,6,1.0
d,5,3,1,1.666667
e,8,4,4,2.0


In [78]:
df.drop('c',axis=0, inplace=True)

In [79]:
df

Unnamed: 0,var1,var2,var3,added_column
b,3,3,5,1.0
d,5,3,1,1.666667
e,8,4,4,2.0


In [80]:
df.drop('var1', axis=1, inplace=True)

In [81]:
df

Unnamed: 0,var2,var3,added_column
b,3,5,1.0
d,3,1,1.666667
e,4,4,2.0


## Reach to observations and variables

In [82]:
df

Unnamed: 0,var2,var3,added_column
b,3,5,1.0
d,3,1,1.666667
e,4,4,2.0


In [87]:
df.iloc[:3]

Unnamed: 0,var2,var3,added_column
b,3,5,1.0
d,3,1,1.666667
e,4,4,2.0


In [91]:
df.iloc[:2, :2]

Unnamed: 0,var2,var3
b,3,5
d,3,1


In [93]:
df['var3']

b    5
d    1
e    4
Name: var3, dtype: int32

In [96]:
df.loc[:'e', 'var3']

b    5
d    1
e    4
Name: var3, dtype: int32

In [105]:
df[df.var3 > 4]['var2']

b    3
Name: var2, dtype: int32

In [123]:
df[(df.var3 > 3 & (df.var3 < 2))]['var2'] # conditional index operations

b    3
e    4
Name: var2, dtype: int32

In [124]:
df.loc[df.var2 > 3, ['added_column']]

Unnamed: 0,added_column
e,2.0
