In [1]:
import numpy as np
import pandas as pd

In [3]:
# Column-oriented sample data: three yearly rows for 'Mumbai' followed by
# three for 'Pune', each with a 'year' and a 'pop' value.
mydict = dict(
    state=['Mumbai'] * 3 + ['Pune'] * 3,
    year=[2010, 2011, 2012, 2011, 2012, 2013],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

In [10]:
# Build the frame from mydict with an explicit column order and row labels.
# 'debt' is not a key of mydict, so that column is created with missing values.
df = pd.DataFrame(
    data=mydict,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=['year', 'state', 'pop', 'debt'],
)

In [11]:
# Show the new frame: 'debt' exists as a column but every value is missing.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,
five,2012,Pune,2.9,
six,2013,Pune,3.2,


In [14]:
# Fill the 'debt' column from an array: np.arange(1,7) gives the six values
# 1..6, one per row in row order.
df['debt'] = np.arange(1,7)

In [15]:
# 'debt' now holds 1 through 6 from the first row to the last.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,1
two,2011,Mumbai,1.7,2
three,2012,Mumbai,3.6,3
four,2011,Pune,2.4,4
five,2012,Pune,2.9,5
six,2013,Pune,3.2,6


In [16]:
# A float Series labelled with only three of df's six row labels.
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})
val

two    -1.2
four   -1.5
five   -1.7
dtype: float64

In [17]:
# Assigning a Series matches values to rows by index label, not by position.
# The previous 'debt' values are replaced; rows whose label is absent from
# val ('one', 'three', 'six') end up with a missing value.
df['debt'] = val

In [18]:
# Only rows 'two', 'four' and 'five' carry a 'debt' value; the rest are missing.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,-1.2
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,-1.5
five,2012,Pune,2.9,-1.7
six,2013,Pune,3.2,


### How to Assign Boolean Value to a Column

In [19]:
# Add a boolean column: True where 'state' equals 'Mumbai', False otherwise.
# NOTE(review): the column is named 'western' but the test is only
# state == 'Mumbai' - confirm the name reflects the intended meaning.
df['western'] = df['state'].eq('Mumbai')

In [20]:
# The new 'western' column is True for the Mumbai rows and False for Pune.
df

Unnamed: 0,year,state,pop,debt,western
one,2010,Mumbai,1.5,,True
two,2011,Mumbai,1.7,-1.2,True
three,2012,Mumbai,3.6,,True
four,2011,Pune,2.4,-1.5,False
five,2012,Pune,2.9,-1.7,False
six,2013,Pune,3.2,,False


## To Remove A Column From DataFrame

In [21]:
# `del` removes the column from df itself (no new frame is created).
del df['western']

In [22]:
# 'western' is gone; the remaining columns are unchanged.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,-1.2
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,-1.5
five,2012,Pune,2.9,-1.7
six,2013,Pune,3.2,


## Index Objects

In [23]:
# Small Series 0, 1, 2 labelled 'a', 'b', 'c' - used to inspect its Index.
obj = pd.Series(data=range(3), index=list('abc'))
obj

a    0
b    1
c    2
dtype: int64

In [24]:
obj.index  # the Index object that holds the Series' labels

Index(['a', 'b', 'c'], dtype='object')

In [26]:
# Keep the Series' Index under its own name so it can be examined directly.
my_index = obj.index
my_index

Index(['a', 'b', 'c'], dtype='object')

In [29]:
# Individual labels can be read by position, like elements of a sequence.
my_index[0], my_index[1], my_index[2]

('a', 'b', 'c')

In [30]:
# An Index is immutable: item assignment raises TypeError. The error is caught
# and printed so this demonstration does not stop a "Restart & Run All".
try:
    my_index[0] = 'B'
except TypeError as err:
    print(f'{type(err).__name__}: {err}')

TypeError: Index does not support mutable operations

## Re-Indexing

In [32]:
# Float Series whose labels are deliberately out of alphabetical order.
obj1 = pd.Series({'d': 4.1, 'b': 7.5, 'a': -8.3, 'c': 3.6})
obj1

d    4.1
b    7.5
a   -8.3
c    3.6
dtype: float64

In [33]:
# reindex returns a new Series arranged in the requested label order;
# 'e' does not exist in obj1, so it appears with NaN.
obj2 = obj1.reindex(index=['a', 'b', 'c', 'd', 'e'])
obj2

a   -8.3
b    7.5
c    3.6
d    4.1
e    NaN
dtype: float64

### Filling while re-indexing: `ffill` (forward fill) and `bfill` (backward fill)

In [34]:
# Colour names at the even positions 0, 2 and 4, leaving gaps at 1, 3 and 5.
obj3 = pd.Series({0: 'blue', 2: 'purple', 4: 'yellow'})
obj3

0      blue
2    purple
4    yellow
dtype: object

In [35]:
# Forward fill: each new label (1, 3, 5) takes the value of the closest
# existing label before it.
obj4 = obj3.reindex(index=range(6), method='ffill')
obj4

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [36]:
# Backward fill: each new label takes the value of the closest existing label
# after it; 5 has nothing after it, so it stays NaN.
obj5 = obj3.reindex(index=range(6), method='bfill')
obj5

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

## Dropping Entries From An Axis

In [37]:
# Integers 0..4 labelled 'a'..'e'; the drop examples below start from this.
obj = pd.Series(data=np.arange(5), index=list('abcde'))
obj

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [38]:
# drop returns a new Series without the entry labelled 'c'.
new = obj.drop(index='c')
new

a    0
b    1
d    3
e    4
dtype: int32

In [40]:
# A list of labels drops several entries in one call.
new = obj.drop(index=['b', 'e'])
new

a    0
c    2
d    3
dtype: int32

In [41]:
# obj still has all five entries: the drop calls above returned new Series
# and left the original untouched.
obj

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [42]:
# 4x4 frame holding 0..15 row by row, with city names as row labels and
# 'one'..'four' as column labels.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Mumbai', 'Pune', 'Chennai', 'Delhi'],
)
data

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7
Chennai,8,9,10,11
Delhi,12,13,14,15


In [43]:
# Returns a new frame without the 'Chennai' and 'Delhi' rows; `data` itself
# is not modified.
data.drop(index=['Chennai', 'Delhi'])

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7


In [44]:
# All four rows are still present: the previous drop did not change data.
data

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7
Chennai,8,9,10,11
Delhi,12,13,14,15


### To Remove A Column Using Drop

In [45]:
# Column removal: returns a new frame without column 'three'.
data.drop(columns='three')

Unnamed: 0,one,two,four
Mumbai,0,1,3
Pune,4,5,7
Chennai,8,9,11
Delhi,12,13,15


In [46]:
# Column 'three' is still in data: the drop above returned a separate frame.
data

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7
Chennai,8,9,10,11
Delhi,12,13,14,15


### To Remove Multiple Columns Using Drop

In [47]:
# A list of names drops several columns at once; the result is a new frame.
data.drop(columns=['one', 'four'])

Unnamed: 0,two,three
Mumbai,1,2
Pune,5,6
Chennai,9,10
Delhi,13,14


In [48]:
# data still has all four columns.
data

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7
Chennai,8,9,10,11
Delhi,12,13,14,15


In [49]:
# inplace=True removes column 'two' from data itself instead of returning a
# new frame. Re-running this cell raises KeyError because 'two' is then gone.
data.drop(columns='two', inplace=True)

In [50]:
# This time data itself changed: column 'two' is no longer present.
data

Unnamed: 0,one,three,four
Mumbai,0,2,3
Pune,4,6,7
Chennai,8,10,11
Delhi,12,14,15


In [51]:
# Same in-place form for a row: removes the 'Delhi' row from data itself.
# Re-running this cell raises KeyError because 'Delhi' is then gone.
data.drop(index='Delhi', inplace=True)

In [52]:
# The 'Delhi' row has been removed from data.
data

Unnamed: 0,one,three,four
Mumbai,0,2,3
Pune,4,6,7
Chennai,8,10,11


## Indexing, Selection & Filtering

In [53]:
# Integers 0..3 labelled 'a'..'d' for the selection examples that follow.
obj = pd.Series(data=np.arange(4), index=list('abcd'))
obj

a    0
b    1
c    2
d    3
dtype: int32

In [54]:
# Positional slice, end-exclusive: the entries at positions 1 and 2 ('b', 'c').
# `.iloc` states explicitly that the integers are positions, not labels, so
# the cell does not rely on how plain [] interprets integers on a
# label-indexed Series.
obj.iloc[1:3]

b    1
c    2
dtype: int32

In [55]:
# Element-wise comparison: yields a boolean Series with the same labels.
obj < 2

a     True
b     True
c    False
d    False
dtype: bool

In [56]:
# Boolean filtering: keeps only the entries where the condition is True.
obj[obj<2]

a    0
b    1
dtype: int32

In [58]:
# Label slice: unlike a positional slice, the end label 'c' is included.
obj['a':'c']

a    0
b    1
c    2
dtype: int32

In [59]:
# Assigning through a label slice overwrites every entry from 'a' to 'c'
# (inclusive) in obj itself.
obj['a':'c'] = 10

In [60]:
# 'a', 'b' and 'c' are now 10; 'd' keeps its original value 3.
obj

a    10
b    10
c    10
d     3
dtype: int32

In [61]:
# data as left by the earlier in-place drops: no 'two' column, no 'Delhi' row.
data

Unnamed: 0,one,three,four
Mumbai,0,2,3
Pune,4,6,7
Chennai,8,10,11


In [63]:
data['one']   # a single column name selects that column as a Series

Mumbai     0
Pune       4
Chennai    8
Name: one, dtype: int32

In [64]:
data[['one','four']]   # a list of column names selects several columns as a DataFrame

Unnamed: 0,one,four
Mumbai,0,3
Pune,4,7
Chennai,8,11


## Arithmetic Operations With Series and DataFrames

In [65]:
# First operand: four floats labelled 'a', 'c', 'd', 'e'.
ser1 = pd.Series({'a': 7.3, 'c': -2.5, 'd': 3.4, 'e': 1.5})
ser1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [66]:
# Second operand: five floats whose labels only partly overlap with ser1's.
ser2 = pd.Series({'a': -2.1, 'c': 3.6, 'e': -1.5, 'f': 4.0, 'g': 3.1})
ser2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [67]:
# Addition aligns the operands by label. Labels found in only one Series
# ('d', 'f', 'g') give NaN; the result covers the union of both label sets.
ser1 + ser2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [68]:
# 3x3 frame holding 0..8 row by row: columns 'b', 'c', 'd', one row per city.
df1 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Mumbai', 'Pune', 'Nagpur'],
    columns=['b', 'c', 'd'],
)
df1

Unnamed: 0,b,c,d
Mumbai,0,1,2
Pune,3,4,5
Nagpur,6,7,8


In [69]:
# 4x3 frame holding 0..11 row by row; its row and column labels only partly
# overlap with df1's.
df2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['Delhi', 'Mumbai', 'Pune', 'Chennai'],
    columns=['b', 'd', 'e'],
)
df2

Unnamed: 0,b,d,e
Delhi,0,1,2
Mumbai,3,4,5
Pune,6,7,8
Chennai,9,10,11


In [70]:
# Frames are aligned on both row and column labels. Only cells whose row AND
# column exist in both frames (Mumbai/Pune x b/d) get a sum; all others are NaN.
df1 + df2

Unnamed: 0,b,c,d,e
Chennai,,,,
Delhi,,,,
Mumbai,3.0,,6.0,
Nagpur,,,,
Pune,9.0,,12.0,
