In [1]:
import numpy as np
import pandas as pd

In [2]:
# Column-oriented sample data: three yearly records each for Mumbai and Pune.
mydict = dict(
    state=['Mumbai'] * 3 + ['Pune'] * 3,
    year=[2010, 2011, 2012, 2011, 2012, 2013],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

In [3]:
# Build the frame with an explicit column order and string row labels.
# 'debt' is requested as a column even though mydict has no such key.
df = pd.DataFrame(
    data=mydict,
    index='one two three four five six'.split(),
    columns='year state pop debt'.split(),
)

In [4]:
# 'debt' was requested as a column but is not a key of mydict, so it shows up with no values.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,
five,2012,Pune,2.9,
six,2013,Pune,3.2,


## Assign a new Column

In [5]:
# Fill 'debt' with the integers 1..6 (one value per row of df).
df['debt'] = np.arange(6) + 1

In [6]:
# 'debt' now holds the integers 1 through 6, one per row.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,1
two,2011,Mumbai,1.7,2
three,2012,Mumbai,3.6,3
four,2011,Pune,2.4,4
five,2012,Pune,2.9,5
six,2013,Pune,3.2,6


In [7]:
# A Series labelled with only a subset of df's row labels.
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})
val

two    -1.2
four   -1.5
five   -1.7
dtype: float64

In [8]:
# Assigning a Series aligns it on the row labels; spelling the alignment out
# with reindex gives the same column: rows without a label in val become NaN.
df['debt'] = val.reindex(df.index)

In [9]:
# Only rows 'two', 'four' and 'five' (the labels present in val) have a 'debt' value; the rest are missing.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,-1.2
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,-1.5
five,2012,Pune,2.9,-1.7
six,2013,Pune,3.2,


### How to Assign Boolean Value to a Column

In [10]:
# Element-wise comparison yields a boolean column: True where state is 'Mumbai'.
df['western'] = df['state'].eq('Mumbai')

In [11]:
# 'western' is True for the Mumbai rows and False for the Pune rows.
df

Unnamed: 0,year,state,pop,debt,western
one,2010,Mumbai,1.5,,True
two,2011,Mumbai,1.7,-1.2,True
three,2012,Mumbai,3.6,,True
four,2011,Pune,2.4,-1.5,False
five,2012,Pune,2.9,-1.7,False
six,2013,Pune,3.2,,False


## To Remove A Column From DataFrame

In [12]:
# `del` removes the column from df itself (in place); the statement produces no output.
del df['western']

In [13]:
# The 'western' column is gone; the other columns are unchanged.
df

Unnamed: 0,year,state,pop,debt
one,2010,Mumbai,1.5,
two,2011,Mumbai,1.7,-1.2
three,2012,Mumbai,3.6,
four,2011,Pune,2.4,-1.5
five,2012,Pune,2.9,-1.7
six,2013,Pune,3.2,


## Index Objects

In [14]:
# Three integers labelled 'a', 'b', 'c'.
obj = pd.Series(data=range(3), index=list('abc'))
obj

a    0
b    1
c    2
dtype: int64

In [15]:
obj.index  # the Index object that holds all of the Series' labels

Index(['a', 'b', 'c'], dtype='object')

In [16]:
# Keep a reference to the Series' Index under its own name, then display it.
my_index = obj.index
my_index

Index(['a', 'b', 'c'], dtype='object')

In [17]:
# Individual labels can be read by position, just like a sequence.
tuple(my_index[pos] for pos in range(3))

('a', 'b', 'c')

In [18]:
# An Index is immutable: item assignment is rejected with a TypeError.
# The error is the point of this cell, so catch it and print it instead of
# letting it abort a "Restart Kernel -> Run All" of the notebook.
try:
    my_index[0] = 'B'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

## Re-Indexing

In [19]:
# Labels are deliberately out of alphabetical order.
obj1 = pd.Series({'d': 4.1, 'b': 7.5, 'a': -8.3, 'c': 3.6})
obj1

d    4.1
b    7.5
a   -8.3
c    3.6
dtype: float64

In [20]:
# reindex returns a new Series arranged to the requested labels;
# 'e' does not exist in obj1, so its value is NaN.
obj2 = obj1.reindex(index=list('abcde'))
obj2

a   -8.3
b    7.5
c    3.6
d    4.1
e    NaN
dtype: float64

### ffill => forward filling, bfill => backward filling

In [23]:
# Colours at the even positions 0, 2 and 4; the odd positions are absent.
obj3 = pd.Series({0: 'blue', 2: 'purple', 4: 'yellow'})
obj3

0      blue
2    purple
4    yellow
dtype: object

In [24]:
# Forward fill: each new label takes the value of the closest earlier label.
obj4 = obj3.reindex(index=range(6), method='ffill')
obj4

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [25]:
# Backward fill: each new label takes the value of the closest later label;
# label 5 has nothing after it, so it stays NaN.
obj5 = obj3.reindex(index=range(6), method='bfill')
obj5

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

## Dropping Entries From An Axis

In [26]:
# Five consecutive integers labelled 'a' through 'e'.
obj = pd.Series(data=np.arange(5), index=list('abcde'))
obj

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [27]:
# drop returns a new Series without the given label.
new = obj.drop(labels='c')
new

a    0
b    1
d    3
e    4
dtype: int32

In [28]:
# A list of labels removes several entries in one call.
new = obj.drop(index=['b', 'e'])
new

a    0
c    2
d    3
dtype: int32

In [29]:
# obj still has all five entries: the drop calls returned new Series and left obj untouched.
obj

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [30]:
# 4x4 grid of the integers 0..15, rows labelled by city and columns by number words.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index='Mumbai Pune Chennai Delhi'.split(),
    columns='one two three four'.split(),
)
data

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7
Chennai,8,9,10,11
Delhi,12,13,14,15


In [31]:
# Returns a new DataFrame without these rows; data itself is not modified.
data.drop(index=['Chennai', 'Delhi'])

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7


In [32]:
# data still has all four rows: the row drop returned a new DataFrame.
data

Unnamed: 0,one,two,three,four
Mumbai,0,1,2,3
Pune,4,5,6,7
Chennai,8,9,10,11
Delhi,12,13,14,15


### To Remove A Column Using Drop

In [None]:
# Drop along the column axis; the result is a new DataFrame.
data.drop(columns='three')

In [None]:
# data still contains column 'three': drop without inplace=True does not modify it.
data

### To Remove Multiple Columns Using Drop

In [None]:
# A list of column labels removes several columns in one call (new DataFrame returned).
data.drop(columns=['one', 'four'])

In [None]:
# data still has all four columns; none of the drop calls so far used inplace=True.
data

In [None]:
# inplace=True modifies data itself and returns None, so nothing is displayed.
# Re-running this cell raises KeyError because 'two' is already gone.
data.drop(columns='two', inplace=True)

In [None]:
# Column 'two' has been removed from data itself by the in-place drop.
data

In [None]:
# Row labels are the default axis for drop; here the row is removed from data in place.
data.drop(index='Delhi', inplace=True)

In [None]:
# Row 'Delhi' has been removed from data itself by the in-place drop.
data

## Indexing, Selection & Filtering

In [None]:
# Four consecutive integers labelled 'a' through 'd'.
obj = pd.Series(data=np.arange(4), index=list('abcd'))
obj

In [None]:
# Integer slice = positional selection (end excluded): positions 1 and 2.
obj.iloc[1:3]

In [None]:
# Element-wise "less than 2" test; the result is a boolean Series with obj's labels.
obj.lt(2)

In [None]:
# Boolean mask selection: keep only the entries whose value is below 2.
obj.loc[obj.lt(2)]

In [None]:
# Label slice: unlike positional slicing, the end label 'c' is included.
obj.loc['a':'c']

In [None]:
# Assigning through a label slice overwrites 'a', 'b' and 'c' in obj itself.
obj.loc['a':'c'] = 10

In [None]:
# Labels 'a' through 'c' now hold 10; 'd' keeps its original value 3.
obj

In [None]:
# Current state of data after the in-place drops: rows Mumbai, Pune, Chennai; columns one, three, four.
data

In [None]:
# Select a single column; the result is a Series indexed by the row labels.
data.loc[:, 'one']

In [None]:
# Select several columns with a list of labels; the result is a DataFrame.
data.loc[:, ['one', 'four']]

## Arithmetic Operation With Series

In [None]:
# First operand: labels a, c, d, e.
ser1 = pd.Series({'a': 7.3, 'c': -2.5, 'd': 3.4, 'e': 1.5})
ser1

In [None]:
# Second operand: labels a, c, e, f, g (only partly overlapping with ser1).
ser2 = pd.Series({'a': -2.1, 'c': 3.6, 'e': -1.5, 'f': 4, 'g': 3.1})
ser2

In [None]:
# Addition aligns on labels; a label present in only one operand yields NaN.
ser1.add(ser2)

In [None]:
# 3x3 frame of 0..8 with columns b, c, d.
df1 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Mumbai', 'Pune', 'Nagpur'],
    columns=list('bcd'),
)
df1

In [None]:
# 4x3 frame of 0..11 with columns b, d, e; rows and columns only partly overlap with df1.
df2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=['Delhi', 'Mumbai', 'Pune', 'Chennai'],
    columns=list('bde'),
)
df2

In [None]:
# Addition aligns on both row and column labels; any cell missing from either frame is NaN.
df1.add(df2)