# Data Manipulation

1. reindex
2. reset_index
3. sort_index
4. set_index

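## Reindex - used to conform a DataFrame to a new set of labels
It changes the row labels and/or column labels of a DataFrame. Labels that already exist keep their data; labels that were not in the original index are added and filled with NaN.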

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Sample frame: one row per browser, labelled by the browser name.
index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
df = pd.DataFrame(
    data={
        'http_status': [200, 200, 404, 404, 301],
        'response_time': [0.04, 0.02, 0.07, 0.08, 1.0],
    },
    index=index,
)
df

Unnamed: 0,http_status,response_time
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE10,404,0.08
Konqueror,301,1.0


In [4]:
# Target labels: 'Safari', 'IE10' and 'Chrome' exist in df,
# 'Iceweasel' and 'Comodo Dragon' do not.
new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 'Chrome']

# Rows are returned in the order of new_index; labels missing from df come
# back as all-NaN rows, which is why http_status is shown as float below.
df.reindex(index=new_index)

Unnamed: 0,http_status,response_time
Safari,404.0,0.07
Iceweasel,,
Comodo Dragon,,
IE10,404.0,0.08
Chrome,200.0,0.02


## Reset Index - used to reset the index or convert an index into a column

In [5]:
# Sample frame indexed by animal name; the last row has a missing max_speed.
df = pd.DataFrame(
    data=[
        ('bird', 389.0),
        ('bird', 24.0),
        ('mammal', 80.5),
        ('mammal', np.nan),
    ],
    index=['falcon', 'parrot', 'lion', 'monkey'],
    columns=['class', 'max_speed'],
)
df

Unnamed: 0,class,max_speed
falcon,bird,389.0
parrot,bird,24.0
lion,mammal,80.5
monkey,mammal,


In [6]:
# Move the animal-name index into an ordinary column called 'index' and
# replace it with a default 0..n-1 integer index (drop=False is the default).
df.reset_index(drop=False)

Unnamed: 0,index,class,max_speed
0,falcon,bird,389.0
1,parrot,bird,24.0
2,lion,mammal,80.5
3,monkey,mammal,


In [7]:
# drop=True discards the old index instead of keeping it as a column,
# leaving only 'class' and 'max_speed' under a fresh 0..n-1 index.
df.reset_index(drop=True)

Unnamed: 0,class,max_speed
0,bird,389.0
1,bird,24.0
2,mammal,80.5
3,mammal,


## Sort Index - used to sort by index labels (in contrast to sort_values, which sorts by the data)

In [8]:
# Small sales table with the default 0..3 integer index.
df = pd.DataFrame(
    data={
        'month': [1, 4, 7, 10],
        'year': [2012, 2014, 2013, 2014],
        'sale': [55, 40, 84, 31],
    }
)
df

Unnamed: 0,month,year,sale
0,1,2012,55
1,4,2014,40
2,7,2013,84
3,10,2014,31


In [9]:
# Order the 'sale' column by its values, smallest first; each value keeps
# its original index label, so the index comes out as 3, 1, 0, 2.
df['sale'].sort_values(ascending=True)

3    31
1    40
0    55
2    84
Name: sale, dtype: int64

In [10]:
# The index of df['sale'] is already in order (0..3), so calling sort_index()
# on it directly would change nothing. Sort by value first to shuffle the
# labels (3, 1, 0, 2), then sort_index() puts them back in label order.
df['sale'].sort_values().sort_index()

0    55
1    40
2    84
3    31
Name: sale, dtype: int64

## Set Index - used to make an existing column the index

In [11]:
# Rebuild the sales table so this section starts from a known frame.
df = pd.DataFrame(
    data={
        'month': [1, 4, 7, 10],
        'year': [2012, 2014, 2013, 2014],
        'sale': [55, 40, 84, 31],
    }
)
df

Unnamed: 0,month,year,sale
0,1,2012,55
1,4,2014,40
2,7,2013,84
3,10,2014,31


In [12]:
# Use the 'month' column as the row index; 'year' and 'sale' remain as the
# data columns in the displayed result.
df.set_index(keys='month')

Unnamed: 0_level_0,year,sale
month,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2012,55
4,2014,40
7,2013,84
10,2014,31


# Replace and Droplevel

In [2]:
import pandas as pd

In [3]:
# Two string columns used to demonstrate value replacement.
df = pd.DataFrame(
    data={
        'A': ['bat', 'foo', 'bait'],
        'B': ['abc', 'bar', 'xyz'],
    }
)
df

Unnamed: 0,A,B
0,bat,abc
1,foo,bar
2,bait,xyz


In [5]:
# Replace the value 'foo' in column A with 'Ram'. Only cells equal to 'foo'
# change; 'bat' and 'bait' are left as they are. The arguments are plain
# strings -- no tuple wrapping is needed for a single value.
df['A'] = df['A'].replace('foo', 'Ram')
df

Unnamed: 0,A,B
0,bat,abc
1,Ram,bar
2,bait,xyz


## Droplevel Function

In [6]:
# Build a frame with a two-level row index: columns 0 and 1 become the
# index levels, which are then named 'a' and 'b'. Columns 2 and 3 remain
# as the data columns.
df = (
    pd.DataFrame(
        [
            [1, 2, 3, 4],
            [5, 6, 7, 8],
            [9, 10, 11, 12],
        ]
    )
    .set_index([0, 1])
    .rename_axis(['a', 'b'])
)

df

Unnamed: 0_level_0,Unnamed: 1_level_0,2,3
a,b,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,3,4
5,6,7,8
9,10,11,12


In [8]:
# Remove index level 'a' from the displayed result, leaving 'b' as the
# only row index.
df.droplevel(level='a')

Unnamed: 0_level_0,2,3
b,Unnamed: 1_level_1,Unnamed: 2_level_1
2,3,4
6,7,8
10,11,12


In [9]:
# Display df again: it still shows both index levels 'a' and 'b', i.e. the
# droplevel call above did not modify df itself.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2,3
a,b,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,3,4
5,6,7,8
9,10,11,12
