#### Series is a one-dimensional labeled array capable of holding data of any type (integer, string, float, python objects, etc.). The axis labels are collectively called index.

In [1]:
import pandas as pd
import numpy as np

In [2]:
a = ['Alex','Jack','Jeff']

In [3]:
pd.Series(a)

0    Alex
1    Jack
2    Jeff
dtype: object

In [4]:
a = ['Alex','Jack',None]

In [5]:
pd.Series(a)

0    Alex
1    Jack
2    None
dtype: object

In [6]:
a = [1,2,None]

In [7]:
d = pd.Series(a)

In [8]:
d

0    1.0
1    2.0
2    NaN
dtype: float64

In [9]:
np.nan == np.nan

False

In [10]:
np.nan == None

False

In [11]:
np.isnan(np.nan)

True

In [12]:
e = d[np.isnan(d)]
e

2   NaN
dtype: float64

In [13]:
dic = {'Alex':'Alex@gmail.com','John':'John12@gmail.com','Jeff':'Jeff@hotmail.com'}

In [14]:
dt = pd.Series(dic)

In [15]:
dt

Alex      Alex@gmail.com
John    John12@gmail.com
Jeff    Jeff@hotmail.com
dtype: object

In [16]:
dt.index

Index(['Alex', 'John', 'Jeff'], dtype='object')

In [17]:
dt['Alex']

'Alex@gmail.com'

In [18]:
data = [('Alex','Nick'),('Jack','Adam'),('Andrew','Jeff')]

pd.Series(data)

0      (Alex, Nick)
1      (Jack, Adam)
2    (Andrew, Jeff)
dtype: object

In [19]:
pd.Series(['Physics','Chemistry','Biology'],index = ['Alex','Andrew','Nick'])

Alex        Physics
Andrew    Chemistry
Nick        Biology
dtype: object

In [20]:
data = {'Alex':'Alex@gmail.com','Andrew':'Andrew123@gmail.com','John':'John12@gmail.com'}

data = pd.Series(data,index = ['Alex','John','Sam'])

In [21]:
data

Alex      Alex@gmail.com
John    John12@gmail.com
Sam                  NaN
dtype: object

In [22]:
data.iloc[1]

'John12@gmail.com'

In [23]:
data.loc['John']

'John12@gmail.com'

In [24]:
data[1]

'John12@gmail.com'

In [25]:
data['John']

'John12@gmail.com'

In [26]:
student_data = {
    90:'Physics',
    80:'Chemistry',
    100:'Biology'
}
stud_dt = pd.Series(student_data)

In [27]:
stud_dt.iloc[0]

'Physics'

#### Add rows to a series

In [28]:
s = pd.Series([1,2,3])
s.loc['Add'] = 8
s

0      1
1      2
2      3
Add    8
dtype: int64

#### Append two series

In [29]:
data = {'Alex':'Alex@gmail.com','Andrew':'Andrew122@gmail.com','John':'John@hotmail.com'}
d = pd.Series(data)

In [30]:
d1 = pd.Series(['Abc@gmail.com','xcy@gmail.com','pop@gmail.com'], index = ['kelly','kelly','kelly'])
d1

kelly    Abc@gmail.com
kelly    xcy@gmail.com
kelly    pop@gmail.com
dtype: object

In [31]:
all_data = d.append(d1)

In [32]:
all_data

Alex           Alex@gmail.com
Andrew    Andrew122@gmail.com
John         John@hotmail.com
kelly           Abc@gmail.com
kelly           xcy@gmail.com
kelly           pop@gmail.com
dtype: object

In [33]:
all_data.loc['kelly']

kelly    Abc@gmail.com
kelly    xcy@gmail.com
kelly    pop@gmail.com
dtype: object

#### The Dataframe is conceptually a two- dimensional series object, where there's an index and multiple columns of content with each column having a label.

In [34]:
record1 = pd.Series({'Name': 'Alice',
                        'Class': 'Physics',
                        'Score': 85})
record2 = pd.Series({'Name': 'Jack',
                        'Class': 'Chemistry',
                        'Score': 82})
record3 = pd.Series({'Name': 'Helen',
                        'Class': 'Biology',
                        'Score': 90})

In [35]:
pd.DataFrame([record1,record2,record3], index = ['Student1','Student2','Student3'])

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student3,Helen,Biology,90


In [36]:
students = [{'Name': 'Alice',
              'Class': 'Physics',
              'Score': 85},
            {'Name': 'Jack',
             'Class': 'Chemistry',
             'Score': 82},
            {'Name': 'Helen',
             'Class': 'Biology',
             'Score': 90}]

df = pd.DataFrame(students,index= ['Student1','Student2','Student1'])

In [37]:
type(df.loc['Student2'])

pandas.core.series.Series

In [38]:
type(df.loc['Student1'])

pandas.core.frame.DataFrame

In [39]:
df.loc['Student1','Name']

Student1    Alice
Student1    Helen
Name: Name, dtype: object

In [40]:
df

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student1,Helen,Biology,90


In [41]:
df1 = df.T

In [42]:
df1.loc['Name']

Student1    Alice
Student2     Jack
Student1    Helen
Name: Name, dtype: object

In [43]:
df.loc['Student1']['Name']

Student1    Alice
Student1    Helen
Name: Name, dtype: object

In [44]:
df.loc[:,'Name']

Student1    Alice
Student2     Jack
Student1    Helen
Name: Name, dtype: object

In [45]:
df.loc[:,['Name','Score']]

Unnamed: 0,Name,Score
Student1,Alice,85
Student2,Jack,82
Student1,Helen,90


In [46]:
df.drop('Student1')

Unnamed: 0,Name,Class,Score
Student2,Jack,Chemistry,82


In [47]:
df

Unnamed: 0,Name,Class,Score
Student1,Alice,Physics,85
Student2,Jack,Chemistry,82
Student1,Helen,Biology,90


### Drop dataframe

In [48]:
copy_df = df.copy()
#Axis = 1 to specify columns
copy_df.drop("Name",inplace = True,axis = 1)

In [49]:
del copy_df['Class']
copy_df

Unnamed: 0,Score
Student1,85
Student2,82
Student1,90


#### Add new columns in data frame

In [50]:
df['Rank'] = None

In [51]:
df

Unnamed: 0,Name,Class,Score,Rank
Student1,Alice,Physics,85,
Student2,Jack,Chemistry,82,
Student1,Helen,Biology,90,


### Dataframe indexing and loading

In [52]:
df = pd.read_csv('datasets/Admission_Predict.csv')

# And let's look at the first few rows
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [53]:
df = pd.read_csv('datasets/Admission_Predict.csv', index_col=0)
df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


### Rename dataframe

In [54]:
new_df=df.rename(columns={'GRE Score':'GRE Score', 'TOEFL Score':'TOEFL Score',
                   'University Rating':'University Rating', 
                   'SOP': 'Statement of Purpose','LOR': 'Letter of Recommendation',
                   'CGPA':'CGPA', 'Research':'Research',
                   'Chance of Admit':'Chance of Admit'})
new_df.head()

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,Statement of Purpose,LOR,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
5,314,103,2,2.0,3.0,8.21,0,0.65


In [55]:
new_df = new_df.rename(columns = {'LOR ': 'Letter of Recommendation'})
new_df

Unnamed: 0_level_0,GRE Score,TOEFL Score,University Rating,Statement of Purpose,Letter of Recommendation,CGPA,Research,Chance of Admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67


In [56]:
new_df = new_df.rename(mapper = str.strip,axis = 1)
new_df.columns

Index(['GRE Score', 'TOEFL Score', 'University Rating', 'Statement of Purpose',
       'Letter of Recommendation', 'CGPA', 'Research', 'Chance of Admit'],
      dtype='object')

In [57]:
cols = list(df.columns)
rename_cols = [x.lower().strip() for x in cols]
df.columns = rename_cols

In [58]:
df.columns

Index(['gre score', 'toefl score', 'university rating', 'sop', 'lor', 'cgpa',
       'research', 'chance of admit'],
      dtype='object')

### Querying a dataframe

In [59]:
df

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67


In [60]:
admit_chance = df['chance of admit']>0.7

In [63]:
df.where(admit_chance).head()

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337.0,118.0,4.0,4.5,4.5,9.65,1.0,0.92
2,324.0,107.0,4.0,4.0,4.5,8.87,1.0,0.76
3,316.0,104.0,3.0,3.0,3.5,8.0,1.0,0.72
4,322.0,110.0,3.0,3.5,2.5,8.67,1.0,0.8
5,,,,,,,,


In [64]:
df.where(admit_chance).dropna().head()

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337.0,118.0,4.0,4.5,4.5,9.65,1.0,0.92
2,324.0,107.0,4.0,4.0,4.5,8.87,1.0,0.76
3,316.0,104.0,3.0,3.0,3.5,8.0,1.0,0.72
4,322.0,110.0,3.0,3.5,2.5,8.67,1.0,0.8
6,330.0,115.0,5.0,4.5,3.0,9.34,1.0,0.9


In [65]:
df[df['chance of admit']>0.7].head()

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
6,330,115,5,4.5,3.0,9.34,1,0.9


In [67]:
df[['chance of admit','sop']].head()

Unnamed: 0_level_0,chance of admit,sop
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.92,4.5
2,0.76,4.0
3,0.72,3.0
4,0.8,3.5
5,0.65,2.0


In [71]:
df[(df['chance of admit']>0.7) & (df['chance of admit'] < 0.9)].head()

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
7,321,109,3,3.0,4.0,8.2,1,0.75
12,327,111,4,4.0,4.5,9.0,1,0.84


In [74]:
df[(df['chance of admit'].gt(0.7)) & (df['chance of admit'].lt(0.9))].head()

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.0,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.8
7,321,109,3,3.0,4.0,8.2,1,0.75
12,327,111,4,4.0,4.5,9.0,1,0.84


### Indexing dataframe

In [75]:
df

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit
Serial No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,337,118,4,4.5,4.5,9.65,1,0.92
2,324,107,4,4.0,4.5,8.87,1,0.76
3,316,104,3,3.0,3.5,8.00,1,0.72
4,322,110,3,3.5,2.5,8.67,1,0.80
5,314,103,2,2.0,3.0,8.21,0,0.65
...,...,...,...,...,...,...,...,...
396,324,110,3,3.5,3.5,9.04,1,0.82
397,325,107,3,3.0,3.5,9.11,1,0.84
398,330,116,4,5.0,4.5,9.45,1,0.91
399,312,103,3,3.5,4.0,8.78,0,0.67


In [76]:
df['Serail No.'] = df.index

In [77]:
df.set_index('chance of admit')

Unnamed: 0_level_0,gre score,toefl score,university rating,sop,lor,cgpa,research,Serail No.
chance of admit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.92,337,118,4,4.5,4.5,9.65,1,1
0.76,324,107,4,4.0,4.5,8.87,1,2
0.72,316,104,3,3.0,3.5,8.00,1,3
0.80,322,110,3,3.5,2.5,8.67,1,4
0.65,314,103,2,2.0,3.0,8.21,0,5
...,...,...,...,...,...,...,...,...
0.82,324,110,3,3.5,3.5,9.04,1,396
0.84,325,107,3,3.0,3.5,9.11,1,397
0.91,330,116,4,5.0,4.5,9.45,1,398
0.67,312,103,3,3.5,4.0,8.78,0,399


In [78]:
df= df.reset_index()
df

Unnamed: 0,Serial No.,gre score,toefl score,university rating,sop,lor,cgpa,research,chance of admit,Serail No.
0,1,337,118,4,4.5,4.5,9.65,1,0.92,1
1,2,324,107,4,4.0,4.5,8.87,1,0.76,2
2,3,316,104,3,3.0,3.5,8.00,1,0.72,3
3,4,322,110,3,3.5,2.5,8.67,1,0.80,4
4,5,314,103,2,2.0,3.0,8.21,0,0.65,5
...,...,...,...,...,...,...,...,...,...,...
395,396,324,110,3,3.5,3.5,9.04,1,0.82,396
396,397,325,107,3,3.0,3.5,9.11,1,0.84,397
397,398,330,116,4,5.0,4.5,9.45,1,0.91,398
398,399,312,103,3,3.5,4.0,8.78,0,0.67,399


### Multi-level indexing

In [81]:
dfIndex = pd.read_csv('datasets/census.csv')

In [82]:
dfIndex

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
0,40,3,6,1,0,Alabama,Alabama,4779736,4780127,4785161,...,0.002295,-0.193196,0.381066,0.582002,-0.467369,1.030015,0.826644,1.383282,1.724718,0.712594
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.832960,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.500690,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,50,4,8,56,37,Wyoming,Sweetwater County,43806,43806,43593,...,1.072643,16.243199,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195
3189,50,4,8,56,39,Wyoming,Teton County,21294,21294,21297,...,-1.589565,0.972695,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747
3190,50,4,8,56,41,Wyoming,Uinta County,21118,21118,21102,...,-17.755986,-4.916350,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351
3191,50,4,8,56,43,Wyoming,Washakie County,8533,8533,8545,...,-11.637475,-0.827815,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961


In [84]:
dfIndex['SUMLEV'].unique()

array([40, 50], dtype=int64)

In [85]:
dfIndex = dfIndex[dfIndex['SUMLEV'] == 50]
dfIndex

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2011,RDOMESTICMIG2012,RDOMESTICMIG2013,RDOMESTICMIG2014,RDOMESTICMIG2015,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015
1,50,3,6,1,1,Alabama,Autauga County,54571,54571,54660,...,7.242091,-2.915927,-3.012349,2.265971,-2.530799,7.606016,-2.626146,-2.722002,2.592270,-2.187333
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183193,...,14.832960,17.647293,21.845705,19.243287,17.197872,15.844176,18.559627,22.727626,20.317142,18.293499
3,50,3,6,1,5,Alabama,Barbour County,27457,27457,27341,...,-4.728132,-2.500690,-7.056824,-3.904217,-10.543299,-4.874741,-2.758113,-7.167664,-3.978583,-10.543299
4,50,3,6,1,7,Alabama,Bibb County,22915,22919,22861,...,-5.527043,-5.068871,-6.201001,-0.177537,0.177258,-5.088389,-4.363636,-5.403729,0.754533,1.107861
5,50,3,6,1,9,Alabama,Blount County,57322,57322,57373,...,1.807375,-1.177622,-1.748766,-2.062535,-1.369970,1.859511,-0.848580,-1.402476,-1.577232,-0.884411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,50,4,8,56,37,Wyoming,Sweetwater County,43806,43806,43593,...,1.072643,16.243199,-5.339774,-14.252889,-14.248864,1.255221,16.243199,-5.295460,-14.075283,-14.070195
3189,50,4,8,56,39,Wyoming,Teton County,21294,21294,21297,...,-1.589565,0.972695,19.525929,14.143021,-0.564849,0.654527,2.408578,21.160658,16.308671,1.520747
3190,50,4,8,56,41,Wyoming,Uinta County,21118,21118,21102,...,-17.755986,-4.916350,-6.902954,-14.215862,-12.127022,-18.136812,-5.536861,-7.521840,-14.740608,-12.606351
3191,50,4,8,56,43,Wyoming,Washakie County,8533,8533,8545,...,-11.637475,-0.827815,-2.013502,-17.781491,1.682288,-11.990126,-1.182592,-2.250385,-18.020168,1.441961


In [87]:
columns_to_keep = ['STNAME','CTYNAME','BIRTHS2010','BIRTHS2011','BIRTHS2012','BIRTHS2013',
                   'BIRTHS2014','BIRTHS2015','POPESTIMATE2010','POPESTIMATE2011',
                   'POPESTIMATE2012','POPESTIMATE2013','POPESTIMATE2014','POPESTIMATE2015']
dfIndex = dfIndex[columns_to_keep]
dfIndex.head()

Unnamed: 0,STNAME,CTYNAME,BIRTHS2010,BIRTHS2011,BIRTHS2012,BIRTHS2013,BIRTHS2014,BIRTHS2015,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015
1,Alabama,Autauga County,151,636,615,574,623,600,54660,55253,55175,55038,55290,55347
2,Alabama,Baldwin County,517,2187,2092,2160,2186,2240,183193,186659,190396,195126,199713,203709
3,Alabama,Barbour County,70,335,300,283,260,269,27341,27226,27159,26973,26815,26489
4,Alabama,Bibb County,44,266,245,259,247,253,22861,22733,22642,22512,22549,22583
5,Alabama,Blount County,183,744,710,646,618,603,57373,57711,57776,57734,57658,57673


In [89]:
dfIndex = dfIndex.set_index(['STNAME','CTYNAME'])

In [91]:
dfIndex.loc['Michigan', 'Washtenaw County']

BIRTHS2010            977
BIRTHS2011           3826
BIRTHS2012           3780
BIRTHS2013           3662
BIRTHS2014           3683
BIRTHS2015           3709
POPESTIMATE2010    345563
POPESTIMATE2011    349048
POPESTIMATE2012    351213
POPESTIMATE2013    354289
POPESTIMATE2014    357029
POPESTIMATE2015    358880
Name: (Michigan, Washtenaw County), dtype: int64

In [92]:
dfIndex.loc[ [('Michigan', 'Washtenaw County'),
         ('Michigan', 'Wayne County')] ]

Unnamed: 0_level_0,Unnamed: 1_level_0,BIRTHS2010,BIRTHS2011,BIRTHS2012,BIRTHS2013,BIRTHS2014,BIRTHS2015,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015
STNAME,CTYNAME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Michigan,Washtenaw County,977,3826,3780,3662,3683,3709,345563,349048,351213,354289,357029,358880
Michigan,Wayne County,5918,23819,23270,23377,23607,23586,1815199,1801273,1792514,1775713,1766008,1759335


### Missing values

In [94]:
df = pd.read_csv('datasets/class_grades.csv')
df.head(10)

Unnamed: 0,Prefix,Assignment,Tutorial,Midterm,TakeHome,Final
0,5,57.14,34.09,64.38,51.48,52.5
1,8,95.05,105.49,67.5,99.07,68.33
2,8,83.7,83.17,,63.15,48.89
3,7,,,49.38,105.93,80.56
4,8,91.32,93.64,95.0,107.41,73.89
5,7,95.0,92.58,93.12,97.78,68.06
6,8,95.05,102.99,56.25,99.07,50.0
7,7,72.85,86.85,60.0,,56.11
8,8,84.26,93.1,47.5,18.52,50.83
9,7,90.1,97.55,51.25,88.89,63.61


In [95]:
df[df.isnull()]

Unnamed: 0,Prefix,Assignment,Tutorial,Midterm,TakeHome,Final
0,,,,,,
1,,,,,,
2,,,,,,
3,,,,,,
4,,,,,,
...,...,...,...,...,...,...
94,,,,,,
95,,,,,,
96,,,,,,
97,,,,,,


In [96]:
df.dropna().head()

Unnamed: 0,Prefix,Assignment,Tutorial,Midterm,TakeHome,Final
0,5,57.14,34.09,64.38,51.48,52.5
1,8,95.05,105.49,67.5,99.07,68.33
4,8,91.32,93.64,95.0,107.41,73.89
5,7,95.0,92.58,93.12,97.78,68.06
6,8,95.05,102.99,56.25,99.07,50.0


In [97]:
df.fillna(0, inplace=True)
df.head(10)

Unnamed: 0,Prefix,Assignment,Tutorial,Midterm,TakeHome,Final
0,5,57.14,34.09,64.38,51.48,52.5
1,8,95.05,105.49,67.5,99.07,68.33
2,8,83.7,83.17,0.0,63.15,48.89
3,7,0.0,0.0,49.38,105.93,80.56
4,8,91.32,93.64,95.0,107.41,73.89
5,7,95.0,92.58,93.12,97.78,68.06
6,8,95.05,102.99,56.25,99.07,50.0
7,7,72.85,86.85,60.0,0.0,56.11
8,8,84.26,93.1,47.5,18.52,50.83
9,7,90.1,97.55,51.25,88.89,63.61
