# Python Introductory Tutorial - The Series Data Structure

In [1]:
import pandas as pd
pd.Series?

In [2]:
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [3]:
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [4]:
# A None inside a list of strings is stored unchanged; the Series keeps dtype object.
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [5]:
# A None inside a list of numbers is converted to NaN, which forces the dtype to float64.
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [6]:
import numpy as np
# NaN is not the same thing as None: the equality test is False.
np.nan == None

False

In [7]:
# NaN never compares equal with ==, not even to itself.
np.nan == np.nan

False

In [8]:
# Use np.isnan() to test for NaN, since == cannot detect it.
np.isnan(np.nan)

True

In [9]:
# Build a Series from a dict: the keys become the index labels, the values become the data.
sport_names = ['Archery', 'Golf', 'Sumo', 'Taekwondo']
home_countries = ['Bhutan', 'Scotland', 'Japan', 'South Korea']
sports = dict(zip(sport_names, home_countries))
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [10]:
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [11]:
s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada'])
s

India      Tiger
America     Bear
Canada     Moose
dtype: object

In [12]:
# When both a dict and an explicit index are given, the index decides the result:
# dict keys absent from the index are left out, and index labels absent from the dict get NaN.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
wanted_sports = ['Golf', 'Sumo', 'Hockey']
s = pd.Series(sports, index=wanted_sports)
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

In [13]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [14]:
s.iloc[3]

'South Korea'

In [15]:
s.loc['Golf']

'Scotland'

In [16]:
# With a non-integer index, an integer key falls back to positional lookup (same result as s.iloc[3]).
# NOTE(review): newer pandas releases deprecate this fallback; prefer the explicit s.iloc[3].
s[3]

'South Korea'

In [17]:
s['Golf']

'Scotland'

In [18]:
sports = {99: 'Bhutan',
          100: 'Scotland',
          101: 'Japan',
          102: 'South Korea'}
s = pd.Series(sports)

In [19]:
# s[0]  # Not the same as s.iloc[0]: the index here is made of integers (99-102), so 0 is
#       # looked up as a label, is not found, and the call raises an error (KeyError).

In [20]:
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [21]:
# Add up the Series one element at a time with a plain Python loop
# (the slow baseline that the vectorised np.sum version is compared against).
total = 0
for value in s:
    total = total + value
print(total)

324.0


In [22]:
import numpy as np

total = np.sum(s)
print(total)

324.0


In [23]:
# Build a larger Series: 10,000 random integers drawn from 0 (inclusive) to 1000 (exclusive).
# No random seed is set, so the exact values differ from run to run.
s = pd.Series(np.random.randint(0,1000,10000))
s.head()

0    352
1    364
2    319
3    982
4      1
dtype: int64

In [24]:
len(s)

10000

In [25]:
%%timeit -n 100
summary = 0
for item in s:
    summary+=item

1.47 ms ± 296 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [26]:
%%timeit -n 100
summary = np.sum(s)

77.5 µs ± 7.06 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [27]:
s+=2 #adds two to each item in s using broadcasting
s.head()

0    354
1    366
2    321
3    984
4      3
dtype: int64

In [28]:
# Same "+2" update as the broadcast version above, done one element at a time.
# Series.iteritems() was removed in pandas 2.0; Series.items() yields the same
# (label, value) pairs and is also available in older releases.
for label, value in s.items():
    s.at[label] = value + 2
s.head()

0    356
1    368
2    323
3    986
4      5
dtype: int64

In [29]:
%%timeit -n 10
# Element-by-element update, timed for comparison with the vectorised `s += 2` in the next cell.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.items():
    s.loc[label] = value + 2

706 ms ± 146 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [30]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
s+=2


1.02 ms ± 296 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [31]:
s = pd.Series([1, 2, 3])
# Assigning to a label that does not exist yet appends a new entry;
# mixing a string with the integers turns the dtype into object (see output).
s.loc['Animal'] = 'Bears'
s

0             1
1             2
2             3
Animal    Bears
dtype: object

In [32]:
# Two Series to combine: one with unique labels, one where every label is 'Cricket'.
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'],
                                   index=['Cricket',
                                          'Cricket',
                                          'Cricket',
                                          'Cricket'])
# Series.append() was removed in pandas 2.0. pd.concat() gives the same result: a new
# Series with both inputs stacked in order, leaving the two originals untouched.
all_countries = pd.concat([original_sports, cricket_loving_countries])

In [33]:
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [34]:
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [35]:
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [36]:
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

# The DataFrame Data Structure

In [37]:
import pandas as pd
# Each purchase is a Series; its labels become the DataFrame's column names (see output).
purchase_1 = pd.Series({'Name': 'Chris',
                        'Item Purchased': 'Dog Food',
                        'Cost': 22.50})
purchase_2 = pd.Series({'Name': 'Kevyn',
                        'Item Purchased': 'Kitty Litter',
                        'Cost': 2.50})
purchase_3 = pd.Series({'Name': 'Vinod',
                        'Item Purchased': 'Bird Seed',
                        'Cost': 5.00})
# One row per Series. The row labels are given explicitly and 'Store 1' appears twice,
# so the row index of this DataFrame is not unique.
df = pd.DataFrame([purchase_1, purchase_2, purchase_3], index=['Store 1', 'Store 1', 'Store 2'])
df.head()

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [38]:
df.loc['Store 2']

Name                  Vinod
Item Purchased    Bird Seed
Cost                    5.0
Name: Store 2, dtype: object

In [39]:
type(df.loc['Store 2'])

pandas.core.series.Series

In [40]:
df.loc['Store 1']

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5


In [41]:
df.loc['Store 1', 'Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [42]:
df.T

Unnamed: 0,Store 1,Store 1.1,Store 2
Name,Chris,Kevyn,Vinod
Item Purchased,Dog Food,Kitty Litter,Bird Seed
Cost,22.5,2.5,5.0


In [43]:
df.T.loc['Cost']

Store 1    22.5
Store 1     2.5
Store 2     5.0
Name: Cost, dtype: object

In [44]:
df['Cost']

Store 1    22.5
Store 1     2.5
Store 2     5.0
Name: Cost, dtype: float64

In [45]:
# Chained indexing: first select the 'Store 1' rows, then the 'Cost' column of that result.
# Same values as df.loc['Store 1', 'Cost'] above, which does it in a single lookup.
df.loc['Store 1']['Cost']

Store 1    22.5
Store 1     2.5
Name: Cost, dtype: float64

In [46]:
df.loc[:,['Name', 'Cost']]

Unnamed: 0,Name,Cost
Store 1,Chris,22.5
Store 1,Kevyn,2.5
Store 2,Vinod,5.0


In [47]:
df.drop('Store 1')

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Vinod,Bird Seed,5.0


In [48]:
df

Unnamed: 0,Name,Item Purchased,Cost
Store 1,Chris,Dog Food,22.5
Store 1,Kevyn,Kitty Litter,2.5
Store 2,Vinod,Bird Seed,5.0


In [49]:
copy_df = df.copy()
copy_df = copy_df.drop('Store 1')
copy_df

Unnamed: 0,Name,Item Purchased,Cost
Store 2,Vinod,Bird Seed,5.0


In [50]:
copy_df.drop?

In [51]:
del copy_df['Name']
copy_df

Unnamed: 0,Item Purchased,Cost
Store 2,Bird Seed,5.0


In [52]:
df['Location'] = None
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,22.5,
Store 1,Kevyn,Kitty Litter,2.5,
Store 2,Vinod,Bird Seed,5.0,


# DataFrame Indexing and Loading

In [53]:
costs = df['Cost']
costs

Store 1    22.5
Store 1     2.5
Store 2     5.0
Name: Cost, dtype: float64

In [54]:
# `costs` came from df['Cost']; adding to it in place also changed the values inside df
# in this run (compare the output of the next cell).
# NOTE(review): whether the change propagates depends on the pandas version/settings
# (Copy-on-Write) - confirm before relying on it.
costs+=2
costs

Store 1    24.5
Store 1     4.5
Store 2     7.0
Name: Cost, dtype: float64

In [55]:
df

Unnamed: 0,Name,Item Purchased,Cost,Location
Store 1,Chris,Dog Food,24.5,
Store 1,Kevyn,Kitty Litter,4.5,
Store 2,Vinod,Bird Seed,7.0,


In [56]:
!cat olympics.csv

cat: olympics.csv: No such file or directory


In [57]:
# index_col=0 uses the first CSV column as the row index; skiprows=1 skips the first line of the file.
# NOTE(review): the first line of this file appears to be the real header - in the output the
# column labels are data values (3, male, 22.0, ...), so skiprows=1 looks unintended here.
df = pd.read_csv('../python_introduction/data/titanic.csv', index_col = 0, skiprows=1)
df.head()

Unnamed: 0_level_0,3,male,22.0,1,0.1,7.25,S,Third,man,True,Unnamed: 11,Southampton,no,False
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True


In [58]:
df = pd.read_csv('../python_introduction/data/titanic.csv')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [59]:
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [60]:
# Work out every new column name first, then apply them all with a single rename call.
# Each rule: (leading text to match, text to put in front, number of leading characters to cut).
prefix_rules = [('su', 'First', 4),
                ('pc', 'Second', 4),
                ('se', 'Third', 4),
                ('f', '#', 1)]
new_names = {}
for col in df.columns:
    for prefix, replacement, cut in prefix_rules:
        if col.startswith(prefix):
            new_names[col] = replacement + col[cut:]
df.rename(columns=new_names, inplace=True)

df.head()

Unnamed: 0,Firstived,Secondss,Third,age,sibsp,parch,#are,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


# Querying a DataFrame

In [61]:
df = pd.read_csv('../python_introduction/data/titanic.csv')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [62]:
df['pclass'] > 1

0       True
1      False
2       True
3      False
4       True
       ...  
886     True
887    False
888     True
889    False
890     True
Name: pclass, Length: 891, dtype: bool

In [63]:
# where() keeps the frame's shape: rows failing the condition stay in place but are filled with NaN.
# NOTE(review): the name `only_gold` does not describe this data (rows with pclass > 2).
only_gold = df.where(df['pclass'] > 2)
only_gold.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0.0,3.0,male,22.0,1.0,0.0,7.25,S,Third,man,True,,Southampton,no,False
1,,,,,,,,,,,,,,,
2,1.0,3.0,female,26.0,0.0,0.0,7.925,S,Third,woman,False,,Southampton,yes,True
3,,,,,,,,,,,,,,,
4,0.0,3.0,male,35.0,0.0,0.0,8.05,S,Third,man,True,,Southampton,no,True


In [64]:
only_gold['pclass'].count()

491

In [65]:
df['pclass'].count()

891

In [66]:
# dropna() with default arguments removes every row that contains at least one NaN.
# NOTE(review): that also removes unmasked rows with a missing value of their own (e.g. no deck),
# not only the rows blanked out by where() - compare the few rows left in the output.
only_gold = only_gold.dropna()
only_gold.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
10,1.0,3.0,female,4.0,1.0,1.0,16.7,S,Third,child,False,G,Southampton,yes,False
75,0.0,3.0,male,25.0,0.0,0.0,7.65,S,Third,man,True,F,Southampton,no,True
205,0.0,3.0,female,2.0,0.0,1.0,10.4625,S,Third,child,False,G,Southampton,no,False
251,0.0,3.0,female,29.0,1.0,1.0,10.4625,S,Third,woman,False,G,Southampton,no,False
394,1.0,3.0,female,24.0,0.0,2.0,16.7,S,Third,woman,False,G,Southampton,yes,False


In [67]:
only_gold = df[df['pclass'] > 1]
only_gold.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True
5,0,3,male,,0,0,8.4583,Q,Third,man,True,,Queenstown,no,True
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False


In [68]:
len(df[(df['pclass'] > 1) | (df['pclass'] < 3)])

891

In [69]:
df[(df['pclass'] > 1) & (df['pclass'] == 2)]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
15,1,2,female,55.0,0,0,16.0000,S,Second,woman,False,,Southampton,yes,True
17,1,2,male,,0,0,13.0000,S,Second,man,True,,Southampton,yes,True
20,0,2,male,35.0,0,0,26.0000,S,Second,man,True,,Southampton,no,True
21,1,2,male,34.0,0,0,13.0000,S,Second,man,True,D,Southampton,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
866,1,2,female,27.0,1,0,13.8583,C,Second,woman,False,,Cherbourg,yes,False
874,1,2,female,28.0,1,0,24.0000,C,Second,woman,False,,Cherbourg,yes,False
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
883,0,2,male,28.0,0,0,10.5000,S,Second,man,True,,Southampton,no,True


# Indexing DataFrames

In [70]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [71]:
# Copy the current row index into a column before replacing it: after set_index('pclass')
# the old row labels are no longer the index (see output).
# NOTE(review): the column name 'country' does not match its contents (the old integer row labels).
df['country'] = df.index
df = df.set_index('pclass')
df.head()

Unnamed: 0_level_0,survived,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,country
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
3,0,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,0
1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1
3,1,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,2
1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,3
3,0,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,4


In [72]:
df = df.reset_index()
df.head()

Unnamed: 0,pclass,survived,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone,country
0,3,0,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False,0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False,1
2,3,1,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True,2
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False,3
4,3,0,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True,4


In [73]:
df = pd.read_csv('../python_introduction/data/titanic.csv')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [74]:
df['pclass'].unique()

array([3, 1, 2])

In [75]:
df=df[df['pclass'] == 1]
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
6,0,1,male,54.0,0,0,51.8625,S,First,man,True,E,Southampton,no,True
11,1,1,female,58.0,0,0,26.55,S,First,woman,False,C,Southampton,yes,True
23,1,1,male,28.0,0,0,35.5,S,First,man,True,A,Southampton,yes,True


In [76]:
columns_to_keep = ['pclass',
                   'sex',
                   'age']
df = df[columns_to_keep]
df.head()

Unnamed: 0,pclass,sex,age
1,1,female,38.0
3,1,female,35.0
6,1,male,54.0
11,1,female,58.0
23,1,male,28.0


In [77]:
df = df.set_index(['pclass', 'sex'])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,age
pclass,sex,Unnamed: 2_level_1
1,female,38.0
1,female,35.0
1,male,54.0
1,female,58.0
1,male,28.0


In [78]:
# Select every row whose (pclass, sex) index pair is (1, 'female').
# NOTE(review): the stray line in the output looks like the tail of a pandas warning - possibly
# because the MultiIndex is not sorted; confirm, and consider calling df.sort_index() first.
df.loc[1,'female']

  df.loc[1,'female']


Unnamed: 0_level_0,Unnamed: 1_level_0,age
pclass,sex,Unnamed: 2_level_1
1,female,38.0
1,female,35.0
1,female,58.0
1,female,
1,female,49.0
1,...,...
1,female,45.0
1,female,48.0
1,female,47.0
1,female,56.0


In [79]:
df.loc[ [(1, 'female'),
         (1, 'male')] ]

Unnamed: 0_level_0,Unnamed: 1_level_0,age
pclass,sex,Unnamed: 2_level_1
1,female,38.0
1,female,35.0
1,female,58.0
1,female,
1,female,49.0
1,...,...
1,male,
1,male,51.0
1,male,31.0
1,male,33.0


# Missing values

In [80]:
df = pd.read_csv('../python_introduction/data/titanic.csv')
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [81]:
df.fillna?

In [82]:
df = df.set_index('pclass')
df = df.sort_index()
df

Unnamed: 0_level_0,survived,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,1,male,4.0,0,2,81.8583,S,First,child,False,A,Southampton,yes,False
1,1,female,24.0,0,0,83.1583,C,First,woman,False,C,Cherbourg,yes,True
1,1,female,30.0,0,0,56.9292,C,First,woman,False,E,Cherbourg,yes,True
1,1,female,17.0,1,0,108.9000,C,First,woman,False,C,Cherbourg,yes,False
1,1,female,,0,0,110.8833,C,First,woman,False,,Cherbourg,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,0,male,19.0,0,0,7.7750,S,Third,man,True,,Southampton,no,True
3,1,female,1.0,0,2,15.7417,C,Third,child,False,,Cherbourg,yes,False
3,0,male,32.0,0,0,7.9250,S,Third,man,True,,Southampton,no,True
3,0,male,18.0,1,0,6.4958,S,Third,man,True,,Southampton,no,False


In [83]:
df = df.reset_index()
df = df.set_index(['pclass', 'sex'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,male,1,4.0,0,2,81.8583,S,First,child,False,A,Southampton,yes,False
1,female,1,24.0,0,0,83.1583,C,First,woman,False,C,Cherbourg,yes,True
1,female,1,30.0,0,0,56.9292,C,First,woman,False,E,Cherbourg,yes,True
1,female,1,17.0,1,0,108.9000,C,First,woman,False,C,Cherbourg,yes,False
1,female,1,,0,0,110.8833,C,First,woman,False,,Cherbourg,yes,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,male,0,19.0,0,0,7.7750,S,Third,man,True,,Southampton,no,True
3,female,1,1.0,0,2,15.7417,C,Third,child,False,,Cherbourg,yes,False
3,male,0,32.0,0,0,7.9250,S,Third,man,True,,Southampton,no,True
3,male,0,18.0,1,0,6.4958,S,Third,man,True,,Southampton,no,False


In [84]:
# Forward-fill: each missing value takes the last valid value above it in the same column.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument is deprecated
# in pandas 2.x; the filled result is the same.
df = df.ffill()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,survived,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
pclass,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,male,1,4.0,0,2,81.8583,S,First,child,False,A,Southampton,yes,False
1,female,1,24.0,0,0,83.1583,C,First,woman,False,C,Cherbourg,yes,True
1,female,1,30.0,0,0,56.9292,C,First,woman,False,E,Cherbourg,yes,True
1,female,1,17.0,1,0,108.9,C,First,woman,False,C,Cherbourg,yes,False
1,female,1,17.0,0,0,110.8833,C,First,woman,False,C,Cherbourg,yes,True
