In [13]:
import pandas as pd
import numpy as np

#### Pandas allows chaining together functions that expect Series, DataFrames or GroupBy objects

#### create dataframe by reading the data set

In [84]:
# Load the tab-separated passenger file, using PassengerId as the row index,
# then preview the first rows.
df = pd.read_csv(
    "titanic.csv",
    sep="\t",
    index_col="PassengerId",
)
df.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


#### Reset the index so that it starts from 0

In [17]:
def transform_index(df):
    """Replace the index of ``df`` with consecutive integers starting at 0.

    The index is overwritten on ``df`` itself (no copy is made), and the same
    object is returned so the function can be chained with ``DataFrame.pipe``.
    """
    n_rows = len(df)
    df.index = np.arange(n_rows)
    return df

#### We noticed a few NaNs in the "Cabin" column

In [37]:
# Count the rows whose "Cabin" value is missing.
df["Cabin"].isnull().sum()

125

#### Actually, we have a lot of NaNs (125)
#### We would like to replace each NaN based on the "Sex" column: a missing cabin is replaced by the most frequent cabin among passengers of the same sex as that row

In [69]:
def replace_nans_cabin(df):
    """Fill missing ``Cabin`` values with the most frequent cabin of the same sex.

    For every distinct value of the ``Sex`` column, the most frequent non-null
    ``Cabin`` is determined; each row whose ``Cabin`` is NaN then receives the
    value belonging to its own sex. Rows whose sex has no known cabin at all
    are left as NaN instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Sex`` and ``Cabin`` columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, so the function can be chained with
        ``DataFrame.pipe``.
    """
    known_cabins = df.dropna(subset=['Cabin'])
    # value_counts() orders by descending frequency, so index[0] is the most
    # frequent cabin within each sex.
    most_common = {
        sex: cabins.value_counts().index[0]
        for sex, cabins in known_cabins.groupby('Sex')['Cabin']
        if len(cabins)
    }
    # Look the fill value up by each row's own sex instead of relying on the
    # positional labels 0/1 of a groupby result, which only works when exactly
    # 'female' and 'male' are both present, in that order.
    df['Cabin'] = df['Cabin'].fillna(df['Sex'].map(most_common))
    return df

#### We also fill the NaNs in the "Age" column with the mean age of passengers of the same sex

In [89]:
def replace_nans_age(df):
    """Fill missing ``Age`` values with the mean age of the row's sex.

    The per-sex mean is broadcast back to every row with ``transform`` and
    used as the fill value wherever ``Age`` is NaN. The ``Age`` column of
    ``df`` is overwritten and the same object is returned for use with
    ``DataFrame.pipe``.
    """
    mean_age_by_sex = df.groupby("Sex")["Age"].transform('mean')
    filled_age = df["Age"].fillna(mean_age_by_sex)
    df['Age'] = filled_age
    return df

#### Creates pipeline

In [90]:
# Run the cleaning steps in order; each step receives the frame returned by
# the previous one.
(
    df
    .pipe(transform_index)
    .pipe(replace_nans_cabin)
    .pipe(replace_nans_age)
)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.000000,1,0,A/5 21171,7.2500,D26,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.000000,1,0,PC 17599,71.2833,C85,C
2,1,3,"Heikkinen, Miss. Laina",female,26.000000,0,0,STON/O2. 3101282,7.9250,F E69,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.000000,1,0,113803,53.1000,C123,S
4,0,3,"Allen, Mr. William Henry",male,35.000000,0,0,373450,8.0500,D26,S
5,0,3,"Moran, Mr. James",male,30.326962,0,0,330877,8.4583,D26,Q
6,0,1,"McCarthy, Mr. Timothy J",male,54.000000,0,0,17463,51.8625,E46,S
7,0,3,"Palsson, Master. Gosta Leonard",male,2.000000,3,1,349909,21.0750,D26,S
8,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.000000,0,2,347742,11.1333,F E69,S
9,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.000000,1,0,237736,30.0708,F E69,C
