#
# Data types Correction and Mapping:
#

###
## Previous Codes
###

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression

a = sns.load_dataset('titanic')


In [3]:
def impute_age(pclass, sex):
    
    if sex == 'male':
        
        if pclass == 1:
            return a[(a['pclass'] == 1) & (a['sex'] == 'male')]['age'].mean()
        elif pclass == 2:
            return a[(a['pclass'] == 2) & (a['sex'] == 'male')]['age'].mean()
        elif pclass == 3:
            return a[(a['pclass'] == 3) & (a['sex'] == 'male')]['age'].mean()
        
    else: 
        
        if pclass == 1:
            return a[(a['pclass'] == 1) & (a['sex'] != 'male')]['age'].mean()
        elif pclass == 2:
            return a[(a['pclass'] == 2) & (a['sex'] != 'male')]['age'].mean()
        elif pclass == 3:
            return a[(a['pclass'] == 3) & (a['sex'] != 'male')]['age'].mean()
        
a['age'] = a.apply( lambda x: impute_age(x['pclass'], x['sex']) if np.isnan(x['age']) else x['age'], axis = 1)

In [4]:
titanic = a.drop(['deck', 'embark_town', 'alive'], axis = 1)
titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,alone
0,0,3,male,22.00,1,0,7.2500,S,Third,man,True,False
1,1,1,female,38.00,1,0,71.2833,C,First,woman,False,False
2,1,3,female,26.00,0,0,7.9250,S,Third,woman,False,True
3,1,1,female,35.00,1,0,53.1000,S,First,woman,False,False
4,0,3,male,35.00,0,0,8.0500,S,Third,man,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.00,0,0,13.0000,S,Second,man,True,True
887,1,1,female,19.00,0,0,30.0000,S,First,woman,False,True
888,0,3,female,21.75,1,2,23.4500,S,Third,woman,False,False
889,1,1,male,26.00,0,0,30.0000,C,First,man,True,True


In [5]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   survived    891 non-null    int64   
 1   pclass      891 non-null    int64   
 2   sex         891 non-null    object  
 3   age         891 non-null    float64 
 4   sibsp       891 non-null    int64   
 5   parch       891 non-null    int64   
 6   fare        891 non-null    float64 
 7   embarked    889 non-null    object  
 8   class       891 non-null    category
 9   who         891 non-null    object  
 10  adult_male  891 non-null    bool    
 11  alone       891 non-null    bool    
dtypes: bool(2), category(1), float64(2), int64(4), object(3)
memory usage: 65.5+ KB


###
## Converting Columns From Float to Int
###

In [6]:
titanic['age'] = titanic['age'].astype('int')
titanic['fare'] = titanic['fare'].astype('int')

###
## Now 'Age' & 'Fare' has been Converted to Int
###

In [8]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   survived    891 non-null    int64   
 1   pclass      891 non-null    int64   
 2   sex         891 non-null    object  
 3   age         891 non-null    int32   
 4   sibsp       891 non-null    int64   
 5   parch       891 non-null    int64   
 6   fare        891 non-null    int32   
 7   embarked    889 non-null    object  
 8   class       891 non-null    category
 9   who         891 non-null    object  
 10  adult_male  891 non-null    bool    
 11  alone       891 non-null    bool    
dtypes: bool(2), category(1), int32(2), int64(4), object(3)
memory usage: 58.6+ KB


###
## Converting Columns to Boolean from Object
###

In [11]:
genders = {'male' : 0, 'female': 1}
titanic['sex'] = titanic['sex'].map(genders)


In [60]:
#who_ = {'man' : 0, 'women': 1, 'child': 2}
#titanic['who'] = titanic['who'].map(who_)
# fix this with one-hot encoding because it has three values

In [12]:
titanic['adult_male'] = titanic['adult_male'].map({True: 1, False: 0})

In [13]:
titanic['alone'] = titanic['alone'].map({True: 1, False: 0})

###
## Our Dataset Now
###

In [14]:
titanic

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,alone
0,0,3,,22,1,0,7,S,Third,man,1,0
1,1,1,,38,1,0,71,C,First,woman,0,0
2,1,3,,26,0,0,7,S,Third,woman,0,1
3,1,1,,35,1,0,53,S,First,woman,0,0
4,0,3,,35,0,0,8,S,Third,man,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,,27,0,0,13,S,Second,man,1,1
887,1,1,,19,0,0,30,S,First,woman,0,1
888,0,3,,21,1,2,23,S,Third,woman,0,0
889,1,1,,26,0,0,30,C,First,man,1,1
