# Tableau dashboard (EDA)

Note that you can click on different subcategories in the dashboard (e.g. click the female label in the donut chart) to see an overview of the survival rate for that particular subcategory.

Feel free to comment if you think the dashboard or the dataset used for the EDA could be improved in any way. Thanks!

Tableau link:

https://public.tableau.com/app/profile/jadon.ng/viz/TitanicEDA_16856334252930/Dashboard1

## Dataset used for making the Tableau dashboard

In [1]:
import pandas as pd

# Load the Titanic training data into a DataFrame.
# NOTE(review): the absolute path assumes the Kaggle notebook environment - adjust when running elsewhere.
train = pd.read_csv('/kaggle/input/titanic/train.csv')

In [2]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import math

class FeatureDropper(BaseEstimator, TransformerMixin):
    """Drop the ``Name``, ``PassengerId``, ``Ticket`` and ``Cabin`` columns.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    returns a new frame without those four columns.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return ``X`` without the dropped columns.

        Raises ``KeyError`` if any of the four columns is absent.
        """
        unused_columns = ['Name', 'PassengerId', 'Ticket', 'Cabin']
        return X.drop(columns=unused_columns)

class Imputer(BaseEstimator, TransformerMixin):
    """Fill missing values in the ``Age``, ``Embarked`` and ``Fare`` columns.

    ``Age`` and ``Fare`` are filled with the column mean, ``Embarked`` with
    its most frequent value. ``fit`` learns nothing: the fill values are
    computed from the frame handed to ``transform`` itself.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the three columns imputed.

        The input frame is not modified.
        """
        # Work on a copy so the caller's DataFrame is left untouched.
        X = X.copy()

        strategy_by_column = (
            ('Age', 'mean'),
            ('Embarked', 'most_frequent'),
            ('Fare', 'mean'),
        )
        for column, strategy in strategy_by_column:
            imputer = SimpleImputer(strategy=strategy)
            # SimpleImputer expects 2-D input; reshape to one column and
            # flatten the result back to 1-D for assignment.
            values = X[column].values.reshape(-1, 1)
            X[column] = imputer.fit_transform(values)[:, 0]
        return X
    
class UnitAdjust(BaseEstimator, TransformerMixin):
    """Coarsen ``Age`` to whole numbers and ``Fare`` to one decimal place."""

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with ``Age`` floored and ``Fare`` rounded.

        ``math.floor`` raises ``ValueError`` on NaN, so ``Age`` must not
        contain missing values when this runs. The input frame is not
        modified.
        """
        # Work on a copy so the caller's DataFrame is left untouched.
        X = X.copy()
        X['Age'] = X['Age'].apply(math.floor)
        X['Fare'] = X['Fare'].apply(lambda fare: round(fare, 1))
        return X
    

class Companion(BaseEstimator, TransformerMixin):
    """Replace ``SibSp`` and ``Parch`` with a binary ``companion`` flag.

    ``companion`` is 1 when ``SibSp + Parch`` is greater than zero (the
    passenger travelled with at least one relative) and 0 otherwise.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a new frame with ``companion`` added and ``SibSp``/``Parch`` removed.

        The input frame is not modified.
        """
        relatives = X['SibSp'] + X['Parch']
        # A NaN count compares as False and therefore maps to 0.
        has_companion = (relatives > 0).astype(int)
        # drop() and assign() both return new frames, so X is never mutated.
        return X.drop(['SibSp', 'Parch'], axis=1).assign(companion=has_companion)
    
class TitleFeature(BaseEstimator, TransformerMixin):
    """Derive a ``title`` column ('LR' or 'HR') from the ``Name`` column.

    The title is the text after the first comma of ``Name`` up to the next
    period, stripped of surrounding whitespace. 'Mr', 'Mrs', 'Miss', 'Ms'
    and 'Mlle' are labelled 'LR'; every other value is labelled 'HR'.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the ``title`` column added.

        The input frame is not modified.
        """
        low_rank_titles = {'Mr', 'Mrs', 'Miss', 'Ms', 'Mlle'}

        # Work on a copy so the caller's DataFrame is left untouched.
        X = X.copy()
        after_comma = X['Name'].str.split(',', expand=True)[1]
        title = after_comma.str.split('.', expand=True)[0].str.strip()
        X['title'] = title.apply(
            lambda t: 'LR' if t in low_rank_titles else 'HR'
        )
        return X
      
class FeatureBinarize(BaseEstimator, TransformerMixin):
    """Encode ``Sex`` and ``title`` as 0/1 integers.

    ``Sex``: 'male' -> 0, 'female' -> 1.
    ``title``: 'LR' -> 0, anything else -> 1.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with ``Sex`` and ``title`` binarized.

        Raises ``KeyError`` if ``Sex`` holds a value other than 'male' or
        'female'. The input frame is not modified.
        """
        # Work on a copy so the caller's DataFrame is left untouched.
        X = X.copy()

        # Gender binarize
        gender_dct = {'male': 0, 'female': 1}
        X['Sex'] = [gender_dct[g] for g in X['Sex']]

        # Title binarize - low rank is 0, high rank is 1
        X['title'] = X['title'].apply(lambda t: 0 if t == 'LR' else 1)

        return X

class FeatureEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode ``Embarked`` into the indicator columns ``C``, ``Q`` and ``S``."""

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a new frame with ``Embarked`` replaced by ``C``/``Q``/``S`` columns.

        The encoder's categories are pinned to the column labels so that each
        indicator column always matches its port code, even when a port does
        not occur in ``X``. With the encoder's default unknown-category
        handling, any other value in ``Embarked`` (including a missing value)
        raises ``ValueError``. The input frame is not modified.
        """
        column_name = ['C', 'Q', 'S']
        # Pin the categories instead of letting the encoder infer them, so
        # the positional mapping from matrix columns to labels cannot drift.
        encoder = OneHotEncoder(categories=[column_name])
        matrix = encoder.fit_transform(
            X['Embarked'].values.reshape(-1, 1)
        ).toarray()
        indicators = dict(zip(column_name, matrix.T))
        # drop() and assign() both return new frames, so X is never mutated.
        return X.drop(['Embarked'], axis=1).assign(**indicators)
    
class TableauSTR(BaseEstimator, TransformerMixin):
    """Replace coded ``Embarked`` and ``Pclass`` values with readable labels.

    ``Embarked``: 'S'/'C'/'Q' -> 'Southampton'/'Cherbourg'/'Queenstown'.
    ``Pclass``: 1/2/3 -> '1st'/'2nd'/'3rd'.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (required by the transformer API)."""
        return self

    def transform(self, X):
        """Return a copy of ``X`` with ``Embarked`` and ``Pclass`` relabelled.

        Raises ``KeyError`` if either column holds a value outside its
        mapping (including a missing value). The input frame is not modified.
        """
        embarked_dct = {'S': 'Southampton', 'C': 'Cherbourg', 'Q': 'Queenstown'}
        pclass_dct = {1: '1st', 2: '2nd', 3: '3rd'}

        # Work on a copy so the caller's DataFrame is left untouched.
        X = X.copy()
        X['Embarked'] = [embarked_dct[e] for e in X['Embarked']]
        X['Pclass'] = [pclass_dct[c] for c in X['Pclass']]
        # 'Survived' is left unchanged; a 0/1 -> 'Dead'/'Survived' relabelling
        # existed here only as disabled code.
        return X

In [3]:
# Preprocessing chain that produces the dataset behind the Tableau dashboard.
# Step order matters: 'title' reads the Name column that 'dropper' removes,
# and 'converter' and 'unit' run after 'imputer' has filled Embarked and Age.
pipe_tableau = Pipeline(
    steps=[
        ('imputer', Imputer()),
        ('title', TitleFeature()),
        ('dropper', FeatureDropper()),
        ('converter', TableauSTR()),
        ('companion', Companion()),
        ('unit', UnitAdjust()),
    ]
)

# Feed the pipeline a copy so the raw `train` frame stays as loaded.
df_tableau = pipe_tableau.fit_transform(train.copy())
df_tableau

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked,title,companion
0,0,3rd,male,22,7.2,Southampton,LR,1
1,1,1st,female,38,71.3,Cherbourg,LR,1
2,1,3rd,female,26,7.9,Southampton,LR,0
3,1,1st,female,35,53.1,Southampton,LR,1
4,0,3rd,male,35,8.1,Southampton,LR,0
...,...,...,...,...,...,...,...,...
886,0,2nd,male,27,13.0,Southampton,HR,0
887,1,1st,female,19,30.0,Southampton,LR,0
888,0,3rd,female,29,23.4,Southampton,LR,1
889,1,1st,male,26,30.0,Cherbourg,LR,0
