Data Dictionary
| Variable | Description | Details |
| -------- | ----------- | ------- |
| survival | Survival    | 0 = No; 1 = Yes |
| pclass   | Passenger Class | 1 = upper; 2 = middle; 3 = lower |
| name     | First and Last Name | |
| sex      | Sex        | |
| age      | Age in years | Fractional if less than 1; estimated ages are given in the form xx.5 |
| sibsp    | Number of Siblings/Spouses Aboard | |
| parch    | Number of Parents/Children Aboard | |
| ticket   | Ticket Number | |
| fare     | Passenger Fare | |
| cabin    | Cabin      | |
| embarked | Port of Embarkation | C = Cherbourg; Q = Queenstown; S = Southampton |

In [1]:
import pandas as pd
import numpy as np

import plotly as py
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

#### Load Dataset

In [2]:
# Read the raw training file once and leave it untouched; all cleaning below
# is done on an independent copy.
train = pd.read_csv("train.csv")
df = train.copy()  # copy() is deep by default
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# (number of rows, number of columns) of the working copy
df.shape

(891, 12)

In [4]:
# Per-column overview: dtype, number of missing values and their percentage,
# with the most incomplete columns listed first.
missing_counts = df.isnull().sum()
dataInfo = pd.DataFrame(df.dtypes, columns=["dtypes"])
dataInfo = dataInfo.assign(**{
    "missing": missing_counts,
    "% missing": ((missing_counts / len(df)) * 100).round(2),
})
dataInfo = dataInfo.sort_values(by="% missing", ascending=False)
dataInfo


Unnamed: 0,dtypes,missing,% missing
Cabin,object,687,77.1
Age,float64,177,19.87
Embarked,object,2,0.22
PassengerId,int64,0,0.0
Survived,int64,0,0.0
Pclass,int64,0,0.0
Name,object,0,0.0
Sex,object,0,0.0
SibSp,int64,0,0.0
Parch,int64,0,0.0


In [5]:
# Drop Cabin since more than 50% of its values are missing.
# `errors="ignore"` keeps the cell re-runnable once the column is already gone.
df = df.drop(columns=["Cabin"], errors="ignore")

# Age is numerical: impute with the mean.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: that form is chained assignment, which warns on pandas 2.x and
# does not modify `df` once Copy-on-Write is enabled.
df["Age"] = df["Age"].fillna(df["Age"].mean())

# Embarked is categorical: impute with the most frequent value.
df["Embarked"] = df["Embarked"].fillna(df["Embarked"].mode()[0])

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,13.002015,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,22.0,0.0,0.0,7.9104
50%,446.0,0.0,3.0,29.699118,0.0,0.0,14.4542
75%,668.5,1.0,3.0,35.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [7]:
# Summary (count, unique, top, freq) of the object-dtype (string) columns
df.describe(include=['O'])

Unnamed: 0,Name,Sex,Ticket,Embarked
count,891,891,891,891
unique,891,2,681,3
top,"Braund, Mr. Owen Harris",male,347082,S
freq,1,577,7,646


In [8]:
# Proportion of passengers in each Survived class (0 = No, 1 = Yes)
df["Survived"].value_counts(normalize=True)

0    0.616162
1    0.383838
Name: Survived, dtype: float64

In [9]:
# Fare against age; marker size follows the fare and colour follows Survived.
fig = px.scatter(
    data_frame=df,
    x='Fare',
    y='Age',
    size='Fare',
    color='Survived',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [10]:
# Donut chart (hole=0.4) of the passenger split by Survived.
fig = px.pie(
    data_frame=df,
    names='Survived',
    hole=0.4,
    title='Passenger Survival',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [11]:
# Pie of passengers per port of embarkation; `pull` offsets slices from the centre.
embarked_pie = go.Pie(
    labels=df['Embarked'],
    pull=[.1, .15, .15, 0],
)
fig = go.Figure(data=[embarked_pie])
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [12]:
# One pie per port of embarkation showing the Survived split of its passengers.
# Each entry: (port code, pull applied to the slices, subplot column).
port_panels = [('C', [.1, .1], 1), ('S', [.07, .07], 2), ('Q', [.1, .1], 3)]

fig = make_subplots(rows=1, cols=len(port_panels),
                    specs=[[{"type": "pie"} for _ in port_panels]])

for port, pull, col in port_panels:
    # Only labels are supplied (no values), so the slices are built from the
    # Survived labels of the passengers who embarked at this port.
    fig.add_trace(
        go.Pie(labels=df.loc[df['Embarked'] == port, 'Survived'], pull=pull,
               title=f'Embarked {port} vs. Survived'),
        row=1, col=col)

# The former title, "Gene Expression Features", was left over from another
# notebook and did not describe this figure.
fig.update_layout(height=500, width=800, title_text="Survival by Port of Embarkation")
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [13]:
# Age distribution as a probability-density histogram with a box plot in the margin.
fig = px.histogram(
    data_frame=df,
    x='Age',
    nbins=30,
    histnorm='probability density',
    marginal='box',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [14]:
# Age per passenger class; points="all" also draws every individual observation.
fig = px.box(
    data_frame=df,
    x='Pclass',
    y="Age",
    points="all",
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [15]:
# Notched box plots of age per passenger class, split by Survived.
fig = px.box(
    data_frame=df,
    x='Pclass',
    y="Age",
    color="Survived",
    notched=True,
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [16]:
# Age distribution per sex, split by Survived, with every observation drawn.
fig = px.violin(
    data_frame=df,
    x='Sex',
    y="Age",
    color='Survived',
    points="all",
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [17]:
# Age distribution per passenger class; the two Survived violins are overlaid.
fig = px.violin(
    data_frame=df,
    x='Pclass',
    y="Age",
    color='Survived',
    violinmode='overlay',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [18]:
# Strip plot of individual ages per passenger class, coloured by Survived.
fig = px.strip(
    data_frame=df,
    x='Pclass',
    y="Age",
    color='Survived',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [19]:
# Strip plot of individual ages per sex; the two Survived strips are overlaid.
fig = px.strip(
    data_frame=df,
    x='Sex',
    y="Age",
    color='Survived',
    stripmode="overlay",
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [20]:
# Heatmap of passenger counts for each (port of embarkation, class) pair.
fig = px.density_heatmap(
    data_frame=df,
    x="Embarked",
    y="Pclass",
    width=500,
    height=500,
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [21]:
# Pearson correlation is only defined for numeric columns. Select them
# explicitly: on pandas >= 2.0, `df.corr()` raises on the string columns
# (Name, Sex, Ticket, Embarked) instead of silently dropping them.
numeric_df = df.select_dtypes(include='number')
fig = px.imshow(numeric_df.corr(method='pearson'),
                title='Correlations Among Training Features',
                height=700, width=700)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [22]:
# Density contours of siblings/spouses aboard against parents/children aboard.
fig = px.density_contour(
    data_frame=df,
    x="SibSp",
    y="Parch",
    width=800,
    height=400,
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [23]:
# Same SibSp/Parch contours, one set per Survived value, drawn as filled and
# labelled contours.
fig = px.density_contour(
    data_frame=df,
    x="SibSp",
    y="Parch",
    color='Survived',
    width=800,
    height=400,
)
fig.update_traces(contours_showlabels=True, contours_coloring="fill")
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [24]:
# 3-D scatter of class, fare and age, coloured by Survived.
fig = px.scatter_3d(
    data_frame=df,
    x='Pclass',
    y='Fare',
    z='Age',
    color='Survived',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [25]:
# 3-D scatter of class, fare and age; colour follows Survived, marker symbol follows Sex.
fig = px.scatter_3d(
    data_frame=df,
    x='Pclass',
    y='Fare',
    z='Age',
    symbol='Sex',
    color='Survived',
)
fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [26]:
import plotly.graph_objects as go
import plotly.subplots as sp

# Overview grid (2 x 4): bar charts of category counts for the discrete
# columns and density-normalised histograms for Fare and Age.
#
# The traces are built in a loop rather than as eight copy-pasted stanzas, so
# each column's value_counts() is computed once and no go.Bar object is bound
# to names such as `sex_count` / `pclass_count`, which the univariate
# dashboard cell below uses for DataFrames.
count_columns = ["Survived", "Pclass", "Sex", "SibSp", "Parch", "Embarked"]
dist_columns = ["Fare", "Age"]
panel_columns = count_columns + dist_columns
n_grid_cols = 4

fig = sp.make_subplots(rows=2, cols=n_grid_cols, subplot_titles=panel_columns)

for position, column in enumerate(panel_columns):
    # Fill the grid left-to-right, top-to-bottom; plotly rows/cols are 1-based.
    grid_row, grid_col = divmod(position, n_grid_cols)
    if column in count_columns:
        counts = df[column].value_counts()
        trace = go.Bar(x=counts.index, y=counts.values)
    else:
        trace = go.Histogram(x=df[column].dropna(), histnorm='density', name=column)
    fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(showlegend=False, height=600, width=1000, title_text="Titanic Dataset Analysis")

fig.show()


|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [27]:
def group_by(df, col):
    """Count the rows of ``df`` in each group defined by ``col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to aggregate.
    col : hashable or list
        A single column label, or a list of column labels to group by jointly.

    Returns
    -------
    pandas.DataFrame
        The grouping column(s) followed by a ``count`` column holding the
        number of rows in each group.
    """
    # A list is unhashable and so can never be a column label itself; it is
    # therefore safe to read it as "several keys". Anything else is one key.
    keys = col if isinstance(col, list) else [col]
    return df.groupby(keys).size().reset_index(name='count')

In [28]:
# Shared palette for the dashboard figures: three series colours plus the
# paper/plot background colour.
color_1, color_2, color_3 = '#37536d', 'royalblue', '#f0f4f5'
bg_color = '#dfecf4'
colors = [color_1, color_2, color_3]

In [49]:
# Per-category passenger counts for the univariate dashboard.
sex_count = group_by(df, 'Sex')
pclass_count = group_by(df, 'Pclass')
embarked_count = group_by(df, 'Embarked')

# Family size = siblings/spouses + parents/children + the passenger.
# NOTE: this adds a column to `df`; later cells read `FamilySize` from it.
df["FamilySize"] = df["SibSp"] + df["Parch"] + 1
family_count = group_by(df, 'FamilySize')
survived_count = group_by(df, 'Survived')

fig = make_subplots(
    rows=8, cols=2,
    specs=[[{}, {'type':'domain'}],
           [{}, {'type':'domain'}],
           [{}, {'type':'domain'}],
           [{"rowspan": 2, "colspan": 2}, None],
           [None, None],
           [{"colspan": 2}, None],
           [{"rowspan": 2}, {"rowspan": 2, 'type':'domain'}],
           [None, None]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Pie', '<i>Pclass Bar', '<i>Pclass Pie',
                    '<i>Embarked Bar', '<i>Embarked Pie', '<i>Age Distribution',
                    '<i>Family Size', '<i>Survived Bar', '<i>Survived Pie'),
)

## Bar (left column) + pie (right column) pairs.
## Each entry: (subplot row, counts frame, category column, pie pull, pie colours)
bar_pie_panels = [
    (1, sex_count, 'Sex', [0.1, 0], [color_1, color_2]),
    (2, pclass_count, 'Pclass', [0, 0, 0.1], colors),
    (3, embarked_count, 'Embarked', [0, 0, 0.1], colors),
    (7, survived_count, 'Survived', [0.05, 0], [color_1, color_2]),
]
for panel_row, counts, column, pull, pie_colors in bar_pie_panels:
    fig.add_trace(go.Bar(x=counts[column], y=counts['count'],
                         text=counts['count'], name="", marker_color=colors),
                  row=panel_row, col=1)
    fig.add_trace(go.Pie(labels=counts[column], values=counts['count'], name="",
                         pull=pull, marker_colors=pie_colors),
                  row=panel_row, col=2)

## Age Histogram (spans rows 4-5, both columns)
fig.add_trace(go.Histogram(x=df['Age'], name="", histnorm='density'), row=4, col=1)

## Family Plot (row 6, both columns)
fig.add_trace(go.Bar(x=family_count['FamilySize'], y=family_count['count'],
                     text=family_count['count'], name="", marker_color=color_3), row=6, col=1)

# `title_font` is the current spelling of the deprecated `titlefont` attribute.
fig.update_layout(height=1600, width=800,
                  showlegend=False,
                  title_text="Titanic Univariate", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

fig.update_yaxes(showgrid=False)

# The former update_xaxes(categoryorder='array', categoryarray=['No Experience',
# '< 1 years', ...]) call was dropped: none of those job-experience labels
# occur in this dataset, so it was dead code copied from another notebook.

# Outline every bar, pie slice and histogram bin.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=2)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Gender Analysis

In [30]:
# Split the passengers by sex; the filtered frames are reused by later cells.
male_df, female_df = (df[df['Sex'] == sex] for sex in ('male', 'female'))

# For each second variable, count passengers per (Sex, variable) pair on the
# full frame and on each per-sex subset.
sex_frames = (df, male_df, female_df)

sex_pclass_df, male_pclass_df, female_pclass_df = (
    frame.groupby(['Sex', 'Pclass']).size().reset_index(name='count')
    for frame in sex_frames
)

sex_embarked_df, male_embarked_df, female_embarked_df = (
    frame.groupby(['Sex', 'Embarked']).size().reset_index(name='count')
    for frame in sex_frames
)

sex_survived_df, male_survived_df, female_survived_df = (
    frame.groupby(['Sex', 'Survived']).size().reset_index(name='count')
    for frame in sex_frames
)

In [31]:
# Gender dashboard: class / port / survival counts, age and family-size
# histograms, each with male and female traces in a fixed colour per sex.
fig = make_subplots(
    rows=6, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{}, {"type": "sunburst"}]],
    subplot_titles=('<i>Pclass Bar', '<i>Pclass Sunburst', '<i>Embarked Bar', '<i>Embarked Sunburst',
                   '<i>Male Age Distribution', '<i>Female Age Distribution', '<i>Family Size Distribution',
                   '<i>Survived Bar', '<i>Survived Sunburst'),
)

# (label, passengers, by-class counts, by-port counts, by-survival counts,
#  colour, row of this sex's own age histogram)
sex_groups = [
    ('Male', male_df, male_pclass_df, male_embarked_df, male_survived_df, color_1, 3),
    ('Female', female_df, female_pclass_df, female_embarked_df, female_survived_df, color_2, 4),
]

for label, frame, pclass_counts, embarked_counts, survived_counts, color, age_row in sex_groups:
    ## Pclass bars (row 1) and Embarked bars (row 2), horizontal
    for bar_row, counts, column in ((1, pclass_counts, 'Pclass'), (2, embarked_counts, 'Embarked')):
        fig.add_trace(go.Bar(y=counts[column],
                             x=counts['count'], orientation='h',
                             text=counts['count'],
                             name=label,
                             marker_color=color
                             ), row=bar_row, col=1)

    ## Age histogram: one panel per sex (rows 3 and 4)
    fig.add_trace(go.Histogram(x=frame['Age'], name=label, histnorm='percent',
                               marker_color=color), row=age_row, col=1)

    ## Family size distribution: both sexes share the panel in row 5
    fig.add_trace(go.Histogram(x=frame['FamilySize'], name=label, histnorm='percent',
                               marker_color=color), row=5, col=1)

    ## Survived bars (row 6)
    fig.add_trace(go.Bar(x=survived_counts['Survived'],
                         y=survived_counts['count'],
                         text=survived_counts['count'],
                         name=label,
                         marker_color=color
                         ), row=6, col=1)

## Sex and Pclass Sunburst
sb1 = px.sunburst(sex_pclass_df, values='count', path=['Sex', 'Pclass'], color='Sex',
                 color_discrete_sequence=[color_2, color_1])
fig.add_trace(sb1.data[0], row=1, col=2)

## Sex and Embarked Sunburst
sb2 = px.sunburst(sex_embarked_df, values='count', path=['Sex', 'Embarked'],
                 color_discrete_sequence=[color_1, color_2])
fig.add_trace(sb2.data[0], row=2, col=2)

## Sex and Survived Sunburst
sb3 = px.sunburst(sex_survived_df, values='count', path=['Sex', 'Survived'],
                 color_discrete_sequence=[color_1, color_2])
fig.add_trace(sb3.data[0], row=6, col=2)

# `title_font` is the current spelling of the deprecated `titlefont` attribute.
fig.update_layout(height=1800, width=800,
                  showlegend=False,
                  title_text="Gender Analysis wrt", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

# Outline every marker, then show label and share-of-parent on the sunbursts.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

fig.update_traces(selector = ({'type': 'sunburst'}), textinfo = 'label+percent parent')

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [32]:
## Grouping Datasets
male_pclass_embarked_survive_df = male_df.groupby(['Pclass', 'Embarked', 'Survived']).size().reset_index().rename(columns={0: 'count'})
female_pclass_embarked_survive_df = female_df.groupby(['Pclass', 'Embarked', 'Survived']).size().reset_index().rename(columns={0: 'count'})


## Creating Sunburst Figures
sb1 = px.sunburst(male_pclass_embarked_survive_df, values='count', path=['Pclass', 'Embarked', 'Survived'])
sb2 = px.sunburst(female_pclass_embarked_survive_df, values='count', path=['Pclass', 'Embarked', 'Survived'])

## Subplots
fig = make_subplots(rows=1, cols=2, specs=[
    [{"type": "sunburst"}, {"type": "sunburst"}]],
            subplot_titles=("Male X Pclass X Embarked X Survived", "Female X Pclass X Embarked X Survived"))

## Plotting Figures
fig.add_trace(sb1.data[0], row=1, col=1)
fig.add_trace(sb2.data[0], row=1, col=2)

fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(title_text="Male vs Female Sunburst", title_x=0.5, height=600, template='plotly_dark', showlegend=False,
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Pclass Analysis

In [33]:
# Split the passengers by class; the filtered frames are reused by later cells.
pclass1_df, pclass2_df, pclass3_df = (df[df['Pclass'] == pclass] for pclass in (1, 2, 3))

# For each second variable, count passengers per (Pclass, variable) pair on
# the full frame and on each per-class subset.
pclass_frames = (df, pclass1_df, pclass2_df, pclass3_df)

pclass_sex_df, pclass1_sex_df, pclass2_sex_df, pclass3_sex_df = (
    frame.groupby(['Pclass', 'Sex']).size().reset_index(name='count')
    for frame in pclass_frames
)

pclass_embarked_df, pclass1_embarked_df, pclass2_embarked_df, pclass3_embarked_df = (
    frame.groupby(['Pclass', 'Embarked']).size().reset_index(name='count')
    for frame in pclass_frames
)

pclass_survived_df, pclass1_survived_df, pclass2_survived_df, pclass3_survived_df = (
    frame.groupby(['Pclass', 'Survived']).size().reset_index(name='count')
    for frame in pclass_frames
)

In [34]:
# Passenger-class dashboard: sex / port / survival counts, age and family-size
# histograms, with one trace per class in a fixed colour per class.
fig = make_subplots(
    rows=7, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{}, {"type": "sunburst"}]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Sunburst', '<i>Embarked Bar', '<i>Embarked Sunburst',
                   '<i>Pclass 3 Age Distribution', '<i>Pclass 2 Age Distribution',
                    '<i>Pclass 1 Age Distribution', '<i>Family Size Distribution',
                   '<i>Survived Bar', '<i>Survived Sunburst'),
)

# (label, passengers, by-sex counts, by-port counts, by-survival counts, colour)
pclass_groups = [
    ('Pclass 1', pclass1_df, pclass1_sex_df, pclass1_embarked_df, pclass1_survived_df, color_3),
    ('Pclass 2', pclass2_df, pclass2_sex_df, pclass2_embarked_df, pclass2_survived_df, color_2),
    ('Pclass 3', pclass3_df, pclass3_sex_df, pclass3_embarked_df, pclass3_survived_df, color_1),
]

## Gender bars (row 1) and Embarked bars (row 2), horizontal, class 1 -> 3
for label, _, sex_counts, embarked_counts, _, color in pclass_groups:
    for bar_row, counts, column in ((1, sex_counts, 'Sex'), (2, embarked_counts, 'Embarked')):
        fig.add_trace(go.Bar(y=counts[column],
                             x=counts['count'], orientation='h',
                             text=counts['count'],
                             name=label,
                             marker_color=color
                             ), row=bar_row, col=1)

## Pclass and Sex Sunburst
sb1 = px.sunburst(pclass_sex_df, values='count', path=['Pclass', 'Sex'],
                 color_discrete_sequence=[color_3, color_2, color_1])
fig.add_trace(sb1.data[0], row=1, col=2)

## Pclass and Embarked Sunburst
sb2 = px.sunburst(pclass_embarked_df, values='count', path=['Pclass', 'Embarked'],
                 color_discrete_sequence=[color_1, color_2, color_3])
fig.add_trace(sb2.data[0], row=2, col=2)

## Remaining panels are drawn class 3 -> 1 so that the age panels line up with
## the subplot titles (class 3 in row 3, class 2 in row 4, class 1 in row 5).
for age_row, (label, frame, _, _, survived_counts, color) in enumerate(reversed(pclass_groups), start=3):
    ## Age histogram, one panel per class
    fig.add_trace(go.Histogram(x=frame['Age'], name=label, histnorm='percent',
                               marker_color=color), row=age_row, col=1)

    ## Family size distribution (row 6). Each class plots its own passengers;
    ## previously the third trace re-plotted pclass3_df under class 1's colour,
    ## so class 1 was never shown.
    fig.add_trace(go.Histogram(x=frame['FamilySize'], name=label, histnorm='percent',
                               marker_color=color), row=6, col=1)

    ## Survived bars (row 7)
    fig.add_trace(go.Bar(x=survived_counts['Survived'],
                         y=survived_counts['count'],
                         text=survived_counts['count'],
                         name=label,
                         marker_color=color
                         ), row=7, col=1)

## Pclass and Survived Sunburst
sb3 = px.sunburst(pclass_survived_df, values='count', path=['Pclass', 'Survived'],
                 color_discrete_sequence=[color_1, color_2])
fig.add_trace(sb3.data[0], row=7, col=2)

# `title_font` is the current spelling of the deprecated `titlefont` attribute.
fig.update_layout(height=2200, width=800,
                  showlegend=False,
                  title_text="Pclass Analysis wrt", title_x=0.5,
                  title_font={'size': 25, 'family': 'Rubik'},
                  paper_bgcolor=bg_color,
                  plot_bgcolor=bg_color
)

# Outline every marker, then show label and share-of-parent on the sunbursts.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

fig.update_traces(selector = ({'type': 'sunburst'}), textinfo = 'label+percent parent')

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [35]:
## Grouping Datasets
pclass1_sex_embarked_survive_df = pclass1_df.groupby(['Sex', 'Embarked', 'Survived']).size().reset_index().rename(columns={0: 'count'})
pclass2_sex_embarked_survive_df = pclass2_df.groupby(['Sex', 'Embarked', 'Survived']).size().reset_index().rename(columns={0: 'count'})
pclass3_sex_embarked_survive_df = pclass3_df.groupby(['Sex', 'Embarked', 'Survived']).size().reset_index().rename(columns={0: 'count'})


## Creating Sunburst Figures
sb1 = px.sunburst(pclass1_sex_embarked_survive_df, values='count', path=['Sex', 'Embarked', 'Survived'])
sb2 = px.sunburst(pclass2_sex_embarked_survive_df, values='count', path=['Sex', 'Embarked', 'Survived'])
sb3 = px.sunburst(pclass3_sex_embarked_survive_df, values='count', path=['Sex', 'Embarked', 'Survived'])


## Subplots
fig = make_subplots(rows=3, cols=1, specs=[
    [{"type": "sunburst"}],
    [{"type": "sunburst"}],
    [{"type": "sunburst"}]],
            subplot_titles=("Pclass 1", "Pclass 2", "Pclass 3"))

## Plotting Figures
fig.add_trace(sb1.data[0], row=1, col=1)
fig.add_trace(sb2.data[0], row=2, col=1)
fig.add_trace(sb3.data[0], row=3, col=1)


fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(title_text="Pclass1 vs Pclass2 vs Pclass3", height=1500, template='plotly_dark', showlegend=False,
        font=dict(
            family="Rubik",
            size=14)
)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Embarked Analysis

In [36]:
# Split the passengers by port of embarkation; the filtered frames are reused
# by later cells.
embarked_S_df, embarked_Q_df, embarked_C_df = (df[df['Embarked'] == port] for port in ('S', 'Q', 'C'))

# For each second variable, count passengers per (Embarked, variable) pair on
# the full frame and on each per-port subset.
embarked_frames = (df, embarked_S_df, embarked_Q_df, embarked_C_df)

embarked_sex_df, embarked_S_sex_df, embarked_Q_sex_df, embarked_C_sex_df = (
    frame.groupby(['Embarked', 'Sex']).size().reset_index(name='count')
    for frame in embarked_frames
)

embarked_pclass_df, embarked_S_pclass_df, embarked_Q_pclass_df, embarked_C_pclass_df = (
    frame.groupby(['Embarked', 'Pclass']).size().reset_index(name='count')
    for frame in embarked_frames
)

embarked_survived_df, embarked_S_survived_df, embarked_Q_survived_df, embarked_C_survived_df = (
    frame.groupby(['Embarked', 'Survived']).size().reset_index(name='count')
    for frame in embarked_frames
)

In [37]:
fig = make_subplots(
    rows=7, cols=2,
    specs=[[{}, {"type": "sunburst"}],
           [{}, {"type": "sunburst"}],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{"colspan": 2}, None],
           [{}, {"type": "sunburst"}]],
    subplot_titles=('<i>Gender Bar', '<i>Gender Sunburst', '<i>Pclass Bar', '<i>Pclass Sunburst',
                   '<i>Embarked S Age Distribution', '<i>Embarked Q Age Distribution',
                    '<i>Embarked C Age Distribution', '<i>Family Size Distribution',
                   '<i>Survived Bar', '<i>Survived Sunburst'),
)

## Gender Bars
fig.add_trace(go.Bar(y=embarked_S_sex_df['Sex'],
                x=embarked_S_sex_df['count'], orientation='h',
                text=embarked_S_sex_df['count'],
                name='Embarked S',
                marker_color=color_3
                ), row=1, col=1)

fig.add_trace(go.Bar(y=embarked_Q_sex_df['Sex'],
                x=embarked_Q_sex_df['count'], orientation='h',
                text=embarked_Q_sex_df['count'],
                name='Embarked Q',
                marker_color=color_2
                ), row=1, col=1)

fig.add_trace(go.Bar(y=embarked_C_sex_df['Sex'],
                x=embarked_C_sex_df['count'], orientation='h',
                text=embarked_C_sex_df['count'],
                name='Embarked C',
                marker_color=color_1
                ), row=1, col=1)


## Embarked and Sex Sunburst
sb1 = px.sunburst(embarked_sex_df, values='count', path=['Embarked', 'Sex'],
                 color_discrete_sequence=[color_3, color_2, color_1])
fig.add_trace(sb1.data[0], row=1, col=2)


## Pclass Bars
fig.add_trace(go.Bar(y=embarked_S_pclass_df['Pclass'],
                x=embarked_S_pclass_df['count'], orientation='h',
                text=embarked_S_pclass_df['count'],
                name='Embarked S',
                marker_color=color_3
                ), row=2, col=1)
fig.add_trace(go.Bar(y=embarked_Q_pclass_df['Pclass'],
                x=embarked_Q_pclass_df['count'], orientation='h',
                text=embarked_Q_pclass_df['count'],
                name='Embarked Q',
                marker_color=color_2
                ),row=2, col=1)

fig.add_trace(go.Bar(y=embarked_C_pclass_df['Pclass'],
                x=embarked_C_pclass_df['count'], orientation='h',
                text=embarked_C_pclass_df['count'],
                name='Embarked C',
                marker_color=color_1
                ),row=2, col=1)


## Embarked and Pclass Sunburst
sb2 = px.sunburst(embarked_pclass_df, values='count', path=['Embarked', 'Pclass'],
                 color_discrete_sequence=[color_1, color_2, color_3])
fig.add_trace(sb2.data[0], row=2, col=2)


## Embarked Histogram
fig.add_trace(go.Histogram(x=embarked_S_df['Age'], name="Embarked S", histnorm='percent',
                          marker_color=color_1), row=3, col=1)
fig.add_trace(go.Histogram(x=embarked_Q_df['Age'], name="Embarked Q", histnorm='percent', 
                          marker_color=color_2), row=4, col=1)
fig.add_trace(go.Histogram(x=embarked_C_df['Age'], name="Embarked C", histnorm='percent', 
                          marker_color=color_3), row=5, col=1)


## Family Size Distribution
fig.add_trace(go.Histogram(x=embarked_S_df['FamilySize'], name="Embarked S", histnorm='percent',
                          marker_color=color_1), row=6, col=1)

fig.add_trace(go.Histogram(x=embarked_Q_df['FamilySize'], name="Embarked Q", histnorm='percent',
                          marker_color=color_2), row=6, col=1)

fig.add_trace(go.Histogram(x=embarked_C_df['FamilySize'], name="Embarked C", histnorm='percent',
                          marker_color=color_3), row=6, col=1)

## Survived Bars
# Counts per Survived value for each port, all drawn in row 7, column 1.
for _counts, _label, _color in (
    (embarked_S_survived_df, 'Embarked S', color_1),
    (embarked_Q_survived_df, 'Embarked Q', color_2),
    (embarked_C_survived_df, 'Embarked C', color_3),
):
    _bar = go.Bar(
        x=_counts['Survived'],
        y=_counts['count'],
        text=_counts['count'],  # print the count on each bar
        name=_label,
        marker_color=_color,
    )
    fig.add_trace(_bar, row=7, col=1)

## Embarked and Survived Sunburst
# Express sunburst with Embarked as inner ring and Survived as outer ring,
# sized by 'count'; only its first trace is copied into row 7, column 2.
sb3 = px.sunburst(
    embarked_survived_df,
    path=['Embarked', 'Survived'],
    values='count',
    color_discrete_sequence=[color_1, color_2],
)
fig.add_trace(sb3.data[0], row=7, col=2)


# Figure-wide styling for the Embarked analysis grid.
fig.update_layout(
    height=2200, width=800,
    showlegend=False,
    title_text="Embarked Analysis wrt", title_x=0.5,
    # `title_font` (layout.title.font) replaces the deprecated `titlefont`
    # alias and matches the title_text / title_x keys used on the line above.
    title_font={'size': 25, 'family': 'Rubik'},
    paper_bgcolor=bg_color,
    plot_bgcolor=bg_color,
)

# Give the markers of every trace the same dark outline.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

# Sunburst sectors show their label plus their share of the parent sector.
fig.update_traces(selector={'type': 'sunburst'}, textinfo='label+percent parent')

# Hide grid lines on all cartesian subplots.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [38]:
## Grouping Datasets
# Row counts for every (Sex, Pclass, Survived) combination, one table per port.
_group_keys = ['Sex', 'Pclass', 'Survived']
embarked_S_sex_pclass_survive_df = embarked_S_df.groupby(_group_keys).size().reset_index(name='count')
embarked_Q_sex_pclass_survive_df = embarked_Q_df.groupby(_group_keys).size().reset_index(name='count')
embarked_C_sex_pclass_survive_df = embarked_C_df.groupby(_group_keys).size().reset_index(name='count')


## Creating Sunburst Figures
# Rings from the centre outwards: Sex, then Pclass, then Survived; sector size is the row count.
sb1 = px.sunburst(
    embarked_S_sex_pclass_survive_df,
    path=['Sex', 'Pclass', 'Survived'],
    values='count',
)
sb2 = px.sunburst(
    embarked_Q_sex_pclass_survive_df,
    path=['Sex', 'Pclass', 'Survived'],
    values='count',
)
sb3 = px.sunburst(
    embarked_C_sex_pclass_survive_df,
    path=['Sex', 'Pclass', 'Survived'],
    values='count',
)


## Subplots
# Three stacked sunburst panels, titled after the three ports.
fig = make_subplots(
    rows=3, cols=1,
    specs=[[{"type": "sunburst"}] for _ in range(3)],
    subplot_titles=("Embarked S", "Embarked Q", "Embarked C"),
)

## Plotting Figures
# Copy the first trace of each express figure into rows 1, 2 and 3 in order.
for _row, _sunburst in enumerate((sb1, sb2, sb3), start=1):
    fig.add_trace(_sunburst.data[0], row=_row, col=1)


# Each sector shows its label and its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")

# Update title and height
fig.update_layout(
    title_text="Embarked S vs Embarked Q vs Embarked C",
    height=1500,
    template='plotly_dark',
    showlegend=False,
    font=dict(family="Rubik", size=14),
)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Survived Analysis

In [39]:
# Split passengers by outcome.
# The dtype table near the top of the notebook lists 'Survived' as int64, and
# comparing integers against the strings '1' / '0' selects no rows at all.
# Comparing on a str-cast copy gives the intended split whether the column
# holds integers or was already converted to strings in an earlier cell.
_survived_as_str = df['Survived'].astype(str)
survived_df = df[_survived_as_str == '1']
not_survived_df = df[_survived_as_str == '0']

# For each feature: row counts per (Survived, feature) over the whole data
# set, then over the survivors only, then over the non-survivors only.
survived_sex_df = df.groupby(['Survived', 'Sex']).size().reset_index().rename(columns={0: 'count'})
survive_sex_df = survived_df.groupby(['Survived', 'Sex']).size().reset_index().rename(columns={0: 'count'})
not_survive_sex_df = not_survived_df.groupby(['Survived', 'Sex']).size().reset_index().rename(columns={0: 'count'})

survived_pclass_df = df.groupby(['Survived', 'Pclass']).size().reset_index().rename(columns={0: 'count'})
survive_pclass_df = survived_df.groupby(['Survived', 'Pclass']).size().reset_index().rename(columns={0: 'count'})
not_survive_pclass_df = not_survived_df.groupby(['Survived', 'Pclass']).size().reset_index().rename(columns={0: 'count'})

survived_embarked_df = df.groupby(['Survived', 'Embarked']).size().reset_index().rename(columns={0: 'count'})
survive_embarked_df = survived_df.groupby(['Survived', 'Embarked']).size().reset_index().rename(columns={0: 'count'})
not_survive_embarked_df = not_survived_df.groupby(['Survived', 'Embarked']).size().reset_index().rename(columns={0: 'count'})

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

In [40]:
# Grid for the Survived analysis: rows 1-3 pair a default (cartesian) panel
# with a sunburst panel; rows 4-6 each hold one panel spanning both columns.
_paired_rows = [[{}, {"type": "sunburst"}] for _ in range(3)]
_wide_rows = [[{"colspan": 2}, None] for _ in range(3)]

fig = make_subplots(
    rows=6, cols=2,
    specs=_paired_rows + _wide_rows,
    subplot_titles=(
        '<i>Gender Bar', '<i>Gender Sunburst',
        '<i>Pclass Bar', '<i>Pclass Sunburst',
        '<i>Embarked Bar', '<i>Embarked Sunburst',
        '<i>Survived Age Distribution(%)',
        '<i>Not Survived Age Distribution(%)',
        '<i>Family Size Distribution(%)',
    ),
)


## Gender Bars
# Counts per Sex, survivors first and non-survivors second, in row 1, column 1.
for _counts, _label, _color in (
    (survive_sex_df, 'Survived', color_2),
    (not_survive_sex_df, 'Not Survived', color_1),
):
    _bar = go.Bar(
        x=_counts['Sex'],
        y=_counts['count'],
        text=_counts['count'],  # print the count on each bar
        name=_label,
        marker_color=_color,
    )
    fig.add_trace(_bar, row=1, col=1)

## Survived and Sex Sunburst
# Survived as inner ring, Sex as outer ring; sectors are coloured by 'Survived'.
sb1 = px.sunburst(
    survived_sex_df,
    path=['Survived', 'Sex'],
    values='count',
    color='Survived',
    color_discrete_sequence=[color_1, color_2],
)
fig.add_trace(sb1.data[0], row=1, col=2)


## Pclass Bars
# Horizontal counts per Pclass, survivors first, then non-survivors (row 2, column 1).
for _counts, _label, _color in (
    (survive_pclass_df, 'Survived', color_2),
    (not_survive_pclass_df, 'Not Survived', color_1),
):
    _bar = go.Bar(
        y=_counts['Pclass'],
        x=_counts['count'],
        orientation='h',
        text=_counts['count'],  # print the count on each bar
        name=_label,
        marker_color=_color,
    )
    fig.add_trace(_bar, row=2, col=1)


## Survived and Pclass Sunburst
# Survived as inner ring, Pclass as outer ring, sized by 'count'.
sb2 = px.sunburst(
    survived_pclass_df,
    path=['Survived', 'Pclass'],
    values='count',
    color_discrete_sequence=[color_2, color_1],
)
fig.add_trace(sb2.data[0], row=2, col=2)


## Embarked Bars
# Horizontal counts per port, survivors first, then non-survivors (row 3, column 1).
for _counts, _label, _color in (
    (survive_embarked_df, 'Survived', color_2),
    (not_survive_embarked_df, 'Not Survived', color_1),
):
    _bar = go.Bar(
        y=_counts['Embarked'],
        x=_counts['count'],
        orientation='h',
        text=_counts['count'],  # print the count on each bar
        name=_label,
        marker_color=_color,
    )
    fig.add_trace(_bar, row=3, col=1)

## Survived and Embarked Sunburst
# Survived as inner ring, Embarked as outer ring, sized by 'count'.
sb3 = px.sunburst(
    survived_embarked_df,
    path=['Survived', 'Embarked'],
    values='count',
    color_discrete_sequence=[color_1, color_2],
)
fig.add_trace(sb3.data[0], row=3, col=2)


## Age Histogram
# Percent-normalised age distribution: survivors in row 4, non-survivors in row 5.
for _subset, _label, _color, _row in (
    (survived_df, "Survived", color_2, 4),
    (not_survived_df, "Not Survived", color_1, 5),
):
    _age_hist = go.Histogram(
        x=_subset['Age'],
        name=_label,
        histnorm='percent',
        marker_color=_color,
    )
    fig.add_trace(_age_hist, row=_row, col=1)


## Family Size Distribution
# Both outcomes share row 6 so their family-size distributions can be compared directly.
for _subset, _label, _color in (
    (survived_df, "Survived", color_2),
    (not_survived_df, "Not Survived", color_1),
):
    _family_hist = go.Histogram(
        x=_subset['FamilySize'],
        name=_label,
        histnorm='percent',
        marker_color=_color,
    )
    fig.add_trace(_family_hist, row=6, col=1)


# Figure-wide styling for the Survived analysis grid.
fig.update_layout(
    height=1800, width=800,
    showlegend=False,
    title_text="Survived Analysis wrt", title_x=0.5,
    # `title_font` (layout.title.font) replaces the deprecated `titlefont`
    # alias and matches the title_text / title_x keys used on the line above.
    title_font={'size': 25, 'family': 'Rubik'},
    paper_bgcolor=bg_color,
    plot_bgcolor=bg_color,
)

# Give the markers of every trace the same dark outline.
fig.update_traces(marker_line_color='#3f484b',
                  marker_line_width=1.5)

# Sunburst sectors show their label plus their share of the parent sector.
fig.update_traces(selector={'type': 'sunburst'}, textinfo='label+percent parent')

# Hide grid lines on all cartesian subplots.
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Gender vs Survival, Stats:

Men Survival Rate: 18.6%

Women Survival Rate: 74%

Pclass vs Survival, Stats:

Pclass 1 Survival Rate: 62.4%

Pclass 2 Survival Rate: 47.3%

Pclass 3 Survival Rate: 24.2%

Embarked vs Survival, Stats:

Embarked S Survival Rate: 33.9%

Embarked Q Survival Rate: 39%

Embarked C Survival Rate: 54.5%

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Gender vs Pclass vs Age

In [41]:
# Row counts for every (Sex, Pclass, Age) combination.
sex_pclass_age_df = df.groupby(['Sex', 'Pclass', 'Age']).size().reset_index(name='count')

# Centre the diverging colour scale on the count-weighted mean age.
_weighted_mean_age = np.average(
    sex_pclass_age_df['Age'],
    weights=sex_pclass_age_df['count'],
)

# Treemap nested as constant root -> Sex -> Pclass; tiles are sized by count
# and coloured by Age.
fig = px.treemap(
    sex_pclass_age_df,
    path=[px.Constant("Treemap"), 'Sex', 'Pclass'],
    values='count',
    color='Age',
    hover_data=['count'],
    color_continuous_scale='RdBu',
    color_continuous_midpoint=_weighted_mean_age,
)

# Each tile shows its label and its share of the parent tile.
fig.update_traces(textinfo="label+percent parent")

fig.update_layout(
    title_text="<i> Gender </i> X <i> Pclass </i>",
    title_x=0.5,
    height=500,
    template='plotly_dark',
    font=dict(family="Rubik", size=16),
)

fig.show()

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Men vs Pclass Statistical Summary:

Men with Pclass:1, Average Age: 39

Men with Pclass:2, Average Age: 30.5

Men with Pclass:3, Average Age: 27

Women vs Pclass Statistical Summary:

Women with Pclass:1, Average Age: 34

Women with Pclass:2, Average Age: 29

Women with Pclass:3, Average Age: 23.6

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||

Gender vs Embarked vs Age

|Aspect|Elaboration|
|-|-|
|justification of chart||
|results||