In [None]:
import seaborn as sns
import pandas as pd

# Load the Titanic passenger table that ships with seaborn.
titanic_raw = sns.load_dataset('titanic')

# Bar colours indexed by the numeric sex code assigned below:
# index 0 (male) -> red, index 1 (female) -> blue.
colors = ['red', 'blue']

# Integer codes for the two text columns that are kept.
SEX_CODES = {'male': 0, 'female': 1}
WHO_CODES = {'man': 0, 'woman': 1, 'child': 2}

# Build the cleaned frame as one non-mutating chain so the cell is safe to re-run:
#   1. drop the columns that are not used,
#   2. encode `sex` and `who` column-by-column (a whole-frame replace would match the
#      same labels in any column and relies on implicit object -> int downcasting),
#   3. drop every row that still has a missing value.
# Note: Series.map yields NaN for a label missing from the mapping, so such a row
# would be removed by the dropna step.
titanic_ds = (
    titanic_raw
    .drop(columns=['embark_town', 'embarked', 'deck', 'class', 'alive'])
    .assign(
        sex=lambda frame: frame['sex'].map(SEX_CODES),
        who=lambda frame: frame['who'].map(WHO_CODES),
    )
    .dropna(axis=0)
)

titanic_ds.head()

In [None]:
# Survival rate per sex code (0 = male, 1 = female); `colors` supplies one colour per code, in order.
bp = sns.barplot(x='sex', y='survived', data=titanic_ds, palette=colors)
bp.set(xlabel='sex (0 = male, 1 = female)', ylabel='survival rate', title='Survival rate by sex')

# Write each bar's height just above it. ha='center' centres the text on the bar;
# without it the text starts at the bar's midpoint and runs off to the right.
for bar in bp.patches:
    bp.annotate(
        f'{bar.get_height():.3f}',
        (bar.get_x() + bar.get_width() / 2, bar.get_height() + .05),
        ha='center',
    )

## This bar chart, now annotated with the exact survival rate of men and women in the Titanic disaster, shows the large gap between the two groups (a difference of more than 0.5). As noted in previous labs, this may be because men helped women and children get to safety.

In [None]:
# Male passengers only (sex code 0). Take an explicit copy so that adding a column
# below modifies an independent frame instead of a slice of `titanic_ds`
# (assigning to the slice raises pandas' SettingWithCopyWarning).
titanic_male = titanic_ds[titanic_ds['sex'] == 0].copy()

# Decade-wide age bands. pd.cut uses right-closed intervals by default: (0, 10], (10, 20], ...
age_bins = [0, 10, 20, 30, 40, 50, 60, 100]
age_labels = ['0-10', '10-20', '20-30', '30-40', '40-50', '50-60', '60+']
titanic_male['AgeGroup'] = pd.cut(titanic_male['age'], bins=age_bins, labels=age_labels)

# Every bar here is male, so draw them all in the male colour (colors[0]) rather than
# cycling the two-entry sex palette across seven age groups.
bp = sns.barplot(data=titanic_male, x='AgeGroup', y='survived', color=colors[0])
bp.set(xlabel='age group', ylabel='survival rate', title='Male survival rate by age group')

# Write each bar's height just above it, centred on the bar.
for bar in bp.patches:
    bp.annotate(
        f'{bar.get_height():.3f}',
        (bar.get_x() + bar.get_width() / 2, bar.get_height() + .05),
        ha='center',
    )

## This chart shows how strongly the survival rate of men depends on age. Once again, children are the group with the highest survival rate. The next-highest groups are 30-40 and 40-50, which, despite ranking second and third respectively, are still far below the children's survival rate. Overall, men did not have a high survival rate: even children, the group with the highest rate, survived at a rate only close to 50%.

In [None]:
# Collapse age into two bands. pd.cut uses right-closed intervals by default: (0, 20] and (20, 100].
age_bins = [0, 20, 100]
age_labels = ['0-20', '20+']

# assign() returns a new frame with `AgeGroup` replaced, so this works without a
# SettingWithCopyWarning even if `titanic_male` is a filtered slice of another frame.
titanic_male = titanic_male.assign(
    AgeGroup=pd.cut(titanic_male['age'], bins=age_bins, labels=age_labels)
)

# Every bar here is male, so draw both in the male colour (colors[0]) instead of
# reusing the red/blue sex palette for the two age bands.
bp = sns.barplot(data=titanic_male, x='AgeGroup', y='survived', color=colors[0])
bp.set(xlabel='age group', ylabel='survival rate', title='Male survival rate: 0-20 vs 20+')

# Write each bar's height just above it, centred on the bar.
for bar in bp.patches:
    bp.annotate(
        f'{bar.get_height():.3f}',
        (bar.get_x() + bar.get_width() / 2, bar.get_height() + .05),
        ha='center',
    )

## A male passenger in the second group (20+) has a lower survival rate than one in the 0-20 group, although survival rates are fairly low overall. This is because the 0-20 group includes children, who were the age group with the highest survival rate overall.