In [36]:
import numpy as np
import pandas as pd
import seaborn as sns


class display(object):
    """Display HTML representation of multiple objects.

    Every positional argument is a *string* containing a Python expression.
    When the instance is rendered, each expression is evaluated with
    ``eval`` and the results are shown one after another, each captioned
    with the source text of the expression that produced it.

    Parameters
    ----------
    *args : str
        Expression strings to evaluate and show.

    Notes
    -----
    * ``eval`` executes arbitrary code. Pass only trusted, hand-written
      expressions -- never text that comes from an external source.
    * Expressions are evaluated again on every rendering; nothing is cached.
    * Inside a notebook this class name shadows IPython's own ``display``
      helper for the rest of the session.
    """
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""

    def __init__(self, *args):
        self.args = args

    @staticmethod
    def _as_html(obj):
        """Return ``obj``'s rich HTML repr, or its escaped ``repr`` in ``<pre>``.

        The fallback covers values that have no ``_repr_html_`` method
        (scalars, strings, lists, ...) and hooks that return ``None``.
        """
        import html

        hook = getattr(obj, '_repr_html_', None)
        rich = hook() if callable(hook) else None
        if rich is None:
            return '<pre>{0}</pre>'.format(html.escape(repr(obj)))
        return rich

    def _repr_html_(self):
        """Return one floated ``<div>`` per expression, joined by newlines."""
        import html

        # The caption is escaped so that '<', '>' and '&' inside an
        # expression (e.g. "df[df.a<df.b]") cannot break the markup.
        return '\n'.join(
            self.template.format(html.escape(a, quote=False),
                                 self._as_html(eval(a)))
            for a in self.args)

    def __repr__(self):
        """Return the plain-text form: each expression above its ``repr``."""
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)
# Load seaborn's "titanic" example dataset and preview its first five rows.
titanic = sns.load_dataset(name='titanic')
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [37]:
# Non-null value count of every remaining column for each (sex, survived) group.
titanic.groupby(by=['sex', 'survived']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,pclass,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
sex,survived,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
female,0,81,64,81,81,81,81,81,81,81,6,81,81,81
female,1,233,197,233,233,233,231,233,233,233,91,231,233,233
male,0,468,360,468,468,468,468,468,468,468,61,468,468,468
male,1,109,93,109,109,109,109,109,109,109,45,109,109,109


In [38]:
# Same grouping, restricted to one column: number of fare values per group.
(
    titanic
    .groupby(by=['sex', 'survived'])['fare']
    .count()
)

sex     survived
female  0            81
        1           233
male    0           468
        1           109
Name: fare, dtype: int64

In [39]:
# `survived` is a 0/1 flag, so its per-group mean is the survival rate.
(
    titanic
    .groupby('sex')['survived']
    .mean()
)

sex
female    0.742038
male      0.188908
Name: survived, dtype: float64

This immediately gives us some insight: overall, three of every four females on board survived, while only one in five males survived!

This is useful, but we might like to go one step deeper and look at survival by both sex and, say, class.

In [41]:
# Survival rate by sex x class, next to its transpose.  The expressions are
# passed as strings because `display` evals them and uses the source text as
# the caption; the second expression is the first one with ".T" appended.
display(*(
    "titanic.groupby(['sex','class'])['survived'].aggregate('mean').unstack()" + suffix
    for suffix in ("", ".T")
))

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447

sex,female,male
class,Unnamed: 1_level_1,Unnamed: 2_level_1
First,0.968085,0.368852
Second,0.921053,0.157407
Third,0.5,0.135447


In [50]:
# pivot_table aggregates with the mean unless `aggfunc` says otherwise, so the
# second call below only spells out the default explicitly.
# The aggregation function reduces every (index, columns) cell to the single
# number that is reported in the resulting table.
# Reference: https://dfrieds.com/data-analysis/pivot-table-python-pandas.html

display(
    "titanic.pivot_table('survived', index='sex', columns='class')",
    "titanic.pivot_table('survived', index='class', columns='sex',aggfunc='mean')",
)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447

sex,female,male
class,Unnamed: 1_level_1,Unnamed: 2_level_1
First,0.968085,0.368852
Second,0.921053,0.157407
Third,0.5,0.135447
