In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from itertools import combinations
import plotnine as p

In [3]:
# read data
import ssl
ssl._crete_default_https_context = ssl._create_unverified_context
def read_data(file):
    return pd.read_stata("https://raw.github.com/scunning1975/mixtape/master/" + file)

In [27]:
# read Titanic data
titanic = read_data("titanic.dta")

In [28]:
# new col variable to separate those in 1st class 
# from normal people
titanic['d'] = 0

# set variable d to 1 if person in 1st class
titanic.loc[titanic['class']=='1st class', 'd'] = 1

In [29]:
# new col variable to separate men from women
titanic['sex_d'] = 0

# set variable sex_d to 1 if person was a man
titanic.loc[titanic['sex']=='man', 'sex_d'] = 1

In [30]:
# new col variable to separate adults from children
titanic['age_d'] = 0

# set variable age_d to 1 if person was an adult
titanic.loc[titanic['age']=='adults', 'age_d'] = 1

In [37]:
# new col variable to separate those who lived from
# those who didn't
titanic['survived_d'] = 0

# set variable survived_d to 1 if they lived
titanic.loc[titanic['survived']=='yes', 'survived_d'] = 1

In [45]:
# determine percentage of survivors not in first class
ey0 = titanic.loc[titanic['d']==0, 'survived_d'].mean()

# determined percentage of survivors in first class
ey1 = titanic.loc[titanic['d']==1, 'survived_d'].mean()

In [47]:
# get the simple difference in outcomes
sdo = ey1 - ey0
print("The simple difference in outcomes is {:.2%}".format(sdo))

The simple difference in outcomes is 35.38%


In [49]:
# new col variable to separate young from old and by gender
titanic['s'] = 0

titanic.loc[(titanic.sex_d == 0) & (titanic.age_d == 1), 's'] = 1
titanic.loc[(titanic.sex_d == 0) & (titanic.age_d == 0), 's'] = 2
titanic.loc[(titanic.sex_d == 1) & (titanic.age_d == 1), 's'] = 3
titanic.loc[(titanic.sex_d == 1) & (titanic.age_d == 0), 's'] = 4

In [80]:
# get number of people not in 1st class
obs = titanic.loc[titanic.d == 0].shape[0]

In [81]:
# get simple difference in outcomes between
# survivors in 1st class and survivors not in 1st class

def weighted_avg_effect(df):
    diff = df[df.d == 1].survived_d.mean()
    diff -= df[df.d == 0].survived_d.mean()
    return diff

In [89]:
# call weighted_avg_effect on each s 
# (young/old males/females)
# then get the sum of all returned values
# (i.e., get weighted average treatment effect estimate)

wate = titanic.groupby('s').apply(weighted_avg_effect).sum()
print("The weighted average treatment effect estimate is {:.2%}".format(wate))

The weighted average treatment effect estimate is 146.28%
