In [17]:
# Import libraries
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, ttest_ind, chi2_contingency

In [4]:
# Read both CSV exports into DataFrames.
# Note: each file's first column has no header, so pandas loads it as a
# regular column named 'Unnamed: 0' (visible in the previews below).
lifespans, iron = (
    pd.read_csv(csv_path)
    for csv_path in ('familiar_lifespan.csv', 'familiar_iron.csv')
)
lifespans.head()

Unnamed: 0,pack,lifespan
0,vein,76.25509
1,artery,76.404504
2,artery,75.952442
3,artery,76.923082
4,artery,73.771212


##### Analyse Vein pack #####

In [6]:
# Lifespans of the subscribers on the 'vein' pack, and their sample mean
is_vein = lifespans['pack'] == 'vein'
vein_pack_lifespans = lifespans.loc[is_vein, 'lifespan']
avg_vein_lifespan = vein_pack_lifespans.mean()
avg_vein_lifespan

76.16901335636044

In [8]:
# One-sample t-test: is the mean lifespan of vein pack subscribers
# different from a hypothesised population mean of 73 years?
tstat, pval = ttest_1samp(a=vein_pack_lifespans, popmean=73)
pval

5.972157921433211e-07

##### Analyse Artery pack #####

In [11]:
# Lifespans of the subscribers on the 'artery' pack, and their sample mean
is_artery = lifespans['pack'] == 'artery'
artery_pack_lifespans = lifespans.loc[is_artery, 'lifespan']
avg_artery_lifespan = artery_pack_lifespans.mean()
avg_artery_lifespan

74.8736622351704

In [13]:
# Two-sample t-test on independent samples: does the mean lifespan differ
# between vein pack and artery pack subscribers?
tstat, pval = ttest_ind(a=vein_pack_lifespans, b=artery_pack_lifespans)
pval

0.05588883079070819

##### Side Effects #####

In [14]:
# Preview the first five rows of the iron-level data
iron.head(n=5)

Unnamed: 0,pack,iron
0,vein,low
1,artery,normal
2,artery,normal
3,artery,normal
4,artery,high


In [15]:
# Contingency table of counts: iron level (rows) by subscription pack (columns)
xtab = pd.crosstab(index=iron['iron'], columns=iron['pack'])
xtab

pack,artery,vein
iron,Unnamed: 1_level_1,Unnamed: 2_level_1
high,87,20
low,29,140
normal,29,40


In [18]:
# Chi-square test on the contingency table: is a subscriber's iron level
# associated with the pack they subscribe to?
chi2, pval, dpf, expected = chi2_contingency(observed=xtab)
pval

9.359749337432874e-25