# Left-handedness age-gap analysis & survival analysis examples
This notebook reproduces the Bayes-rule analysis showing how cohort-dependent rates of left-handedness can create an apparent gap in mean age at death, and includes a short survival-analysis example.

## Part A — Bayes-rule reproduction of the left-handed age-gap

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Remote sources: left-handedness rates (CSV) and death counts by age (TSV).
LH_URL = (
    "https://gist.githubusercontent.com/mbonsma/8da0990b71ba9a09f7de395574e54df1/"
    "raw/aec88b30af87fad8d45da7e774223f91dad09e88/lh_data.csv"
)
DEATH_URL = (
    "https://gist.githubusercontent.com/mbonsma/2f4076aab6820ca1807f4e29f75f18ec/"
    "raw/62f3ec07514c7e31f5979beeca86f19991540796/cdc_vs00199_table310.tsv"
)

# Birth_year is derived as 1986 - Age (1986 is presumably the year the
# handedness data were collected -- confirm against the data source).
# Mean_lh is the row-wise average of the 'Female' and 'Male' columns.
lh = pd.read_csv(LH_URL).assign(
    Birth_year=lambda frame: 1986 - frame['Age'],
    Mean_lh=lambda frame: frame[['Female', 'Male']].mean(axis=1),
)

# skiprows=[1] drops the file's second line; rows with no 'Both Sexes' value
# are removed before 'Age' is cast to int.
deaths = (
    pd.read_csv(DEATH_URL, sep='\t', skiprows=[1])
    .dropna(subset=['Both Sexes'])
    .reset_index(drop=True)
    .astype({'Age': int})
)

lh.head(), deaths.head()

### Plot left-handed rates by birth year

In [None]:
# Mean left-handedness rate against birth year, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(lh['Birth_year'], lh['Mean_lh'], marker='o')
ax.set_xlabel('Birth year')
ax.set_ylabel('Mean % left-handed')
ax.set_title('Digitized left-handedness rates (Gilbert & Wysocki, 1986 data)')
ax.grid()
plt.show()

### Plot death distribution (1999)

In [None]:
# Death counts ('Both Sexes') against age, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(deaths['Age'], deaths['Both Sexes'], marker='o')
ax.set_xlabel('Age')
ax.set_ylabel('Number of deaths (Both Sexes)')
ax.set_title('US deaths by age (1999)')
ax.grid()
plt.show()

### Functions for Bayes-rule model

In [None]:
def P_lh_given_A(ages_of_death, lefthanded_data, study_year=1990):
    """Return P(left-handed | age at death) for each requested age.

    Each age is converted to a birth year (``study_year - age``) and the
    left-handedness rate for that birth year is looked up in
    ``lefthanded_data``.

    Parameters
    ----------
    ages_of_death : array-like of int
        Ages at death; values are cast to ``int``.
    lefthanded_data : pandas.DataFrame
        Needs 'Birth_year' and 'Mean_lh' columns. 'Mean_lh' is divided by
        100 before being returned. If a birth year occurs more than once,
        the last row wins.
        NOTE(review): the first 10 rows supply the "late" plateau and the
        last 10 rows the "early" plateau, so rows are assumed to run from
        the most recent birth year to the earliest -- confirm against the
        data file.
    study_year : int, default 1990
        Year in which the deaths are taken to occur.

    Returns
    -------
    numpy.ndarray of float
        One rate per input age, same shape as ``ages_of_death``:

        * birth year present in the table -> that row's rate;
        * birth year earlier than every tabulated year -> mean of the last
          10 rows;
        * birth year later than every tabulated year -> mean of the first
          10 rows;
        * birth year inside the tabulated range but missing from it ->
          linear interpolation between the neighbouring years.
    """
    birth_years = study_year - np.asarray(ages_of_death, dtype=int)
    if birth_years.size == 0:
        # Nothing to look up; also avoids np.interp rejecting an empty table.
        return np.zeros(birth_years.shape)

    mean_lh = lefthanded_data['Mean_lh']
    early_rate = mean_lh.iloc[-10:].mean() / 100
    late_rate = mean_lh.iloc[:10].mean() / 100

    # Going through a dict keeps "last row wins" for duplicated birth years
    # and lets us hand np.interp strictly increasing sample points.
    rate_by_year = lefthanded_data.set_index('Birth_year')['Mean_lh'].to_dict()
    known_years = sorted(rate_by_year)
    known_rates = np.array([rate_by_year[year] for year in known_years], dtype=float) / 100

    # np.interp returns the tabulated value at exact sample points, `left`
    # below the first point and `right` above the last, so a single
    # vectorised call replaces the per-age loop.
    return np.interp(
        birth_years,
        np.array(known_years, dtype=float),
        known_rates,
        left=early_rate,
        right=late_rate,
    )

def P_lh(deaths, lh, study_year=1990):
    """Return the overall probability of being left-handed among the deceased.

    The per-age rate from ``P_lh_given_A`` is averaged over the ages in
    ``deaths``, weighting each age by its share of the 'Both Sexes' count.

    Parameters
    ----------
    deaths : pandas.DataFrame
        Needs 'Age' and 'Both Sexes' columns.
    lh : pandas.DataFrame
        Left-handedness table, passed through to ``P_lh_given_A``.
    study_year : int, default 1990
        Passed through to ``P_lh_given_A``.
    """
    lh_rate_by_age = P_lh_given_A(deaths['Age'], lh, study_year)
    death_counts = deaths['Both Sexes']
    age_weights = death_counts / death_counts.sum()
    return np.sum(lh_rate_by_age * age_weights)

def P_A_given_lh(ages, deaths, lh, study_year=1990):
    """Return P(age at death | left-handed) over ``ages`` via Bayes' rule.

    Parameters
    ----------
    ages : array-like
        Ages at which to evaluate the distribution.
    deaths : pandas.DataFrame
        Needs 'Age' and 'Both Sexes' columns; an age absent from the table
        counts as zero deaths.
    lh : pandas.DataFrame
        Left-handedness table, passed to ``P_lh`` and ``P_lh_given_A``.
    study_year : int, default 1990
        Passed to ``P_lh`` and ``P_lh_given_A``.

    Returns
    -------
    numpy.ndarray
        One probability per entry of ``ages``, rescaled to sum to 1.
    """
    age_grid = np.asarray(ages)
    deaths_by_age = deaths.set_index('Age')['Both Sexes']

    # Prior P(A): each requested age's share of the deaths at those ages.
    prior = np.array([deaths_by_age.get(age, 0) for age in age_grid], float)
    prior = prior / prior.sum()

    marginal_lh = P_lh(deaths, lh, study_year)
    likelihood = P_lh_given_A(age_grid, lh, study_year)

    posterior = (likelihood * prior) / marginal_lh
    # Rescale so the returned values form a distribution over `ages`.
    return posterior / posterior.sum()

def P_A_given_rh(ages, deaths, lh, study_year=1990):
    """Return P(age at death | right-handed) over ``ages`` via Bayes' rule.

    Right-handedness is treated as the complement of left-handedness, both
    per age and overall.

    Parameters
    ----------
    ages : array-like
        Ages at which to evaluate the distribution.
    deaths : pandas.DataFrame
        Needs 'Age' and 'Both Sexes' columns; an age absent from the table
        counts as zero deaths.
    lh : pandas.DataFrame
        Left-handedness table, passed to ``P_lh`` and ``P_lh_given_A``.
    study_year : int, default 1990
        Passed to ``P_lh`` and ``P_lh_given_A``.

    Returns
    -------
    numpy.ndarray
        One probability per entry of ``ages``, rescaled to sum to 1.
    """
    age_grid = np.asarray(ages)
    counts_by_age = deaths.set_index('Age')['Both Sexes']

    # Prior P(A): each requested age's share of the deaths at those ages.
    age_prior = np.array([counts_by_age.get(age, 0) for age in age_grid], float)
    age_prior = age_prior / age_prior.sum()

    overall_rh = 1 - P_lh(deaths, lh, study_year)
    rh_given_age = 1 - P_lh_given_A(age_grid, lh, study_year)

    unnormalised = (rh_given_age * age_prior) / overall_rh
    # Rescale so the returned values form a distribution over `ages`.
    return unnormalised / unnormalised.sum()

In [None]:
# Evaluate both conditional age-at-death distributions on ages 6..114
# (np.arange excludes the stop value 115).
ages = np.arange(6, 115)
p_lh = P_A_given_lh(ages, deaths, lh)
p_rh = P_A_given_rh(ages, deaths, lh)

# Each distribution is normalised to sum to 1, so the mean age at death is
# the probability-weighted sum of the ages.
mean_lh = np.sum(ages * p_lh)
mean_rh = np.sum(ages * p_rh)
print('Mean age LH:', mean_lh)
print('Mean age RH:', mean_rh)
# The headline number of the analysis: how much older right-handers are at
# death on average under this model.
print('Age gap (RH - LH):', mean_rh - mean_lh)

# Labelled figure so it stands on its own when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(ages, p_lh, label='P(A|LH)')
ax.plot(ages, p_rh, label='P(A|RH)')
ax.set_xlabel('Age at death')
ax.set_ylabel('Probability')
ax.set_title('Age-at-death distribution by handedness')
ax.legend()
plt.show()