In [None]:
import data_io
import pandas as pd
import matplotlib.pyplot as plt
import parameters as param
import numpy as np

## Load files

In [None]:
# `upper` is added to the *_MAX bounds so that range() includes them.
upper = 1
agg_markov_l = []
symptom_times = range(param.SYMP_MIN, param.SYMP_MAX + upper, 10)
race_scores = range(param.RACE_MIN, param.RACE_MAX + upper)
for time_since_symptoms in symptom_times:
    for race in race_scores:
        # One workbook is read per (time since symptoms, RACE score) pair.
        filename = param.build_filename_prefix(
            time_since_symptoms=time_since_symptoms,
            race=race,
            suffix='_aggregated_markov_changes',
            format='.xlsx')
        agg_markov_name = data_io.BASIC_ANALYSIS_OUTPUT / filename
        print(agg_markov_name)
        agg_markov = pd.read_excel(agg_markov_name)
        agg_markov_l.append(agg_markov)

# Stack every loaded frame; each frame keeps its own row labels (no ignore_index).
agg_markov_total = pd.concat(agg_markov_l)

## BOXPLOT

In [None]:
# Single-panel boxplot of QALYdiff_af with one box per RACE column.
fig, ax = plt.subplots(figsize=(15, 10))
qaly_by_race = agg_markov_total.pivot(columns='RACE', values='QALYdiff_af')
# NOTE(review): the x365 scaling presumably converts QALYs in years to days,
# in line with the y-axis label below -- confirm the units of QALYdiff_af.
qaly_days_by_race = qaly_by_race * 365
qaly_days_by_race.boxplot(ax=ax, sym='+')
ax.set_title('Generic chooses hospital A, Revised chooses hospital B')
ax.set_xlabel('RACE score')
ax.set_ylabel('Quality adjusted days difference \nwhen going to hospital B instead of hospital A according to revised model')
fig.tight_layout()
fig.savefig(data_io.GRAPH_OUTPUT / 'trend_by_race_score.png', dpi=500)

In [None]:
def extract_type(val):
    """Return the second space-separated token of ``val`` without parentheses.

    ``val`` is split on single spaces, the token at position 1 is taken, and
    every ``(`` and ``)`` character in that token is removed.

    Raises:
        IndexError: if ``val`` contains no space (there is no second token).
    """
    second_token = val.split(' ')[1]
    # Delete both kinds of parenthesis in a single pass.
    return second_token.translate(str.maketrans('', '', '()'))


def change_type(df):
    """Build a ``'<before> to <after>'`` label for one row.

    ``df`` is a row-like object exposing ``BestCenter_be`` and
    ``BestCenter_af``; each value is reduced with ``extract_type`` and the two
    results are joined with ``' to '``.
    """
    before = extract_type(df.BestCenter_be)
    after = extract_type(df.BestCenter_af)
    return ' to '.join((before, after))

# Label every row of the combined frame. The labels must be computed from
# agg_markov_total itself: agg_markov only holds the last workbook read by the
# loading loop, so applying change_type to it would index-align that single
# file's labels onto the rows of every other file.
agg_markov_total['ChangeType'] = agg_markov_total.apply(change_type, axis=1)

In [None]:
# One boxplot panel per ChangeType, each with one box per RACE column.
# Group by the bare column name rather than a one-element list: with a list
# key, pandas >= 2.0 yields 1-tuples when iterating the groups, which would
# show up in the panel titles as "('X to Y',)" instead of "X to Y".
gb = agg_markov_total.groupby('ChangeType')
# The grid has exactly four panels; a fifth group would raise IndexError below.
fig, axs = plt.subplots(1, 4, figsize=(17, 10), sharex=True, sharey=True)
axs = axs.flatten()
for i, (gn, g) in enumerate(gb):
    qaly_days_by_race = g.pivot(columns='RACE', values='QALYdiff_af') * 365
    qaly_days_by_race.boxplot(ax=axs[i], sym='+')
    axs[i].set_title(gn)
    if i == 0:
        # The y axis is shared, so only the first panel carries the label.
        axs[i].set_ylabel('Quality adjusted days difference \nwhen going to hospital B instead of hospital A according to revised model')
fig.suptitle('Generic chooses hospital A, Revised chooses hospital B')
fig.savefig(data_io.GRAPH_OUTPUT / 'trend_by_race_score_type_breakdown.png', dpi=500)

## HISTOGRAM

In [None]:
# 5x2 grid of histograms of QALYdiff_af (scaled by 365), one panel per RACE group.
fig, axs = plt.subplots(5, 2, sharex=True, sharey=True, figsize=(15, 10))
axs = axs.flatten()
axs[4].set_ylabel('Frequency')
# Only the two bottom panels carry the shared x-axis label.
axs[8].set_xlabel('QALYdiff')
axs[9].set_xlabel('QALYdiff')
fig.suptitle('Histogram of QALYdiff_af in days\nout of 10k patient locations')
# The bin edges are the same for every panel, so build them once.
hist_bins = np.arange(0, 0.2 * 365, 0.0025 * 365)
for i, (gn, g) in enumerate(agg_markov_total.groupby('RACE')):
    (g['QALYdiff_af'] * 365).hist(ax=axs[i], bins=hist_bins, edgecolor='black')
    # Show the RACE group as the panel title. Writing it with set_xlabel would
    # overwrite the 'QALYdiff' axis labels set on the bottom panels above.
    axs[i].set_title(gn)
fig.savefig(data_io.GRAPH_OUTPUT / 'histogram_by_race_score.png', dpi=500)