In [5]:
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None 
import plotly.express as px

In [45]:
# Infection day distribution based on symptoms of index

viral_loads = pd.read_csv('data/viral_load_mc.csv')

viral_load_s = viral_loads.loc[lambda x: x['14'] != 0]
viral_load_s['symptom_day'] = viral_load_s['14'].map(lambda y: np.round(y))
total_s = viral_load_s.shape[0]

count = []
for t in range(14):
   viral_load_s_i = viral_load_s.loc[lambda x: x[str(t)] > 6]
   viral_load_s_i['diff'] = viral_load_s_i['symptom_day'] - t
   count.append(viral_load_s_i['diff'].value_counts())

   pdf = {i: 0 for i in range(-10, 10)}

for s in count:
    for i in s.index:
        pdf[i] = pdf[i] + s[i]

pdf2 = {i: pdf[i] / total_s for i in pdf.keys() if (i >= 0 and pdf[i] > 0)}
pdf3 = {d: pdf2[d] / sum(pdf2.values()) for d in pdf2}
pdf4 = {d: np.prod([1 - pdf3[i] for i in pdf3 if i > d]) for d in pdf3}
pdf5 = {d: pdf3[d] * pdf4[d] for d in pdf4}
{d: pdf5[d] / sum(pdf5.values()) for d in pdf5}

{0: 0.1669353401405535,
 1: 0.22514759942847082,
 2: 0.26480058806134404,
 3: 0.21587741781643335,
 4: 0.10776722889318635,
 5: 0.01918233381609943,
 6: 0.0002894918439125493}

In [4]:
# Infection day distribution based on antigen positive of index

viral_loads = pd.read_csv('data/viral_load_mc.csv').iloc[:int(1e6)].rename(columns={'Unnamed: 0': 'id'})

prob_P = {}
for d in range(1, 13):
    for t in range(d, 13):
        viral_load_p = viral_loads.loc[lambda x: x[str(d - 1)] < 5].loc[lambda x: x[str(d)] >= 5].loc[lambda x: x[str(t)] >= 5].loc[lambda x: x[str(t + 1)] < 5]
        prob_P[(d, t)] = viral_load_p.shape[0] / 1e6
for d in range(1, 12):
    viral_load_p = viral_loads.loc[lambda x: x[str(d - 1)] < 5].loc[lambda x: x[str(d)] >= 5].loc[lambda x: x['13'] >= 5]
    prob_P[(d, 13)] = viral_load_p.shape[0] / 1e6

prob_t = {t: sum([prob_P[d] / (d[1] - d[0] + 1) for d in prob_P if d[0] <= t and t <= d[1]]) for t in range(2, 14)}

count_days = {}
for d in range(2, 14):
    viral_load_d = viral_loads.loc[lambda x: x[str(d)] >= 5]
    viral_load_d = viral_load_d[['id'] + [str(t) for t in range(0, d + 1)]]
    viral_load_d = pd.melt(viral_load_d, id_vars=['id'], var_name='day', value_name='viral_load')
    viral_load_d = viral_load_d.loc[lambda x: x['viral_load'] >= 6]
    count_days[d] = viral_load_d['day'].value_counts()

pdf = {d: count_days[d] / count_days[d].sum() for d in range(2, 14)}
pdf2 = {d: {i: pdf[d][str(i)] for i in range(2, d + 1)} for d in pdf}
pdf3 = {d: {t: pdf2[d][t] * np.prod([1 - pdf2[d][i] for i in pdf2[d] if i < t]) for t in pdf2[d]} for d in pdf2}
pdf4 = {d: {d - t: pdf3[d][t] / sum(pdf3[d].values()) for t in pdf3[d]} for d in pdf3}
pdf5 = {i: sum([prob_t[t]* pdf4[t][i]  for t in prob_t if t - 1 > i]) for i in range(14)}
pdf6 = {i: pdf5[i] for i in pdf5 if i <= 9}
{i: pdf6[i] / sum(pdf6.values()) for i in pdf6}

{0: 0.2293036219761009,
 1: 0.18128384580190515,
 2: 0.14653955482675665,
 3: 0.12010576845981617,
 4: 0.09792175735034825,
 5: 0.0781634715677543,
 6: 0.05988473052433669,
 7: 0.043097718698014466,
 8: 0.02825970454857069,
 9: 0.015439826246396676}

In [6]:
# Pareto Points

res = pd.read_csv('data/results_antigen_25_05_1.csv')

def pareto(df):
    aux = df.sort_values('expected_infecting_days', ascending=True)

    pareto_points = []    
    while aux.shape[0] > 0:
        pareto_points.append(aux.iloc[0:1])
        current_high = aux['expected_non_infecting_days'].iloc[0]
        aux = aux.loc[lambda x: x['expected_non_infecting_days'] > current_high]
        
    return pd.concat(pareto_points)

pareto_points = res.groupby(['num_antigen', 'num_pcr'], as_index=False).apply(pareto)

pareto_points.to_csv('data/pareto_points_antigen_25_05_1.csv')


In [97]:
# Table of best policies

res = pd.read_csv('data/results_19_05_1.csv').rename(columns={'variance_infencting_days': 'variance_infecting_days'})

def best_k(df, k=3):
    best_k_df = df.sort_values('expected_infecting_days', ascending=True).iloc[:k]
    return best_k_df

def best_p(df, p=.05):
    best_value = df.sort_values('expected_infecting_days', ascending=True)['expected_infecting_days'].iloc[0]
    best_p_df = df.loc[df['expected_infecting_days'] < (1 + p) * best_value]
    return best_p_df

def best_p_and_k(df, p=0.2, k=1):
    best_value = df.sort_values('expected_infecting_days', ascending=True)['expected_infecting_days'].iloc[0]
    best_p_df = df.loc[df['expected_infecting_days'] < (1 + p) * best_value]
    best_p_k_df = (
        best_p_df
        .sort_values('expected_infecting_days', ascending=True)
        .iloc[:k]
        .sort_values('variance_infecting_days', ascending=True)
        .assign(ranking = lambda x: list(range(1, x.shape[0] + 1)))
    )

    return best_p_k_df

def best_sd(df):
    best_value = df.sort_values('expected_infecting_days', ascending=True)['expected_infecting_days'].iloc[0]
    best_value_sd = df.sort_values('expected_infecting_days', ascending=True)['variance_infecting_days'].iloc[0]
    best_sd_df = df.loc[df['expected_infecting_days'] < best_value_sd + best_value]
    return best_sd_df


# (res
#     .groupby([ 'num_pcr', 'num_antigen'], as_index=False)
#     .apply(best_p_and_k)
#     # .set_index([ 'num_pcr', 'num_antigen', 'ranking'])
#     .assign(policy = lambda x: 'Antigen: ' + x['antigen_days'] + ', PCR: ' + x['pcr_days'])
#     .pivot(index=['num_pcr', 'num_antigen'],
#            columns='ranking',
#            values=['policy'])
#     .fillna('[]')
#     .applymap(lambda x: x.replace('[]', ''))
#     .to_csv('data/best_policies_table_20_05_2.csv')
# )

(res
    .groupby([ 'num_pcr', 'num_antigen'], as_index=False)
    .apply(best_p_and_k)
    # .set_index([ 'num_pcr', 'num_antigen', 'ranking'])
    .assign(policy = lambda x: 'Antigen: ' + x['antigen_days'] + ', PCR: ' + x['pcr_days'])
    .pivot(index=['num_pcr', 'num_antigen'],
           columns='ranking',
           values=['expected_infecting_days', 'expected_non_infecting_days'])
    .fillna(0)
    # .applymap(lambda x: x.replace('[]', ''))
    .to_csv('data/best_performances_table_20_05_1.csv')
)

# res_aux = res.groupby([ 'num_pcr', 'num_antigen',], as_index=False).apply(best_p_and_k)#.assign(antigen_days = lambda x: 'Antigen: ' + x['antigen_days'], pcr_days = lambda x: 'PCR: ' + x['pcr_days'])

# res1 = res_aux.loc[lambda x: x['ranking'] == 1][['num_pcr', 'num_antigen', 'antigen_days', 'pcr_days', 'ranking']]
# res2 = res_aux.loc[lambda x: x['ranking'] == 2][['num_pcr', 'num_antigen', 'antigen_days', 'pcr_days', 'ranking']]
# res3 = res_aux.loc[lambda x: x['ranking'] == 3][['num_pcr', 'num_antigen', 'antigen_days', 'pcr_days', 'ranking']]

# res1.merge(res2, on=['num_pcr', 'num_antigen']).merge(res3, on=['num_pcr', 'num_antigen']).sort_values(['num_pcr', 'num_antigen']).set_index(['num_pcr', 'num_antigen'])

# aux = res.groupby([ 'num_pcr', 'num_antigen',], as_index=False).apply(best_p_and_k).assign(antigen_days = lambda x: 'Antigen: ' + x['antigen_days'], pcr_days = lambda x: 'PCR: ' + x['pcr_days']).groupby([ 'num_pcr', 'num_antigen', 'ranking']).agg({'antigen_days': 'first', 'pcr_days':'first'}).unstack()
# aux.columns = aux.columns.swaplevel(0, 1)
# aux.sort_index(level=0, axis=1).fillna('[]').applymap(lambda x: x.replace('[]', '')).to_csv('data/best_policies_table_20_05_1.csv')

In [30]:
pareto_points.assign(combo = lambda x: str(y['num_antigen']) + ',' + str(y['num_pcr']))

NameError: name 'y' is not defined

In [37]:
pdf2

{0: 1.0,
 1: 0.9989026309458073,
 2: 0.9002216137504923,
 3: 0.6164350165325337,
 4: 0.2849591641515709,
 5: 0.050062788229371935,
 6: 0.0007553786983207074}