In [None]:
import sys
sys.path.append("..")

import os
import json
import numpy as np
import datetime as dt
import pandas as pd
from src.VaccineEffectiveness import VaccineEffectiveness

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Folder that holds the output tables of the cohort named in the last path component.
path_file = os.path.join("..", "output", "data", "COHORT_21JAN2021_31AUG2021")

# "CPF" and "PAR" are forced to str instead of letting pandas infer a numeric dtype
# (presumably so identifiers that start with a zero keep it — confirm).
df_pair = pd.read_csv(os.path.join(path_file, "pareados_corona_1.csv"), dtype={"CPF": str, "PAR": str})
# No dtype override here: pandas infers the type of every column in this table.
df_info = pd.read_csv(os.path.join(path_file, "pares_eventos_corona_1.csv"))

In [None]:
# Build the analysis object from the two tables loaded from CSV (df_pair and df_info).
ve_obj = VaccineEffectiveness(df_pair, df_info)

In [None]:
# Compute the intervals with 2021-08-31 as reference date (presumably the end of
# follow-up, matching "31AUG2021" in the data folder name — confirm).
# NOTE(review): return_=True presumably makes the method return the result in
# addition to keeping it on the object — confirm against VaccineEffectiveness.
intervals = ve_obj.define_intervals(dt.date(2021, 8, 31), return_=True)

In [6]:
# Display the entry stored under index/key 1 of ve_obj.intervals as a sample.
ve_obj.intervals[1]

{'CPF CASO': '10965394387',
 'CPF CONTROLE': '11633581349',
 'D1': ([('D1 to D2', 24),
   ('D1 to D1_CONTROL', 66),
   ('D1 to COVID', nan),
   ('D1 to GERAL', nan),
   ('D1 to FIM', 222)],
  [('D1 to D1_CONTROL', 66),
   ('D1 to COVID_CONTROL', nan),
   ('D1 to GERAL_CONTROL', nan),
   ('D1 to FIM', 222)]),
 'D2': ([('D2 to D1_CONTROL', 42),
   ('D2 to COVID', nan),
   ('D2 to GERAL', nan),
   ('D2 to FIM', 198)],
  [('D2 to D1_CONTROL', 42),
   ('D2 to COVID_CONTROL', nan),
   ('D2 to GERAL_CONTROL', nan),
   ('D2 to FIM', 198)])}

In [23]:
# Fetch the records of one case and one control by CPF for manual inspection.
# NOTE(review): these literals look like personal identifiers — consider reading
# them from a non-committed source or masking them before sharing the notebook.
caso_dict = ve_obj.casos_hash['17381339315']
controle_dict= ve_obj.controles_hash['02604396300']

In [24]:
# Print both records in a single call; the "\n" item joined with sep="\n"
# leaves the same blank lines between them as three separate prints would.
print(caso_dict, "\n", controle_dict, sep="\n")

{'CPF': '17381339315', 'DATA D1': Timestamp('2021-02-28 00:00:00'), 'DATA D2': Timestamp('2021-05-19 00:00:00'), 'DATA OBITO COVID': NaT, 'DATA OBITO GERAL': NaT, 'TIPO': 'CASO', 'PAR': '02604396300'}


{'CPF': '02604396300', 'DATA D1': NaT, 'DATA D2': NaT, 'DATA OBITO COVID': Timestamp('2021-04-16 00:00:00'), 'DATA OBITO GERAL': NaT, 'TIPO': 'CONTROLE', 'PAR': '17381339315'}


In [18]:
# Keep the rows where either "OBITO COVID" column is filled in, then show the
# single row at position 23 of that subset.
intervals[
    intervals["CASO OBITO COVID"].notna()
    | intervals["CONTROLE OBITO COVID"].notna()
][23:24]

Unnamed: 0,CPF CASO,CPF CONTROLE,CASO INTERVALO D1,CASO CENSORADO D1,CASO OBITO COVID,CONTROLE INTERVALO D1,CONTROLE CENSORADO D1,CONTROLE OBITO COVID,CASO INTERVALO D2,CASO CENSORADO D2,CONTROLE INTERVALO D2,CONTROLE CENSORADO D2
2529,17381339315,2604396300,80.0,True,NaT,184.0,True,2021-04-16,104.0,True,104.0,True


In [25]:
def _to_date(value):
    '''Return `value.date()`, or NaN when `value` is missing (None, NaN or NaT).'''
    return np.nan if pd.isna(value) else value.date()


def _days_between(later, earlier):
    '''Return `(later - earlier).days`, or NaN when either argument is missing.'''
    if pd.isna(later) or pd.isna(earlier):
        return np.nan
    return (later - earlier).days


def compare_pair_survival(caso_hash, controle_hash, events_col, final_cohort):
    '''
        Compute, for one case/control pair, the number of days between the
        case's D1 date (and, separately, the case's D2 date) and every other
        event of the pair. Any interval whose start or end date is missing is
        reported as NaN instead of raising.

        Args:
            caso_hash:
                dictionary. Record of the case. Must contain the key "CPF" and
                the keys named by the values of `events_col`; each event value
                is a timestamp exposing `.date()` or a missing value
                (None, NaN, NaT).
            controle_hash:
                dictionary. Record of the control, same layout as `caso_hash`.
            events_col:
                dictionary. Maps the labels "D1", "D2", "OBITO COVID" and
                "OBITO GERAL" to the key used for that event in both records.
            final_cohort:
                datetime.date. Date used for the "... to FIM" intervals.
        Return:
            res:
                dictionary with the keys "CPF CASO", "CPF CONTROLE", "D1" and
                "D2". "D1" and "D2" each hold a tuple
                (case_events, control_events); both elements are lists of
                (label, days) tuples where `days` is an int or NaN. "D1" counts
                days from the case's D1 date, "D2" from the case's D2 date.
    '''
    # Convert every event once; missing values become NaN.
    caso_d1 = _to_date(caso_hash[events_col["D1"]])
    caso_d2 = _to_date(caso_hash[events_col["D2"]])
    caso_covid = _to_date(caso_hash[events_col["OBITO COVID"]])
    caso_geral = _to_date(caso_hash[events_col["OBITO GERAL"]])
    control_d1 = _to_date(controle_hash[events_col["D1"]])
    control_covid = _to_date(controle_hash[events_col["OBITO COVID"]])
    control_geral = _to_date(controle_hash[events_col["OBITO GERAL"]])

    def events_since(label, start_date):
        # Both sides of the pair are measured from the same date of the case.
        caso_events = [
            (f"{label} to D2", _days_between(caso_d2, start_date)),
            (f"{label} to D1_CONTROL", _days_between(control_d1, start_date)),
            (f"{label} to COVID", _days_between(caso_covid, start_date)),
            (f"{label} to GERAL", _days_between(caso_geral, start_date)),
            (f"{label} to FIM", _days_between(final_cohort, start_date)),
        ]
        control_events = [
            (f"{label} to D1_CONTROL", _days_between(control_d1, start_date)),
            (f"{label} to COVID_CONTROL", _days_between(control_covid, start_date)),
            (f"{label} to GERAL_CONTROL", _days_between(control_geral, start_date)),
            (f"{label} to FIM", _days_between(final_cohort, start_date)),
        ]
        return caso_events, control_events

    return {
        "CPF CASO": caso_hash["CPF"],
        "CPF CONTROLE": controle_hash["CPF"],
        "D1": events_since("D1", caso_d1),
        # A case without a D2 date yields NaN for every "D2 to ..." interval.
        "D2": events_since("D2", caso_d2),
    }

In [28]:
# Every event label maps to the record key "DATA <label>".
cols = {
    label: f"DATA {label}"
    for label in ("D1", "D2", "OBITO COVID", "OBITO GERAL")
}
# Run the pair comparison using 2021-08-31 as the final date and display the result.
info = compare_pair_survival(
    caso_dict,
    controle_dict,
    cols,
    dt.date(2021, 8, 31),
)
info

{'CPF CASO': '17381339315',
 'CPF CONTROLE': '02604396300',
 'D1': ([('D1 to D2', 80),
   ('D1 to D1_CONTROL', nan),
   ('D1 to COVID', nan),
   ('D1 to GERAL', nan),
   ('D1 to FIM', 184)],
  [('D1 to D1_CONTROL', nan),
   ('D1 to COVID_CONTROL', 47),
   ('D1 to GERAL_CONTROL', nan),
   ('D1 to FIM', 184)]),
 'D2': ([('D2 to D2', 0),
   ('D2 to D1_CONTROL', nan),
   ('D2 to COVID', nan),
   ('D2 to GERAL', nan),
   ('D2 to FIM', 104)],
  [('D2 to D1_CONTROL', nan),
   ('D2 to COVID_CONTROL', -33),
   ('D2 to GERAL_CONTROL', nan),
   ('D2 to FIM', 104)])}

In [32]:

# Order the D1 events of each side of the pair by number of days.
# NaN intervals are dropped BEFORE sorting: every comparison against NaN is
# False, so sorting a list that still contains NaN can leave the remaining
# values out of order.
info_d1_caso = sorted(
    (event for event in info["D1"][0] if not pd.isna(event[1])),
    key=lambda tup: tup[1],
)
info_d1_controle = sorted(
    (event for event in info["D1"][1] if not pd.isna(event[1])),
    key=lambda tup: tup[1],
)

In [33]:
# Display the control-side D1 events that remain once NaN intervals are removed.
info_d1_controle

[('D1 to COVID_CONTROL', 47), ('D1 to FIM', 184)]