# LIB

In [1]:
import os
import sys
sys.path.append("..")

import numpy as np
import pandas as pd
import datetime as dt

import lib.survival_f as surv

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

# Auxiliary functions

In [2]:
def stratify_pares(data, data_pop):
    '''
        Stratification countings from the matched pairs dataset and the eligible population of
        the specific vaccine.
        
        Args:
            data:
                pandas.DataFrame. File of pairs with only the basic dates and type of individuals 
                (case or control).
            data_pop:
                pandas.DataFrame. File of eligible individuals for the matching of the vaccine.
        Return:
            hashd:
                dictionary.
    '''
    # --> collect data on age and sex
    data = data.merge(data_pop[["cpf", "sexo", "idade", "bairro"]], left_on="CPF", right_on="cpf", how="left")
    cols = ["DATA D1", "DATA D2"]
    for j in cols:
        data[j] = pd.to_datetime(data[j], format="%Y-%m-%d", errors="coerce")
    data["VACINA STATUS ATE FIM DA COORTE"] = data.apply(lambda x: f_vacina(x, config["final cohort"]), axis=1)
    
    # --> Strata conditions
    only_pareados = data["PAREADO"]==True
    only_d1 = pd.notna(data["DATA D1"]) & pd.isna(data["DATA D2"])
    d1_and_d2 = pd.notna(data["DATA D1"]) & pd.notna(data["DATA D2"])
    no_vaccine = pd.isna(data["DATA D1"]) & pd.isna(data["DATA D2"])
    males = data["sexo"]=="M"
    females = data["sexo"]=="F"
    idade_6069 = (data["idade"]>=60) & (data["idade"]<=69)
    idade_7079 = (data["idade"]>=70) & (data["idade"]<=79)
    idade_80 = (data["idade"]>=80) & (data["idade"]<=105)
    tipo_caso = data["TIPO"]=="CASO"
    tipo_controle = data["TIPO"]=="CONTROLE"
    obito_covid = pd.notna(data["DATA OBITO COVID"])
    obito_geral = pd.notna(data["DATA OBITO GERAL"])
    novac = data["VACINA STATUS ATE FIM DA COORTE"]=="NO VACCINE"
    d1_013 = data["VACINA STATUS ATE FIM DA COORTE"]=="D1 0-13 DAYS"
    d1_14 = data["VACINA STATUS ATE FIM DA COORTE"]=="D1 >= 14 DAYS"
    d2_013 = data["VACINA STATUS ATE FIM DA COORTE"]=="D2 0-13 DAYS"
    d2_14 = data["VACINA STATUS ATE FIM DA COORTE"]=="D2 >= 14 DAYS"
    
    # --> Apply strata conditions for whole population and separately for case and control individuals.
    hashd = {
        "TOTAL": data[only_pareados],
        "CASO": data[only_pareados & tipo_caso],
        "CONTROLE": data[only_pareados & tipo_controle],
        "NO VACCINE": data[(only_pareados) & (no_vaccine)],
        "ONLY D1": data[(only_pareados) & (only_d1)],
        "D1+D2": data[(only_pareados) & (d1_and_d2)],
        "MALES": data[(only_pareados) & (males)],
        "MALES CASO": data[(only_pareados) & (males) & (tipo_caso)],
        "MALES CONTROLE": data[(only_pareados) & (males) & (tipo_controle)],
        "FEMALES": data[(only_pareados) & (females)],
        "FEMALES CASO": data[(only_pareados) & (females) & (tipo_caso)],
        "FEMALES CONTROLE": data[(only_pareados) & (females) & (tipo_controle)],
        "60-69": data[(only_pareados) & (idade_6069)],
        "60-69 CASO": data[(only_pareados) & (idade_6069) & (tipo_caso)],
        "60-69 CONTROLE": data[(only_pareados) & (idade_6069) & (tipo_controle)],
        "70-79": data[(only_pareados) & (idade_7079)],
        "70-79 CASO": data[(only_pareados) & (idade_7079) & (tipo_caso)],
        "70-79 CONTROLE": data[(only_pareados) & (idade_7079) & (tipo_controle)],
        "80+": data[(only_pareados) & (idade_80)],
        "80+ CASO": data[(only_pareados) & (idade_80) & (tipo_caso)],
        "80+ CONTROLE": data[(only_pareados) & (idade_80) & (tipo_controle)],
        "OBITO COVID TOTAL": data[(only_pareados) & (obito_covid)],
        "OBITO COVID CASO": data[(only_pareados) & (obito_covid) & (tipo_caso)],
        "OBITO COVID CONTROLE": data[(only_pareados) & (obito_covid) & (tipo_controle)],
        "OBITO GERAL TOTAL": data[(only_pareados) & (obito_geral)],
        "OBITO GERAL CASO": data[(only_pareados) & (obito_geral) & (tipo_caso)],
        "OBITO GERAL CONTROLE": data[(only_pareados) & (obito_geral) & (tipo_controle)],
        "SEM VACINA TOTAL": data[(only_pareados) & (novac)],
        "SEM VACINA CASO": data[(only_pareados) & (novac) & (tipo_caso)],
        "SEM VACINA CONTROLE": data[(only_pareados) & (novac) & (tipo_controle)],
        "D1 0-13 TOTAL": data[(only_pareados) & (d1_013)],
        "D1 0-13 CASO": data[(only_pareados) & (d1_013) & (tipo_caso)],
        "D1 0-13 CONTROLE": data[(only_pareados) & (d1_013) & (tipo_controle)],
        "D1 >=14 TOTAL": data[(only_pareados) & (d1_14)],
        "D1 >=14 CASO": data[(only_pareados) & (d1_14) & (tipo_caso)],
        "D1 >=14 CONTROLE": data[(only_pareados) & (d1_14) & (tipo_controle)],
        "D2 0-13 TOTAL": data[(only_pareados) & (d2_013)],
        "D2 0-13 CASO": data[(only_pareados) & (d2_013) & (tipo_caso)],
        "D2 0-13 CONTROLE": data[(only_pareados) & (d2_013) & (tipo_controle)],
        "D2 >=14 TOTAL": data[(only_pareados) & (d2_14)],
        "D2 >=14 CASO": data[(only_pareados) & (d2_14) & (tipo_caso)],
        "D2 >=14 CONTROLE": data[(only_pareados) & (d2_14) & (tipo_controle)],
    }
    return hashd

# VE general - CORONAVAC

## Prepare configuration vars and Load data

In [4]:
config = {
    "init cohort": dt.date(2021, 1, 21),
    "final cohort": dt.date(2021, 8, 31),
    "seeds": np.arange(1,16,1),
    "data folder": None,
    "file_pareados": lambda seed: f"pareados_corona_{seed}.csv",
    "file_eventos": lambda seed: f"pares_eventos_corona_{seed}.csv",
    "pop_cohort": "pop_reservoir_corona.csv"
}
str_init = f"{config['init cohort'].day}{config['init cohort'].strftime('%b').upper()}{config['init cohort'].year}"
str_final = f"{config['final cohort'].day}{config['final cohort'].strftime('%b').upper()}{config['final cohort'].year}"
config["data folder"] = os.path.join("..", "output", "data", "COHORT_"+str_init+"_"+str_final)

In [5]:
# EVENTOS
seed = 1
df_eventos_pares = pd.read_csv(os.path.join(config["data folder"], config["file_eventos"](seed)))
df_eventos_pares.info()

FileNotFoundError: [Errno 2] No such file or directory: '..\\output\\data\\COHORT_21JAN2021_31AUG2021\\pares_eventos_corona_1.csv'

In [4]:
df_eventos_pares.describe()

Unnamed: 0,CPF CASO,CPF CONTROLE,INTV OBITO COVID CASO(D1),INTV OBITO GERAL CASO(D1),INTV OBITO COVID CONTROLE(D1),INTV OBITO GERAL CONTROLE(D1),INTV D1 CASO CONTROLE(D1),INTV FIM COORTE(D1),INTV OBITO COVID CASO(D2),INTV OBITO GERAL CASO(D2),INTV OBITO COVID CONTROLE(D2),INTV OBITO GERAL CONTROLE(D2),INTV D1 CASO CONTROLE(D2),INTV FIM COORTE(D2)
count,110389.0,110389.0,248.0,610.0,672.0,1081.0,87743.0,110389.0,71.0,503.0,657.0,1072.0,86656.0,108769.0
mean,25166250000.0,27362280000.0,41.274194,100.793443,36.270833,94.177613,8.881096,160.203788,44.71831,87.328032,7.774734,68.205224,-20.668875,130.650581
std,23691560000.0,24516080000.0,28.483347,54.401686,27.963376,57.313497,17.531579,13.967337,34.687661,48.81808,31.886199,59.56668,21.054848,20.17904
min,223301.0,223301.0,-20.0,-155.0,1.0,1.0,1.0,0.0,0.0,0.0,-81.0,-70.0,-154.0,-1.0
25%,7418744000.0,9127283000.0,23.0,53.0,17.0,45.0,1.0,152.0,18.0,43.5,-14.0,19.75,-33.0,118.0
50%,15499390000.0,16811220000.0,36.0,103.0,29.0,91.0,3.0,158.0,38.0,91.0,1.0,66.5,-24.0,129.0
75%,38011280000.0,41686870000.0,52.0,144.0,48.0,144.0,7.0,162.0,54.5,126.0,23.0,117.0,-14.0,138.0
max,99997500000.0,99997190000.0,147.0,221.0,161.0,230.0,214.0,222.0,133.0,191.0,143.0,220.0,194.0,200.0


In [6]:
# PAREADOS
df_pareados = pd.read_csv(os.path.join(config["data folder"], config["file_pareados"](seed)))
df_pareados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229437 entries, 0 to 229436
Data columns (total 8 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   CPF               229437 non-null  int64  
 1   DATA D1           204965 non-null  object 
 2   DATA D2           200579 non-null  object 
 3   DATA OBITO COVID  1096 non-null    object 
 4   DATA OBITO GERAL  1853 non-null    object 
 5   TIPO              229437 non-null  object 
 6   PAR               220898 non-null  float64
 7   PAREADO           229437 non-null  bool   
dtypes: bool(1), float64(1), int64(1), object(5)
memory usage: 12.5+ MB


In [7]:
df_pareados.describe()

Unnamed: 0,CPF,PAR
count,229437.0,220898.0
mean,26315740000.0,26256890000.0
std,24111220000.0,24136170000.0
min,223301.0,223301.0
25%,8848963000.0,8617454000.0
50%,16428400000.0,16385300000.0
75%,39132530000.0,39120930000.0
max,99997500000.0,99997500000.0


In [8]:
# POP COHORT
df_pop = pd.read_csv(os.path.join(config["data folder"], config["pop_cohort"]))
df_pop.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187921 entries, 0 to 187920
Data columns (total 41 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   nome                                   187921 non-null  object 
 1   cpf                                    187921 non-null  int64  
 2   data_nascimento                        187921 non-null  object 
 3   bairro                                 187921 non-null  object 
 4   sexo                                   187921 non-null  object 
 5   created_at                             187921 non-null  object 
 6   nome_mae                               187921 non-null  object 
 7   data D1(VACINADOS)                     163449 non-null  object 
 8   data D2(VACINADOS)                     159767 non-null  object 
 9   vacina(VACINADOS)                      163449 non-null  object 
 10  fornecedor(VACINADOS)                  163445 non-null  

In [9]:
df_pop.describe()

Unnamed: 0,cpf,idade anos(VACINADOS),bairro id(VACINADOS),do_8,do_8(CARTORIOS),idade
count,187921.0,163449.0,143531.0,962.0,2454.0,187921.0
mean,26467100000.0,70.247337,73.293156,30041250.0,30803490.0,70.415552
std,24178280000.0,6.303299,46.233427,2532544.0,2207954.0,6.577705
min,223301.0,6.0,1.0,25411040.0,25411030.0,60.0
25%,8988439000.0,65.0,34.0,31060310.0,31392950.0,65.0
50%,16503240000.0,69.0,71.0,31400460.0,31480840.0,69.0
75%,39322560000.0,73.0,105.0,31485240.0,31493570.0,74.0
max,99997500000.0,140.0,158.0,32018000.0,32901270.0,230.0


In [10]:
datasets = {
    "EVENTOS": None,
    "PARES": df_pareados,
    "POP": df_pop,
    "EVENTO_INFO": None
}

df_evento_info = surv.push_info(datasets["EVENTOS"], datasets["POP"])
datasets["EVENTO_INFO"] = df_evento_info
df_evento_info.info()

AttributeError: 'NoneType' object has no attribute 'merge'

## Counting

In [11]:
def stratify_pares(data, data_pop):
    '''
    
    '''
    # --> collect data on age and sex
    data = data.merge(data_pop[["cpf", "sexo", "idade", "bairro"]], left_on="CPF", right_on="cpf", how="left")
    cols = ["DATA D1", "DATA D2"]
    for j in cols:
        data[j] = pd.to_datetime(data[j], format="%Y-%m-%d", errors="coerce")
    
    data["VACINA STATUS ATE FIM DA COORTE"] = data.apply(lambda x: f_vacina(x, config["final cohort"]), axis=1)
    
    # --> conditions
    only_pareados = data["PAREADO"]==True
    only_d1 = pd.notna(data["DATA D1"]) & pd.isna(data["DATA D2"])
    d1_and_d2 = pd.notna(data["DATA D1"]) & pd.notna(data["DATA D2"])
    no_vaccine = pd.isna(data["DATA D1"]) & pd.isna(data["DATA D2"])
    males = data["sexo"]=="M"
    females = data["sexo"]=="F"
    idade_6069 = (data["idade"]>=60) & (data["idade"]<=69)
    idade_7079 = (data["idade"]>=70) & (data["idade"]<=79)
    idade_80 = (data["idade"]>=80) & (data["idade"]<=105)
    tipo_caso = data["TIPO"]=="CASO"
    tipo_controle = data["TIPO"]=="CONTROLE"
    obito_covid = pd.notna(data["DATA OBITO COVID"])
    obito_geral = pd.notna(data["DATA OBITO GERAL"])
    novac = data["VACINA STATUS ATE FIM DA COORTE"]=="NO VACCINE"
    d1_013 = data["VACINA STATUS ATE FIM DA COORTE"]=="D1 0-13 DAYS"
    d1_14 = data["VACINA STATUS ATE FIM DA COORTE"]=="D1 >= 14 DAYS"
    d2_013 = data["VACINA STATUS ATE FIM DA COORTE"]=="D2 0-13 DAYS"
    d2_14 = data["VACINA STATUS ATE FIM DA COORTE"]=="D2 >= 14 DAYS"
    
    hashd = {
        "TOTAL": data[only_pareados],
        "CASO": data[only_pareados & tipo_caso],
        "CONTROLE": data[only_pareados & tipo_controle],
        "NO VACCINE": data[(only_pareados) & (no_vaccine)],
        "ONLY D1": data[(only_pareados) & (only_d1)],
        "D1+D2": data[(only_pareados) & (d1_and_d2)],
        "MALES": data[(only_pareados) & (males)],
        "MALES CASO": data[(only_pareados) & (males) & (tipo_caso)],
        "MALES CONTROLE": data[(only_pareados) & (males) & (tipo_controle)],
        "FEMALES": data[(only_pareados) & (females)],
        "FEMALES CASO": data[(only_pareados) & (females) & (tipo_caso)],
        "FEMALES CONTROLE": data[(only_pareados) & (females) & (tipo_controle)],
        "60-69": data[(only_pareados) & (idade_6069)],
        "60-69 CASO": data[(only_pareados) & (idade_6069) & (tipo_caso)],
        "60-69 CONTROLE": data[(only_pareados) & (idade_6069) & (tipo_controle)],
        "70-79": data[(only_pareados) & (idade_7079)],
        "70-79 CASO": data[(only_pareados) & (idade_7079) & (tipo_caso)],
        "70-79 CONTROLE": data[(only_pareados) & (idade_7079) & (tipo_controle)],
        "80+": data[(only_pareados) & (idade_80)],
        "80+ CASO": data[(only_pareados) & (idade_80) & (tipo_caso)],
        "80+ CONTROLE": data[(only_pareados) & (idade_80) & (tipo_controle)],
        "OBITO COVID TOTAL": data[(only_pareados) & (obito_covid)],
        "OBITO COVID CASO": data[(only_pareados) & (obito_covid) & (tipo_caso)],
        "OBITO COVID CONTROLE": data[(only_pareados) & (obito_covid) & (tipo_controle)],
        "OBITO GERAL TOTAL": data[(only_pareados) & (obito_geral)],
        "OBITO GERAL CASO": data[(only_pareados) & (obito_geral) & (tipo_caso)],
        "OBITO GERAL CONTROLE": data[(only_pareados) & (obito_geral) & (tipo_controle)],
        "SEM VACINA TOTAL": data[(only_pareados) & (novac)],
        "SEM VACINA CASO": data[(only_pareados) & (novac) & (tipo_caso)],
        "SEM VACINA CONTROLE": data[(only_pareados) & (novac) & (tipo_controle)],
        "D1 0-13 TOTAL": data[(only_pareados) & (d1_013)],
        "D1 0-13 CASO": data[(only_pareados) & (d1_013) & (tipo_caso)],
        "D1 0-13 CONTROLE": data[(only_pareados) & (d1_013) & (tipo_controle)],
        "D1 >=14 TOTAL": data[(only_pareados) & (d1_14)],
        "D1 >=14 CASO": data[(only_pareados) & (d1_14) & (tipo_caso)],
        "D1 >=14 CONTROLE": data[(only_pareados) & (d1_14) & (tipo_controle)],
        "D2 0-13 TOTAL": data[(only_pareados) & (d2_013)],
        "D2 0-13 CASO": data[(only_pareados) & (d2_013) & (tipo_caso)],
        "D2 0-13 CONTROLE": data[(only_pareados) & (d2_013) & (tipo_controle)],
        "D2 >=14 TOTAL": data[(only_pareados) & (d2_14)],
        "D2 >=14 CASO": data[(only_pareados) & (d2_14) & (tipo_caso)],
        "D2 >=14 CONTROLE": data[(only_pareados) & (d2_14) & (tipo_controle)],
    }
    return hashd

def f_vacina(x, final_cohort):
    '''
        Define vaccination status based only on the cohort without outcome.
    '''
    dist_days = None
    if pd.isna(x["DATA D1"]) and pd.isna(x["DATA D2"]):
        dist_days = np.nan
        return "NO VACCINE"
    elif not pd.isna(x["DATA D2"]):
        dist_days = (final_cohort - x["DATA D2"].date()).days
        if dist_days<14:
            return "D2 0-13 DAYS"
        else:
            return "D2 >= 14 DAYS"
    elif not pd.isna(x["DATA D1"]):
        dist_days = (final_cohort - x["DATA D1"].date()).days
        if dist_days<14:
            return "D1 0-13 DAYS"
        else:
            return "D1 >= 14 DAYS"

In [12]:
hash_data = stratify_pares(datasets["PARES"], datasets["POP"])

In [13]:
cols = {"TOTAL": None, "CASO": None, "CONTROLE": None}
rows = {"Mean age(SD)": dict(cols), "Age 60-69": dict(cols), 
        "Age 70-79": dict(cols), "Age 80+": dict(cols), "Male": dict(cols), 
        "Female": dict(cols), "Death Covid": dict(cols), "Death NOT Covid": dict(cols), 
        "Vac -> Not Vaccinated": dict(cols), "Vac -> D1 0-13 DAYS": dict(cols), "Vac -> D1 >= 14 DAYS": dict(cols), 
        "Vac -> D1+D2 0-13 DAYS": dict(cols), "Vac -> D1+D2 >= 14 DAYS": dict(cols), "Total": dict(cols)}

In [14]:
def fill_table(rows, hash_data):
    rows["Total"]["TOTAL"] = f'{hash_data["TOTAL"].shape[0]}'
    rows["Total"]["CASO"] = f'{hash_data["CASO"].shape[0]}'
    rows["Total"]["CONTROLE"] = f'{hash_data["CONTROLE"].shape[0]}'
    rows["Mean age(SD)"]["TOTAL"] = f'{hash_data["TOTAL"]["idade"].mean():.2f}({hash_data["TOTAL"]["idade"].std():.2f})'
    rows["Mean age(SD)"]["CASO"] = f'{hash_data["CASO"]["idade"].mean():.2f}({hash_data["CASO"]["idade"].std():.2f})'
    rows["Mean age(SD)"]["CONTROLE"] = f'{hash_data["CONTROLE"]["idade"].mean():.2f}({hash_data["CONTROLE"]["idade"].std():.2f})'
    rows["Age 60-69"]["TOTAL"] = hash_data["60-69"].shape[0]
    rows["Age 60-69"]["CASO"] =  hash_data["60-69 CASO"].shape[0]
    rows["Age 60-69"]["CONTROLE"] = hash_data["60-69 CONTROLE"].shape[0]
    rows["Age 70-79"]["TOTAL"] = hash_data["70-79"].shape[0]
    rows["Age 70-79"]["CASO"] = hash_data["70-79 CASO"].shape[0]
    rows["Age 70-79"]["CONTROLE"] = hash_data["70-79 CONTROLE"].shape[0]
    rows["Age 80+"]["TOTAL"] = hash_data["80+"].shape[0]
    rows["Age 80+"]["CASO"] = hash_data["80+ CASO"].shape[0]
    rows["Age 80+"]["CONTROLE"] = hash_data["80+ CONTROLE"].shape[0]
    rows["Male"]["TOTAL"] = hash_data["MALES"].shape[0]
    rows["Male"]["CASO"] = hash_data["MALES CASO"].shape[0]
    rows["Male"]["CONTROLE"] = hash_data["MALES CONTROLE"].shape[0]
    rows["Female"]["TOTAL"] = hash_data["FEMALES"].shape[0]
    rows["Female"]["CASO"] = hash_data["FEMALES CASO"].shape[0]
    rows["Female"]["CONTROLE"] = hash_data["FEMALES CONTROLE"].shape[0]
    rows["Death Covid"]["TOTAL"] = hash_data["OBITO COVID TOTAL"].shape[0]
    rows["Death Covid"]["CASO"] = hash_data["OBITO COVID CASO"].shape[0]
    rows["Death Covid"]["CONTROLE"] = hash_data["OBITO COVID CONTROLE"].shape[0]
    rows["Death NOT Covid"]["TOTAL"] = hash_data["OBITO GERAL TOTAL"].shape[0]
    rows["Death NOT Covid"]["CASO"] = hash_data["OBITO GERAL CASO"].shape[0]
    rows["Death NOT Covid"]["CONTROLE"] = hash_data["OBITO GERAL CONTROLE"].shape[0]
    rows["Vac -> Not Vaccinated"]["TOTAL"] = hash_data["SEM VACINA TOTAL"].shape[0]
    rows["Vac -> Not Vaccinated"]["CASO"] = hash_data["SEM VACINA CASO"].shape[0]
    rows["Vac -> Not Vaccinated"]["CONTROLE"] = hash_data["SEM VACINA CONTROLE"].shape[0]
    rows["Vac -> D1 0-13 DAYS"]["TOTAL"] = hash_data["D1 0-13 TOTAL"].shape[0]
    rows["Vac -> D1 0-13 DAYS"]["CASO"] = hash_data["D1 0-13 CASO"].shape[0]
    rows["Vac -> D1 0-13 DAYS"]["CONTROLE"] = hash_data["D1 0-13 CONTROLE"].shape[0]
    rows["Vac -> D1 >= 14 DAYS"]["TOTAL"] = hash_data["D1 >=14 TOTAL"].shape[0]
    rows["Vac -> D1 >= 14 DAYS"]["CASO"] = hash_data["D1 >=14 CASO"].shape[0]
    rows["Vac -> D1 >= 14 DAYS"]["CONTROLE"] = hash_data["D1 >=14 CONTROLE"].shape[0]
    rows["Vac -> D1+D2 0-13 DAYS"]["TOTAL"] = hash_data["D2 0-13 TOTAL"].shape[0]
    rows["Vac -> D1+D2 0-13 DAYS"]["CASO"] = hash_data["D2 0-13 CASO"].shape[0]
    rows["Vac -> D1+D2 0-13 DAYS"]["CONTROLE"] = hash_data["D2 0-13 CONTROLE"].shape[0]
    rows["Vac -> D1+D2 >= 14 DAYS"]["TOTAL"] = hash_data["D2 >=14 TOTAL"].shape[0]
    rows["Vac -> D1+D2 >= 14 DAYS"]["CASO"] = hash_data["D2 >=14 CASO"].shape[0]
    rows["Vac -> D1+D2 >= 14 DAYS"]["CONTROLE"] = hash_data["D2 >=14 CONTROLE"].shape[0]
    
    return pd.DataFrame(rows).T

In [15]:
fill_table(rows, hash_data)

Unnamed: 0,TOTAL,CASO,CONTROLE
Mean age(SD),70.44(6.17),70.92(6.14),69.96(6.16)
Age 60-69,110653,52450,58203
Age 70-79,90552,47459,43093
Age 80+,19674,10530,9144
Male,85474,42737,42737
Female,135424,67712,67712
Death Covid,919,244,675
Death NOT Covid,1690,612,1078
Vac -> Not Vaccinated,22647,0,22647
Vac -> D1 0-13 DAYS,352,81,271


In [203]:
def count_deaths(hash_data):
    rows = {
        "SEM VACINA TOTAL": None, 
        "D1 0-13 TOTAL": None,
        "D1 >=14 TOTAL": None,
        "D2 0-13 TOTAL": None,
        "D2 >=14 TOTAL": None
    }
    
    for key in hash_data.keys():
        hash_data[key]["OBITO POR COVID"] = hash_data[key]["DATA OBITO COVID"].apply(lambda x: "NAO" if pd.isna(x) else "SIM")
    
    for key in rows.keys():
        rows[key] = pd.crosstab(hash_data[key]["TIPO"], hash_data[key]["OBITO POR COVID"])
    return rows
    
    

In [205]:
cross = count_deaths(hash_data)

In [218]:
cross["D2 >=14 TOTAL"].T

TIPO,CASO,CONTROLE
OBITO POR COVID,Unnamed: 1_level_1,Unnamed: 2_level_1
NAO,108620,84821
SIM,71,63


In [216]:
(71/108620)/(63/84821)

0.8800581903417477

In [219]:
63+71

134