# Importing libraries and appending paths 

In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import sys
import plotly.graph_objects as go

In [29]:
# Make the parent directory importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

# Importing and padding serotype data

In [30]:
# Raw serotype records; the file is semicolon-separated.
file_s = '../Data/dengue_BR_serotypes.csv'
s_data = pd.read_csv(file_s, sep=';')

In [31]:
# Collapse to one row per (YEAR, ID_MUNICIP, SOROTIPO), summing NUMBER into a
# column named CASES.
s_data = (
    s_data
    .groupby(['YEAR', 'ID_MUNICIP', 'SOROTIPO'])['NUMBER']
    .sum()
    .reset_index(name='CASES')
)

In [32]:
# Distinct values of each key column; these define the full grid that the
# padding step iterates over.
years, muns, serotypes = (
    s_data[column].unique() for column in ('YEAR', 'ID_MUNICIP', 'SOROTIPO')
)

In [33]:
# Four parallel accumulators, one per column of the padded table.
years_df, muns_df, serotypes_df, cases_df = [], [], [], []

In [34]:
# Index the aggregated counts by (year, municipality, serotype) once, so that
# each combination below is a constant-time dictionary lookup instead of three
# full-column comparisons plus a boolean mask over all of s_data.
key_cols = ['YEAR', 'ID_MUNICIP', 'SOROTIPO']
cases_by_key = (
    s_data
    .drop_duplicates(subset=key_cols, keep='first')  # first match wins, as .iloc[0] did
    .set_index(key_cols)['CASES']
    .to_dict()
)

# Pad the table: emit every (year, municipality, serotype) combination and
# record 0 cases for combinations that do not occur in s_data.
for year in years:
    for mun in muns:
        for serotype in serotypes:
            years_df.append(year)
            muns_df.append(mun)
            serotypes_df.append(serotype)
            cases_df.append(cases_by_key.get((year, mun, serotype), 0))

In [35]:
# Assemble the padded long-format table from the four parallel lists.
df_total = pd.DataFrame({
    'YEAR': years_df,
    'ID_MUNICIP': muns_df,
    'SEROTYPES': serotypes_df,
    'CASES': cases_df,
})

In [36]:
# Inspect the padded table; the rendered output is abbreviated to its first and last rows.
df_total

Unnamed: 0,YEAR,ID_MUNICIP,SEROTYPES,CASES
0,2007,110001,,46
1,2007,110001,1,0
2,2007,110001,3,0
3,2007,110001,4,0
4,2007,110001,2,0
...,...,...,...,...
389020,2021,432290,,2
389021,2021,432290,1,0
389022,2021,432290,3,0
389023,2021,432290,4,0


## Cases, tests and ratio per municipality for serotype
For each municipality and year, this is the fraction of positive cases that were tested for serotype (1, 2, 3 or 4), i.e. **cases tested / total cases**.

In [46]:
# total tests per municipality per year
df_res = df_total.copy()
filt_res = (df_res['SEROTYPES'] != ' ')
df_res = df_res[filt_res]
df_res = df_res.groupby(['YEAR','ID_MUNICIP'])['CASES'].sum()
df_res = df_res.to_frame(name = 'TESTS').reset_index()
df_tmp = df_total.copy()
df_tmp = df_tmp.groupby(['YEAR','ID_MUNICIP'])['CASES'].sum()
df_tmp = df_tmp.to_frame(name = 'CASES').reset_index()
df_res['CASES'] = df_tmp['CASES']
df_res['RATIO'] = df_res['TESTS']/df_res['CASES']

In [47]:
# Discard municipality-years with an undefined RATIO (NaN from dividing by
# zero total cases).
df_res = df_res.dropna(axis=0, how='any')

## Percentage of tests per municipality for serotype
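Per-municipality summary across years (mean, standard deviation, standard deviation / mean, maximum and minimum) of the yearly ratio **cases tested / total cases**, i.e. the fraction of positive cases that were tested for serotype 1, 2, 3 or 4.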

In [48]:
# Wide table of ratios: one row per YEAR, one column per ID_MUNICIP.
# drop() already returns a new frame, so df_res itself is left untouched.
df_res_ratio = (
    df_res
    .drop(columns=['TESTS', 'CASES'])
    .pivot(index='YEAR', columns='ID_MUNICIP')
    .droplevel(0, axis=1)  # strip the leftover 'RATIO' level from the column labels
)

In [49]:
# Per-municipality statistics of the yearly ratio, taken down the YEAR axis.
ratio_mean = df_res_ratio.mean(axis=0)
ratio_std = df_res_ratio.std(axis=0)

df_stats_ratio = pd.DataFrame(
    {
        'MEAN': ratio_mean,
        'STD': ratio_std,
        'STD/MEAN': ratio_std / ratio_mean,  # coefficient of variation
        'MAX': df_res_ratio.max(axis=0),
        'MIN': df_res_ratio.min(axis=0),
    },
    index=ratio_mean.index,
)

In [51]:
# Keep only municipalities for which every statistic is defined (no NaN in
# any column).
df_stats_ratio = df_stats_ratio.dropna(axis=0, how='any')

In [52]:
# Lowest relative variability first; the sorted view is only displayed, not stored.
df_stats_ratio.sort_values(by='STD/MEAN', ascending=True)

Unnamed: 0_level_0,MEAN,STD,STD/MEAN,MAX,MIN
ID_MUNICIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
355030,0.005233,0.003341,0.638394,0.010955,0.000000
520870,0.010736,0.007660,0.713432,0.028322,0.001675
251610,0.291667,0.209718,0.719032,0.500000,0.000000
410740,0.208333,0.159571,0.765942,0.333333,0.000000
412850,0.202381,0.162068,0.800807,0.333333,0.000000
...,...,...,...,...,...
110010,0.000300,0.001163,3.872983,0.004505,0.000000
520490,0.000080,0.000311,3.872983,0.001206,0.000000
261080,0.001515,0.005868,3.872983,0.022727,0.000000
510794,0.005333,0.020656,3.872983,0.080000,0.000000


## Saving because this takes a long time to process

In [54]:
# The municipality code lives in the index of df_stats_ratio, so the index must
# be written out; with index=False the saved rows could not be matched back to
# their municipalities.
df_stats_ratio.to_csv('../Data/dengue_municipality_tests.csv', index=True, index_label='ID_MUNICIP')