# Importing libraries and appending paths 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import sys
import plotly.graph_objects as go

In [2]:
# Make the parent directory importable from this notebook.
# Guarded so that re-running the cell does not keep appending duplicate
# '..' entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

# Importing and padding serotype data

In [3]:
# Read the semicolon-separated serotype file into a DataFrame.
# NOTE(review): "dengufe" in the file name may be a typo for "dengue" —
# confirm against the file on disk before changing it.
file_s = '../Data/dengufe_BR_serotypes.csv'
s_data = pd.read_csv(file_s, sep=';')

In [4]:
# Sum NUMBER for every (YEAR, ID_MUNICIP, SOROTIPO) combination and flatten
# the grouped result back into columns, storing the sum as CASES.
serotype_keys = ['YEAR', 'ID_MUNICIP', 'SOROTIPO']
s_data = (
    s_data
    .groupby(serotype_keys)['NUMBER']
    .sum()
    .reset_index(name='CASES')
)

In [5]:
# Distinct values of each key column, in order of first appearance in s_data.
years, muns, serotypes = (
    s_data[key_col].unique()
    for key_col in ('YEAR', 'ID_MUNICIP', 'SOROTIPO')
)

In [6]:
# Column accumulators for the padded table: four independent, empty lists.
years_df, muns_df, serotypes_df, cases_df = [], [], [], []

In [7]:
# Pad s_data to the full YEAR x ID_MUNICIP x SOROTIPO grid, using 0 cases for
# combinations that do not occur in the data.
#
# The previous implementation ran a boolean filter over the whole DataFrame
# for every grid point, which is what made this cell so slow. A single keyed
# reindex yields the same rows in the same order (year-major, then
# municipality, then serotype, each in the order given by `years`, `muns`
# and `serotypes`).
grid_keys = ['YEAR', 'ID_MUNICIP', 'SOROTIPO']
full_grid = pd.MultiIndex.from_product([years, muns, serotypes], names=grid_keys)

# reindex needs unique keys; s_data has one row per key combination because
# it was produced by a groupby on exactly these three columns.
padded_cases = (
    s_data
    .set_index(grid_keys)['CASES']
    .reindex(full_grid, fill_value=0)
)

# Feed the same four accumulator lists the loop used to append to.
years_df.extend(full_grid.get_level_values('YEAR').tolist())
muns_df.extend(full_grid.get_level_values('ID_MUNICIP').tolist())
serotypes_df.extend(full_grid.get_level_values('SOROTIPO').tolist())
cases_df.extend(padded_cases.tolist())

In [8]:
# Assemble the padded table row by row from the four parallel lists.
# Note the serotype column is named SEROTYPES here (SOROTIPO in s_data).
padded_columns = ['YEAR', 'ID_MUNICIP', 'SEROTYPES', 'CASES']
padded_rows = list(zip(years_df, muns_df, serotypes_df, cases_df))
df_total = pd.DataFrame(padded_rows, columns=padded_columns)

In [9]:
# Display the padded table (one row per year / municipality / serotype).
df_total

Unnamed: 0,YEAR,ID_MUNICIP,SEROTYPES,CASES
0,2007,110001,,46
1,2007,110001,1,0
2,2007,110001,3,0
3,2007,110001,4,0
4,2007,110001,2,0
...,...,...,...,...
389020,2021,432290,,2
389021,2021,432290,1,0
389022,2021,432290,3,0
389023,2021,432290,4,0


## Cases, tests and ratio per municipality for serotype
For each municipality and year, this is the share of positive cases that were tested for serotype (1, 2, 3 or 4), i.e. **cases tested / total cases**.

In [10]:
# Tests, total cases and their ratio per municipality per year.
#   TESTS = cases whose SEROTYPES value is not the blank placeholder ' '
#   CASES = all cases, blank serotype included
#   RATIO = TESTS / CASES (NaN where CASES is 0)
mun_year_keys = ['YEAR', 'ID_MUNICIP']

filt_res = (df_total['SEROTYPES'] != ' ')
df_res = (
    df_total[filt_res]
    .groupby(mun_year_keys)['CASES']
    .sum()
    .reset_index(name='TESTS')
)
df_tmp = (
    df_total
    .groupby(mun_year_keys)['CASES']
    .sum()
    .reset_index(name='CASES')
)

# Attach CASES by matching on (YEAR, ID_MUNICIP) instead of relying on both
# grouped frames happening to share the same row order and length.
df_res = df_res.merge(df_tmp, on=mun_year_keys, how='left')
df_res['RATIO'] = df_res['TESTS'] / df_res['CASES']

In [11]:
# Discard every row that contains a missing value (e.g. RATIO from 0 / 0).
df_res = df_res.dropna(axis=0, how='any')

## Percentage of tests per municipality for serotype
That is, the fraction of positive cases that were tested for serotype 1, 2, 3 or 4 (**cases tested / total cases**), summarized per municipality across years.

In [12]:
# Wide table of RATIO: one row per YEAR, one column per ID_MUNICIP.
df_res_ratio = df_res.pivot(index='YEAR', columns='ID_MUNICIP', values='RATIO')

In [13]:
# Per-municipality summary of the yearly ratios (statistics taken down the
# YEAR axis, so each row of the result is one municipality).
ratio_mean = df_res_ratio.mean(axis=0)
ratio_std = df_res_ratio.std(axis=0)
df_stats_ratio = pd.DataFrame({
    'MEAN': ratio_mean,
    'STD': ratio_std,
    'STD/MEAN': ratio_std / ratio_mean,
    'MAX': df_res_ratio.max(axis=0),
    'MIN': df_res_ratio.min(axis=0),
})

In [14]:
# Keep only municipalities for which every statistic is defined.
df_stats_ratio = df_stats_ratio.dropna(axis=0, how='any')

In [24]:
# Keep only municipalities whose smallest yearly ratio is non-zero.
has_nonzero_min = df_stats_ratio['MIN'].ne(0)
df_filter = df_stats_ratio.loc[has_nonzero_min].copy()

In [26]:
# Show the filtered municipalities ordered by STD/MEAN, smallest first.
df_filter.sort_values(by='STD/MEAN', ascending=True)

Unnamed: 0_level_0,MEAN,STD,STD/MEAN,MAX,MIN
ID_MUNICIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
520870,0.010736,0.00766,0.713432,0.028322,0.001675
530010,0.011684,0.009461,0.809731,0.032349,0.00221
240810,0.005264,0.005326,1.011716,0.021739,0.000875
230440,0.003171,0.003333,1.051072,0.009889,6.6e-05
310620,0.010712,0.011935,1.114167,0.04698,0.000957
330455,0.010669,0.014366,1.346479,0.059553,0.00025
354340,0.040425,0.055685,1.377482,0.170492,0.000587
261160,0.004747,0.007254,1.527912,0.029367,0.000284
354980,0.064883,0.102619,1.581598,0.333333,0.001736
172100,0.012695,0.023366,1.840645,0.086031,0.000462


## Saving the results because they take a long time to compute

In [20]:
# Write the per-municipality statistics to CSV; the ID_MUNICIP index is
# written as the first column.
df_stats_ratio.to_csv('../Data/dengue_municipality_tests.csv', index=True)