# Discretization of mortality levels by year
## Flow: run this notebook after the covid notebooks.

In [11]:
import os
import sys

module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from pandas import CategoricalDtype

from sklearn.mixture import GaussianMixture
from datetime import datetime

## Loading data

In [12]:
# Static municipality attributes; only the columns used later in this notebook are kept.
# NOTE(review): the folder is spelled 'coivd' here but 'covid' in the time-series
# path read by the next cell -- confirm which spelling exists on disk.
base_columns = [
    'CODIGO_MUNICIPIO_6',
    'CODIGO_MUNICIPIO_7',
    'MUNICIPIO',
    'SIGLA_ESTADO',
    'ESTADO',
    'REGIAO',
    'POPULACAO_2022',
    'CAPITAL',
]
df_base = pd.read_csv('../coivd/data/input/df_municpal.csv')[base_columns]
# Nullable integer key, the same dtype given to the time-series key, so the merges line up.
df_base['CODIGO_MUNICIPIO_6'] = df_base['CODIGO_MUNICIPIO_6'].astype('Int64')

In [13]:
# Municipal death time series written by the covid notebooks; only the date,
# the 6-digit municipality code and the death count are kept.
df_municipalities = pd.read_csv('../covid/data/output/df_sim_covid_deaths_municipal_timeseries.csv')[['DATA','CODIGO_MUNICIPIO_6', 'OBITOS']]
# `infer_datetime_format` is deprecated since pandas 2.0 (strict format inference
# is the default and the flag has no effect), so it is no longer passed.
df_municipalities['DATA'] = pd.to_datetime(df_municipalities['DATA'])
# Nullable integer key, matching the dtype of df_base.CODIGO_MUNICIPIO_6.
df_municipalities['CODIGO_MUNICIPIO_6'] = df_municipalities['CODIGO_MUNICIPIO_6'].astype('Int64')
df_municipalities = df_municipalities.sort_values('DATA')

## Summary of deaths by year

In [14]:
# Cut-off dates (naive, midnight) at which the death series is sampled below.
END_FIRST_SEMESTER_2020 = datetime(2020, 6, 30)
END_2020 = datetime(2020, 12, 31)
END_2021 = datetime(2021, 12, 31)
END_2022 = datetime(2022, 12, 31)

In [15]:
# Rows of the time series dated exactly END_2022 (last day of the study);
# rows containing missing values are discarded.
is_study_end = df_municipalities['DATA'] == END_2022
df_municipalities_end_study = df_municipalities.loc[is_study_end].dropna().copy()

In [16]:
# One row per municipality of df_base, with its death count at the end of the study.
# The left join keeps municipalities absent from the series; their count becomes 0.
df_mortality = pd.merge(
    df_base,
    df_municipalities_end_study,
    how='left',
    on='CODIGO_MUNICIPIO_6',
)
df_mortality['OBITOS'] = df_mortality['OBITOS'].fillna(0)

In [17]:
# Attach the death count observed at each intermediate cut-off date as its own column.
# Municipalities with no row at a given date get 0.
for moment, column in zip([END_FIRST_SEMESTER_2020, END_2020, END_2021], ['OBITOS_ACUMULADOS_PRIMEIRO_SEMESTRE_2020', 'OBITOS_ACUMULADOS_2020', 'OBITOS_ACUMULADOS_2021']):
    df_snapshot = (
        df_municipalities
        .loc[df_municipalities.DATA == moment, ['CODIGO_MUNICIPIO_6', 'OBITOS']]
        .dropna()
        .rename(columns={'OBITOS': column})
    )
    # Drop a copy of the column left by a previous run of this cell; otherwise the
    # merge would emit '<column>_x' / '<column>_y' and the fillna below would fail.
    df_mortality = (
        df_mortality
        .drop(columns=column, errors='ignore')
        .merge(df_snapshot, how='left', on='CODIGO_MUNICIPIO_6')
    )
    df_mortality[column] = df_mortality[column].fillna(0)

In [18]:
# Death counts expressed per 100,000 inhabitants; the 2022 population is the
# denominator for every period. Pairs are (rate column to create, count column).
rate_sources = [
    ('TAXA_OBITOS_ACUMULADOS_PRIMEIRO_SEMESTRE_2020', 'OBITOS_ACUMULADOS_PRIMEIRO_SEMESTRE_2020'),
    ('TAXA_OBITOS_ACUMULADO_2020', 'OBITOS_ACUMULADOS_2020'),
    ('TAXA_OBITOS_ACUMULADO_2021', 'OBITOS_ACUMULADOS_2021'),
    ('TAXA_OBITOS_ACUMULADO', 'OBITOS'),
]
for rate_column, deaths_column in rate_sources:
    df_mortality[rate_column] = df_mortality[deaths_column] / df_mortality['POPULACAO_2022'] * 100000

In [19]:
# Per-year rates are the differences between consecutive cumulative rates;
# 2020 and its first semester start from zero, so they equal the cumulative value.
df_mortality['TAXA_OBITOS_2022'] = df_mortality['TAXA_OBITOS_ACUMULADO'].sub(
    df_mortality['TAXA_OBITOS_ACUMULADO_2021']
)
df_mortality['TAXA_OBITOS_2021'] = df_mortality['TAXA_OBITOS_ACUMULADO_2021'].sub(
    df_mortality['TAXA_OBITOS_ACUMULADO_2020']
)
df_mortality['TAXA_OBITOS_2020'] = df_mortality['TAXA_OBITOS_ACUMULADO_2020']
df_mortality['TAXA_OBITOS_PRIMEIRO_SEMESTRE_2020'] = df_mortality['TAXA_OBITOS_ACUMULADOS_PRIMEIRO_SEMESTRE_2020']

In [20]:
# Summary statistics of the per-period death rates (per 100,000 inhabitants).
summary_rate_columns = [
    'TAXA_OBITOS_PRIMEIRO_SEMESTRE_2020',
    'TAXA_OBITOS_2020',
    'TAXA_OBITOS_2021',
    'TAXA_OBITOS_2022',
    'TAXA_OBITOS_ACUMULADO',
]
df_mortality[summary_rate_columns].describe()

Unnamed: 0,TAXA_OBITOS_PRIMEIRO_SEMESTRE_2020,TAXA_OBITOS_2020,TAXA_OBITOS_2021,TAXA_OBITOS_2022,TAXA_OBITOS_ACUMULADO
count,5570.0,5570.0,5570.0,5570.0,5570.0
mean,14.696325,66.152095,178.27501,30.605729,275.032835
std,23.331547,46.947576,100.483435,26.902876,129.373638
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,32.310178,102.28871,12.470605,179.122619
50%,4.746649,58.304085,162.801932,25.557554,259.386552
75%,20.348214,92.691946,239.30602,42.327151,352.820751
max,307.503075,433.526012,712.634823,225.479143,885.296382


## Summary of deaths by two-month period

In [21]:
# Last day of every second month, from April 2020 to December 2022 (ISO format).
end_months_str = [
    '2020-04-30', '2020-06-30', '2020-08-31', '2020-10-31', '2020-12-31',
    '2021-02-28', '2021-04-30', '2021-06-30', '2021-08-31', '2021-10-31', '2021-12-31',
    '2022-02-28', '2022-04-30', '2022-06-30', '2022-08-31', '2022-10-31', '2022-12-31',
]

In [22]:
# Same cut-off dates as datetime objects, in the same order as end_months_str.
end_months_datetime = list(map(datetime.fromisoformat, end_months_str))

In [23]:
# Column name for the cumulative death count at each cut-off date (year + zero-padded month).
month_column_template = 'OBITOS_ACUMULADOS_{}_{:02d}'
month_columns = [
    month_column_template.format(moment.year, moment.month)
    for moment in end_months_datetime
]

In [24]:
# For every two-month cut-off date, attach the death count observed on that date
# and the corresponding rate per 100,000 inhabitants (2022 population).
# Municipalities with no row at a given date get 0.
for moment, column in zip(end_months_datetime, month_columns):
    df_snapshot = (
        df_municipalities
        .loc[df_municipalities.DATA == moment, ['CODIGO_MUNICIPIO_6', 'OBITOS']]
        .dropna()
        .rename(columns={'OBITOS': column})
    )
    # Drop a copy of the column left by a previous run of this cell; otherwise the
    # merge would emit '<column>_x' / '<column>_y' and the fillna below would fail.
    df_mortality = (
        df_mortality
        .drop(columns=column, errors='ignore')
        .merge(df_snapshot, how='left', on='CODIGO_MUNICIPIO_6')
    )
    df_mortality[column] = df_mortality[column].fillna(0)
    df_mortality[column+'_TAXA'] = df_mortality[column] / df_mortality.POPULACAO_2022 * 100000

In [25]:
# Rate of each two-month window = cumulative rate at its end minus the cumulative
# rate at the previous cut-off; the first window has no predecessor and keeps
# its cumulative rate.
previous_rate_column = None
for column in month_columns:
    current_rate_column = column + '_TAXA'
    window_rate_column = column + '_TAXA_DOIS_MESES'
    if previous_rate_column is None:
        df_mortality[window_rate_column] = df_mortality[current_rate_column]
    else:
        df_mortality[window_rate_column] = df_mortality[current_rate_column] - df_mortality[previous_rate_column]
    previous_rate_column = current_rate_column

In [26]:
# Inspect the columns accumulated so far (yearly and two-month counts and rates).
df_mortality.columns

Index(['CODIGO_MUNICIPIO_6', 'CODIGO_MUNICIPIO_7', 'MUNICIPIO', 'SIGLA_ESTADO',
       'ESTADO', 'REGIAO', 'POPULACAO_2022', 'CAPITAL', 'DATA', 'OBITOS',
       'OBITOS_ACUMULADOS_PRIMEIRO_SEMESTRE_2020', 'OBITOS_ACUMULADOS_2020',
       'OBITOS_ACUMULADOS_2021',
       'TAXA_OBITOS_ACUMULADOS_PRIMEIRO_SEMESTRE_2020',
       'TAXA_OBITOS_ACUMULADO_2020', 'TAXA_OBITOS_ACUMULADO_2021',
       'TAXA_OBITOS_ACUMULADO', 'TAXA_OBITOS_2022', 'TAXA_OBITOS_2021',
       'TAXA_OBITOS_2020', 'TAXA_OBITOS_PRIMEIRO_SEMESTRE_2020',
       'OBITOS_ACUMULADOS_2020_04', 'OBITOS_ACUMULADOS_2020_04_TAXA',
       'OBITOS_ACUMULADOS_2020_06', 'OBITOS_ACUMULADOS_2020_06_TAXA',
       'OBITOS_ACUMULADOS_2020_08', 'OBITOS_ACUMULADOS_2020_08_TAXA',
       'OBITOS_ACUMULADOS_2020_10', 'OBITOS_ACUMULADOS_2020_10_TAXA',
       'OBITOS_ACUMULADOS_2020_12', 'OBITOS_ACUMULADOS_2020_12_TAXA',
       'OBITOS_ACUMULADOS_2021_02', 'OBITOS_ACUMULADOS_2021_02_TAXA',
       'OBITOS_ACUMULADOS_2021_04', 'OBITOS_ACUMULADOS_2

## Discretization of the mortality rate into 6 levels using a Gaussian Mixture with spherical covariance

In [27]:
# Human-readable label for every rate column that is discretized below.
# Yearly / whole-period rates come first, then the two-month windows in chronological order.
period_rate_labels = {
    'TAXA_OBITOS_PRIMEIRO_SEMESTRE_2020': 'Death rate (1/2020)',
    'TAXA_OBITOS_2020': 'Death rate (2020)',
    'TAXA_OBITOS_2021': 'Death rate (2021)',
    'TAXA_OBITOS_2022': 'Death rate (2022)',
    'TAXA_OBITOS_ACUMULADO': 'Death rate (accumulated period)',
}
two_month_rate_labels = {
    # 2020
    'OBITOS_ACUMULADOS_2020_04_TAXA_DOIS_MESES': 'Death rate (March and April)',
    'OBITOS_ACUMULADOS_2020_06_TAXA_DOIS_MESES': 'Death rate (May and June)',
    'OBITOS_ACUMULADOS_2020_08_TAXA_DOIS_MESES': 'Death rate (July and August)',
    'OBITOS_ACUMULADOS_2020_10_TAXA_DOIS_MESES': 'Death rate (September and October)',
    'OBITOS_ACUMULADOS_2020_12_TAXA_DOIS_MESES': 'Death rate (November and December)',
    # 2021
    'OBITOS_ACUMULADOS_2021_02_TAXA_DOIS_MESES': 'Death rate (January and February)',
    'OBITOS_ACUMULADOS_2021_04_TAXA_DOIS_MESES': 'Death rate (March and April)',
    'OBITOS_ACUMULADOS_2021_06_TAXA_DOIS_MESES': 'Death rate (May and June)',
    'OBITOS_ACUMULADOS_2021_08_TAXA_DOIS_MESES': 'Death rate (July and August)',
    'OBITOS_ACUMULADOS_2021_10_TAXA_DOIS_MESES': 'Death rate (September and October)',
    'OBITOS_ACUMULADOS_2021_12_TAXA_DOIS_MESES': 'Death rate (November and December)',
    # 2022
    'OBITOS_ACUMULADOS_2022_02_TAXA_DOIS_MESES': 'Death rate (January and February)',
    'OBITOS_ACUMULADOS_2022_04_TAXA_DOIS_MESES': 'Death rate (March and April)',
    'OBITOS_ACUMULADOS_2022_06_TAXA_DOIS_MESES': 'Death rate (May and June)',
    'OBITOS_ACUMULADOS_2022_08_TAXA_DOIS_MESES': 'Death rate (July and August)',
    'OBITOS_ACUMULADOS_2022_10_TAXA_DOIS_MESES': 'Death rate (September and October)',
    'OBITOS_ACUMULADOS_2022_12_TAXA_DOIS_MESES': 'Death rate (November and December)',
}
dict_column_labels = {**period_rate_labels, **two_month_rate_labels}

# Level names from lowest to highest mortality: display labels and the matching
# snake_case names used as column headers of the probability tables.
labels_sorted = ['Very low', 'Low', 'Intermediate', 'High', 'Very high', 'Extremely high']
level_column_names = ['very_low', 'low', 'intermediate', 'high', 'very_high', 'extremely_high']

In [28]:
# Fit one 1-D Gaussian mixture per rate column and turn each municipality's
# component assignment into an ordered mortality level.
#
# For every column in `dict_column_labels` this cell:
#   * adds 'NIVEL_<column>' (raw mixture component index) and
#     'NIVEL_LABEL_<column>' (ordered categorical label) to `df_mortality`;
#   * saves the fitted weights, means and precisions as .npy files under data/;
#   * saves the per-municipality component probabilities as a CSV under data/.

# EM starts from random initialisations; a fixed seed makes the component
# assignment (and therefore the saved levels) reproducible between runs.
GMM_RANDOM_STATE = 42
n_levels = len(labels_sorted)
cat_type = CategoricalDtype(categories=labels_sorted, ordered=True)

# np.save / to_csv do not create missing directories.
os.makedirs('data', exist_ok=True)

for column_period in dict_column_labels.keys():
    df = df_mortality  # alias, not a copy: the new columns are written to df_mortality itself
    X = df[[column_period]]

    classificator = GaussianMixture(
        n_levels,
        n_init=100,
        covariance_type='spherical',
        random_state=GMM_RANDOM_STATE,
    )
    nivel_coluna = 'NIVEL_' + column_period
    df[nivel_coluna] = classificator.fit_predict(X)

    filename = 'data/em_spherical_weights_' + column_period + '.npy'
    np.save(filename, classificator.weights_)
    filename = 'data/em_spherical_means_' + column_period + '.npy'
    np.save(filename, classificator.means_)
    filename = 'data/em_spherical_precisions_' + column_period + '.npy'
    np.save(filename, classificator.precisions_)

    df_probability_mortality_levels = pd.DataFrame(classificator.predict_proba(X), index=df.index)

    # Component indices returned by the mixture are arbitrary; rank the components
    # by the smallest rate assigned to each of them. Only the rate column is
    # aggregated instead of taking the minimum of every column of the frame.
    levels_mortality_sorted = df.groupby(nivel_coluna)[column_period].min().sort_values().index

    # Position k holds the level name of mixture component k, so the list can be
    # used directly as header of the predict_proba columns. A component that
    # received no municipality does not appear in the groupby and keeps None.
    level_column_names_original_order = [None] * n_levels
    component_to_label = {}
    for i, component in enumerate(levels_mortality_sorted):
        component_to_label[int(component)] = labels_sorted[i]
        level_column_names_original_order[int(component)] = level_column_names[i]

    nivel_label = 'NIVEL_LABEL_' + column_period
    df[nivel_label] = df[nivel_coluna].map(component_to_label).astype(cat_type)
    df_probability_mortality_levels.columns = level_column_names_original_order

    filename = 'data/df_probability_levels_' + column_period + '.csv'
    df_probability_mortality_levels.to_csv(filename, index=True)

Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7fc1b5330280>
Traceback (most recent call last):
  File "/home/helder/anaconda3/envs/base-r/lib/python3.9/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/helder/anaconda3/envs/base-r/lib/python3.9/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/helder/anaconda3/envs/base-r/lib/python3.9/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/helder/anaconda3/envs/base-r/lib/python3.9/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo

## Saving data

In [29]:
# Persist the enriched municipality table; the row index is written as the first column.
filename = 'data/df_mortality.csv'
df_mortality.to_csv(path_or_buf=filename, index=True)

In [30]:
# Display the final frame, which now also holds the NIVEL_* and NIVEL_LABEL_* columns.
df_mortality

Unnamed: 0,CODIGO_MUNICIPIO_6,CODIGO_MUNICIPIO_7,MUNICIPIO,SIGLA_ESTADO,ESTADO,REGIAO,POPULACAO_2022,CAPITAL,DATA,OBITOS,...,NIVEL_OBITOS_ACUMULADOS_2022_04_TAXA_DOIS_MESES,NIVEL_LABEL_OBITOS_ACUMULADOS_2022_04_TAXA_DOIS_MESES,NIVEL_OBITOS_ACUMULADOS_2022_06_TAXA_DOIS_MESES,NIVEL_LABEL_OBITOS_ACUMULADOS_2022_06_TAXA_DOIS_MESES,NIVEL_OBITOS_ACUMULADOS_2022_08_TAXA_DOIS_MESES,NIVEL_LABEL_OBITOS_ACUMULADOS_2022_08_TAXA_DOIS_MESES,NIVEL_OBITOS_ACUMULADOS_2022_10_TAXA_DOIS_MESES,NIVEL_LABEL_OBITOS_ACUMULADOS_2022_10_TAXA_DOIS_MESES,NIVEL_OBITOS_ACUMULADOS_2022_12_TAXA_DOIS_MESES,NIVEL_LABEL_OBITOS_ACUMULADOS_2022_12_TAXA_DOIS_MESES
0,110001,1100015,Alta Floresta D'Oeste,RO,Rondônia,Norte,21495,0,2022-12-31,93.0,...,2,Intermediate,0,Low,0,Very low,3,Intermediate,2,Low
1,110002,1100023,Ariquemes,RO,Rondônia,Norte,96833,0,2022-12-31,540.0,...,0,Low,2,Very low,1,High,0,Very low,2,Low
2,110003,1100031,Cabixi,RO,Rondônia,Norte,5363,0,2022-12-31,15.0,...,4,Very low,4,Intermediate,0,Very low,0,Very low,0,Very low
3,110004,1100049,Cacoal,RO,Rondônia,Norte,86895,0,2022-12-31,351.0,...,0,Low,0,Low,3,Low,0,Very low,2,Low
4,110005,1100056,Cerejeiras,RO,Rondônia,Norte,15890,0,2022-12-31,76.0,...,4,Very low,0,Low,3,Low,0,Very low,2,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5565,522200,5222005,Vianópolis,GO,Goiás,Centro-Oeste,14956,0,2022-12-31,47.0,...,4,Very low,0,Low,3,Low,0,Very low,2,Low
5566,522205,5222054,Vicentinópolis,GO,Goiás,Centro-Oeste,8768,0,2022-12-31,34.0,...,4,Very low,2,Very low,0,Very low,0,Very low,0,Very low
5567,522220,5222203,Vila Boa,GO,Goiás,Centro-Oeste,4215,0,2022-12-31,7.0,...,4,Very low,2,Very low,1,High,0,Very low,0,Very low
5568,522230,5222302,Vila Propício,GO,Goiás,Centro-Oeste,5815,0,2022-12-31,11.0,...,4,Very low,2,Very low,1,High,0,Very low,0,Very low
