In [565]:
import numpy as np
import pandas as pd
import os
from helper_functions import *
from herams_helper_functions import *

In [2]:
def reorder_columns(data, colnum):
    """Return ``data`` with its column order rotated around position ``colnum``.

    The column list is cut at index ``colnum``: the columns from ``colnum``
    onward come first, followed by the columns before it. With a negative
    ``colnum`` this brings the trailing columns to the front (``-1`` moves the
    last column first, ``-2`` the last two, and so on).
    """
    names = data.columns.tolist()
    leading, trailing = names[:colnum], names[colnum:]
    return data[trailing + leading]

In [3]:
def convert_to_int(df):
    """Return a copy of ``df`` with every float64 column cast to nullable ``Int64``.

    Missing values (NaN) become ``<NA>``. Columns of any other dtype are left
    as they are. As with ``Series.astype('Int64')``, a float column holding
    non-integral values makes the cast raise.

    The frame passed in is not modified: the conversion is done with
    ``DataFrame.astype``, which builds a new frame. This avoids rewriting
    columns on the caller's object, which also triggers
    ``SettingWithCopyWarning`` when that object is a filtered slice.
    """
    dtypes = df.dtypes
    float_columns = dtypes[dtypes == float].index.tolist()
    return df.astype({col: 'Int64' for col in float_columns})

### Meningite

In [19]:
# Load the 'Meningite ' sheet (the sheet name ends with a space) from the workbook;
# header=4 makes the row at 0-based position 4 the column labels.
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Meningite ', header=4)

In [20]:
# Keep only the first 87 rows and discard the last 12 columns of the sheet.
# NOTE(review): 87 and 12 are hard-coded for this workbook layout — presumably they
# cut off footer rows and trailing summary columns; confirm against the sheet.
df = df.iloc[:87, :-12]
# clean_string is not defined in this notebook (presumably provided by one of the
# star-imported helper modules); it is applied to the REGION and DISTRICT columns.
df = clean_string(df, ['REGION', 'DISTRICT'])

In [21]:
# Rows whose REGION text starts with 'TOTAL' are flagged with DISTRICT == 'REGION',
# and the first six characters are cut from their REGION value (presumably the word
# 'TOTAL' plus one separator character, leaving the bare region name).
total_rows = df['REGION'].str.startswith('TOTAL')
df.loc[total_rows, 'DISTRICT'] = 'REGION'
df.loc[total_rows, 'REGION'] = df['REGION'].str[6:]

In [22]:
# Spelling maps: key = name as it appears in the workbook, value = replacement spelling
# (presumably the spellings used in data/cercle.csv and data/regions.csv — confirm).
# Both dictionaries are reused by the later disease sections of this notebook.
cercle_replace = {'COMMUNE 1': 'COMMUNE I', 'COMMUNE 2': 'COMMUNE II', 'COMMUNE 3': 'COMMUNE III', 'COMMUNE 4': 'COMMUNE IV', 'COMMUNE 5': 'COMMUNE V', 
                  'COMMUNE 6': 'COMMUNE VI', 'ACHOURATT': 'ACHOURAT', 'AÏBEBARA': 'ABEIBARA', 'ALMOUSTRAT': 'ALMOUSTARAT', 'AL_OURCH': 'AL-OURCHE',
                  'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 'GOURMA RHAROUS': 'GOURMA-RHAROUS', 'TAOUDÉNI': 'TAOUDENIT', 'FOUM_ALBA': 'FOUM-ELBA',
                  'NIÈNA': 'NIENA', 'TINDERMEN': 'TIDERMENE', 'SÉFETO': 'SEFETO', 'MÉNAKA': 'MENAKA', 'TINESSAKO': 'TIN-ESSAKO'}
# 'GENERAL' is mapped to 'MALI'; later cells filter out rows whose REGION is 'MALI'.
region_replace = {'SÉGOU': 'SEGOU', 'MÉNAKA': 'MENAKA', 'TAOUDÉNI': 'TAOUDENIT', 'GENERAL': 'MALI'}
# replace_values is an external helper; presumably it applies the i-th dictionary to
# the i-th listed column (DISTRICT <- cercle_replace, REGION <- region_replace) — confirm.
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [23]:
# Reference tables for regions and cercles; every disease section below merges against them.
regions = pd.read_csv('data/regions.csv')
cercle = pd.read_csv('data/cercle.csv')
# get_serial_column is an external helper; presumably it adds a key column named by
# the last argument, derived from the listed columns — confirm. Later cells merge on
# 'RegionIndex' and read 'CercleIndex'.
regions = get_serial_column(regions, ['REGION'], 'RegionIndex')
# NOTE(review): this call lists 'RegionIndex' as an input column, so cercle.csv is
# expected to already contain it — confirm.
cercle = get_serial_column(cercle, ['RegionIndex', 'CERCLE'], 'CercleIndex')

In [24]:
# Reshape wide -> long: the first five columns identify a row, every remaining
# column becomes one (INDICATOR, WEEKLY VALUE) pair. The result is ordered by
# region, district and indicator and given a fresh 0..n-1 index.
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = (
    pd.melt(
        df,
        id_vars=id_var_columns,
        value_vars=value_var_columns,
        var_name='INDICATOR',
        value_name='WEEKLY VALUE',
    )
    .sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
    .reset_index(drop=True)
)

In [25]:
# Discard the 'CAS00' indicator and the national 'MALI' rows, then split what is
# left into region-total rows (DISTRICT == 'REGION') and district rows.
df_long = df_long.loc[~((df_long['INDICATOR'] == 'CAS00') | (df_long['REGION'] == 'MALI'))]
df_reg = df_long.loc[df_long['DISTRICT'].eq('REGION')]
df_long = df_long.loc[df_long['DISTRICT'].ne('REGION')]

In [26]:
# Region table: DISTRICT and ISOCODE are dropped, the index is renumbered, the
# regions reference table is merged in on REGION, and finally the last column is
# rotated to the front.
df_reg = df_reg.drop(columns=['DISTRICT', 'ISOCODE']).reset_index(drop=True)
df_reg = reorder_columns(
    merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION']),
    -1,
)

In [27]:
# District table: merge with the regions table on REGION, then with the cercle table
# on (RegionIndex, DISTRICT) == (RegionIndex, CERCLE).
# NOTE(review): merge_columns is not defined anywhere above this cell — presumably it
# is supplied by the star-imported helper modules here, while a function of the same
# name is defined much further down in this notebook. Confirm both behave the same
# (the one defined below drops the columns listed in the last argument after merging).
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
# Rotate the last two columns to the front.
df_long = reorder_columns(df_long, -2)

In [28]:
# convert_to_int (defined near the top of the notebook) casts every float64 column
# to the nullable 'Int64' dtype.
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
# WEEKLY VALUE alone is then cast to float, so it ends up as a float column in both
# tables while the other formerly-float columns stay Int64.
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [29]:
# Write the region-level and district-level tables as CSV files, without the index column.
df_reg.to_csv('data/region_meningite.csv', index=False)
df_long.to_csv('data/meningite.csv', index=False)

### Rougeole

In [30]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Rougeole', header=3)

In [31]:
df = df.iloc[:87, :-10]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [32]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)

In [33]:
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [34]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [35]:
df_long = df_long[(df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [36]:
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [37]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [38]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [39]:
df_reg.to_csv('data/region_rougeole.csv', index=False)
df_long.to_csv('data/rougeole.csv', index=False)

### Fièvre Jaune

In [40]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Fièvre Jaune', header=4)

In [41]:
df = df.iloc[:87, :-15]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [42]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [43]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [44]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [45]:
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [46]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [47]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [48]:
df_reg.to_csv('data/region_fievre_jaune.csv', index=False)
df_long.to_csv('data/fievre_jaune.csv', index=False)

### Cholera

In [49]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Cholera ', header=4)

In [50]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [51]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [52]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [53]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [54]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [55]:
df_reg.to_csv('data/region_cholera.csv', index=False)
df_long.to_csv('data/cholera.csv', index=False)

### Rage Humaine

In [64]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Rage Humaine ', header=4)

In [65]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [66]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [67]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [68]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [69]:
df_reg.loc[df_reg.AN.isnull(), 'AN'] = 2020

In [70]:
df_reg.to_csv('data/region_rage_humaine.csv', index=False)
df_long.to_csv('data/rage_humaine.csv', index=False)

### Charbon

In [73]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Charbon', header=3)

In [74]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [75]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [76]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [77]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [78]:
df_reg.to_csv('data/region_charbon.csv', index=False)
df_long.to_csv('data/charbon.csv', index=False)

### Piqures Serpents 

In [89]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='piqures Serpents ', header=3)

In [90]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [91]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [92]:
# Replace the 'Err. saisie!!' marker in the region table's WEEKLY VALUE column with
# NaN, so that the cast of that column to float in the next cell does not hit a string.
# NOTE(review): only df_reg is cleaned here, although df_long['WEEKLY VALUE'] is cast
# to float in the next cell too — presumably the marker occurs only in region-total
# rows of this sheet; confirm.
df_reg.loc[df_reg['WEEKLY VALUE']=='Err. saisie!!', 'WEEKLY VALUE'] = np.nan

In [93]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [94]:
df_reg.to_csv('data/region_piqures_serpents.csv', index=False)
df_long.to_csv('data/piqures_serpents.csv', index=False)

### Morsures de chien (sheet ' Morssures Chien ')

In [100]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name=' Morssures Chien ', header=3)

In [101]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [102]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [103]:
df_reg.loc[df_reg['WEEKLY VALUE']=='Err. saisie!!', 'WEEKLY VALUE'] = np.nan

In [104]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [105]:
df_reg.to_csv('data/region_morssures_chien.csv', index=False)
df_long.to_csv('data/morssures_chien.csv', index=False)

### Piqures Scorpion 

In [106]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='piqures Scorpion ', header=3)

In [107]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [108]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [109]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [110]:
df_reg.to_csv('data/region_piqures_scorpion.csv', index=False)
df_long.to_csv('data/piqures_scorpion.csv', index=False)

### Diarrhée rouge

In [111]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Diarrhée rouge', header=3)

In [112]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [113]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [114]:
df_long = convert_to_int(df_long)
df_reg = convert_to_int(df_reg)
df_long['WEEKLY VALUE'] = df_long['WEEKLY VALUE'].astype('float')
df_reg['WEEKLY VALUE'] = df_reg['WEEKLY VALUE'].astype('float')

In [115]:
df_reg.to_csv('data/region_diarhee_rouge.csv', index=False)
df_long.to_csv('data/diarhee_rouge.csv', index=False)

### COVID-19

In [148]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='COVID19', header=3)

In [149]:
df = df.iloc[:87, :-7]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [151]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [152]:
df_reg['POP'] = df_reg['POP'].astype('Int64')
df_long['POP'] = df_long['POP'].astype('Int64')

In [153]:
df_reg.to_csv('data/region_covid19.csv', index=False)
df_long.to_csv('data/covid19.csv', index=False)

### PFA

In [232]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='PFA', header=3)

In [233]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})

In [234]:
df = df.iloc[:87, :-5]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [235]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'

In [236]:
cercle_replace_ = {'ACHOURATT': 'ACHOURAT', 'ALMOUSTRAT': 'ALMOUSTARAT', 'AL-OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 
                   'TAOUDÉNIT': 'TAOUDENIT', 'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 'SAGABARY': 'SAGABARI',
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU'}

In [237]:
df = replace_values(df, ['DISTRICT'], [cercle_replace_])

In [238]:
id_var_columns = list(df.columns[:3])
value_var_columns = list(df.columns[3:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='CAS')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [239]:
assert df_long.REGION.nunique() == 12
assert df_long.DISTRICT.nunique() == 76

In [240]:
# Drop the national 'MALI' rows, then separate the region-total rows
# (DISTRICT == 'REGION', set a few cells earlier) from the district rows.
df_long = df_long[df_long['REGION'] != 'MALI']
# .copy() gives each subset its own data, so adding YEAR below is an ordinary
# column assignment instead of a write to a filtered slice of the previous
# df_long (which makes pandas emit SettingWithCopyWarning).
df_reg = df_long[df_long['DISTRICT'] == 'REGION'].copy()
df_long = df_long[df_long['DISTRICT'] != 'REGION'].copy()
df_long['YEAR'] = 2020
df_reg['YEAR'] = 2020

In [241]:
assert df_reg.REGION.nunique() == 11
assert df_long.REGION.nunique() == 11
assert df_long.DISTRICT.nunique() == 75

In [242]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [243]:
assert df_reg.RegionIndex.nunique() == 11

In [244]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [246]:
df_long = df_long[['RegionIndex', 'CercleIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS']]
df_reg = df_reg[['RegionIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS']]

In [247]:
df_long['POP'] = df_long['POP'].round()
df_long['POP'] = df_long['POP'].astype('Int64')

In [249]:
df_reg['POP'] = df_reg['POP'].round()
df_reg['POP'] = df_reg['POP'].astype('Int64')

In [250]:
assert df_long.RegionIndex.nunique() == 11
assert df_long.CercleIndex.nunique() == 75
assert df_long.SEMAINE.nunique() == 53

In [251]:
df_reg.to_csv('data/region_PFA.csv', index=False)
df_long.to_csv('data/PFA.csv', index=False)

In [253]:
df_reg.columns, df_long.columns

(Index(['RegionIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS'], dtype='object'),
 Index(['RegionIndex', 'CercleIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS'], dtype='object'))

### DCD_Maternel

In [269]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='DCD_Maternel', header=3)

In [270]:
# Remove every column whose label starts with 'Unnamed' (the placeholder labels
# pandas gives to columns that have no header text).
cols_to_drop = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=cols_to_drop)

In [271]:
df = df.iloc[:86, :-1]

In [272]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])

In [273]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'

In [274]:
cercle_replace_2 = {'ACHOURATT': 'ACHOURAT', 'ALMOUSTRAT': 'ALMOUSTARAT', 'AL_OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 
                   'TAOUDÉNIT': 'TAOUDENIT', 'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 'SAGABARY': 'SAGABARI',
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU'}
df = replace_values(df, ['DISTRICT'], [cercle_replace_2])

In [275]:
id_var_columns = list(df.columns[:3])
value_var_columns = list(df.columns[3:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='CAS')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [276]:
# Drop the national 'MALI' rows, then separate the region-total rows
# (DISTRICT == 'REGION', set a few cells earlier) from the district rows.
df_long = df_long[df_long['REGION'] != 'MALI']
# .copy() gives each subset its own data, so adding YEAR below is an ordinary
# column assignment instead of a write to a filtered slice of the previous
# df_long (which makes pandas emit SettingWithCopyWarning).
df_reg = df_long[df_long['DISTRICT'] == 'REGION'].copy()
df_long = df_long[df_long['DISTRICT'] != 'REGION'].copy()
df_long['YEAR'] = 2020
df_reg['YEAR'] = 2020

In [277]:
assert df_reg.REGION.nunique() == 11
assert df_long.REGION.nunique() == 11
assert df_long.DISTRICT.nunique() == 74

In [278]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [279]:
assert df_reg.RegionIndex.nunique() == 11

In [280]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [281]:
assert df_long.RegionIndex.nunique() == 11
assert df_long.CercleIndex.nunique() == 74
assert df_long.SEMAINE.nunique() == 53

In [283]:
df_long['POP'] = df_long['POP'].round()
df_long['POP'] = df_long['POP'].astype('Int64')
df_reg['POP'] = df_reg['POP'].round()
df_reg['POP'] = df_reg['POP'].astype('Int64')

In [284]:
df_long = df_long[['RegionIndex', 'CercleIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS']]
df_reg = df_reg[['RegionIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS']]

In [285]:
df_reg.to_csv('data/region_DCD_Maternel.csv', index=False)
df_long.to_csv('data/DCD_Maternel.csv', index=False)

### DCD NN

In [301]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='DCD NN', header=3)

In [302]:
df = df.iloc[:95, :-6]

In [303]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])

In [304]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df = replace_values(df, ['DISTRICT'], [cercle_replace_2])

In [305]:
# Districts present in the sheet that have no counterpart in the cercle reference
# table; the next cell removes these rows from df.
# The 'REGION' marker (region-total rows) is excluded inside the comprehension, which
# avoids the ValueError that list.remove raises when the marker is absent. The set is
# built once so each membership test no longer recomputes cercle.CERCLE.unique().
known_cercles = set(cercle.CERCLE.unique())
dis = [d for d in df.DISTRICT.unique() if d != 'REGION' and d not in known_cercles]

In [306]:
df = df[~df.DISTRICT.isin(dis)]

In [307]:
id_var_columns = list(df.columns[:3])
value_var_columns = list(df.columns[3:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='CAS')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [308]:
# Drop the national 'MALI' rows, then separate the region-total rows
# (DISTRICT == 'REGION', set a few cells earlier) from the district rows.
df_long = df_long[df_long['REGION'] != 'MALI']
# .copy() gives each subset its own data, so adding YEAR below is an ordinary
# column assignment instead of a write to a filtered slice of the previous
# df_long (which makes pandas emit SettingWithCopyWarning).
df_reg = df_long[df_long['DISTRICT'] == 'REGION'].copy()
df_long = df_long[df_long['DISTRICT'] != 'REGION'].copy()
df_long['YEAR'] = 2020
df_reg['YEAR'] = 2020

In [309]:
assert df_reg.REGION.nunique() == 11
assert df_long.REGION.nunique() == 11
assert df_long.DISTRICT.nunique() == 74

In [310]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [311]:
assert df_reg.RegionIndex.nunique() == 11

In [312]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [313]:
assert df_long.RegionIndex.nunique() == 11
assert df_long.CercleIndex.nunique() == 74
assert df_long.SEMAINE.nunique() == 53

In [314]:
df_long['POP'] = df_long['POP'].round()
df_long['POP'] = df_long['POP'].astype('Int64')
df_reg['POP'] = df_reg['POP'].round()
df_reg['POP'] = df_reg['POP'].astype('Int64')

In [315]:
df_long = df_long[['RegionIndex', 'CercleIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS']]
df_reg = df_reg[['RegionIndex', 'YEAR', 'POP', 'SEMAINE', 'CAS']]

In [316]:
df_reg.to_csv('data/region_DCD_NN.csv', index=False)
df_long.to_csv('data/DCD_NN.csv', index=False)

### TNN

In [566]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='TNN', header=2)

In [567]:
cols_to_drop = [col for col in df.columns.tolist() if str(col).startswith('Unnamed')]
df = df.drop(cols_to_drop, axis=1)

In [568]:
df = df.iloc[:83, :-8]
df = df.drop('3-28 jours', axis=1)

In [569]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])

In [570]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'

In [571]:
cercle_replace_4 = {'ACHOURATT': 'ACHOURAT', 'AL_OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 
                   'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU', 'SELENGUE': 'SELINGUE', 'NIONIO': 'NIONO', 'ANSONGOU': 'ANSONGO', 'TAOUDÉNI': 'TAOUDENIT'}

In [572]:
df = replace_values(df, ['DISTRICT'], [cercle_replace_4])

In [573]:
id_var_columns = list(df.columns[:2])
value_var_columns = list(df.columns[2:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='NUMBER')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [574]:
df_long.loc[df_long['SEMAINE'] == 'Cas', 'SEMAINE'] = 'Cas.0'
df_long.loc[df_long['SEMAINE'] == 'Décè', 'SEMAINE'] = 'Décè.0'

In [575]:
df_long[['INDICATOR', 'WEEK']] = df_long['SEMAINE'].str.split('.', expand=True)

In [576]:
# Keep the columns produced by the split (the combined SEMAINE label is left out by
# the selection) and expose the numeric suffix under the SEMAINE name.
df_long = (
    df_long[['REGION', 'DISTRICT', 'INDICATOR', 'WEEK', 'NUMBER']]
    .rename(columns={'WEEK': 'SEMAINE'})
)
# The suffix starts at 0 (the bare 'Cas' / 'Décè' labels were given '.0' earlier),
# so adding 1 makes SEMAINE start at 1. The integer cast already yields a numeric column.
df_long['SEMAINE'] = df_long['SEMAINE'].astype('int') + 1
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE', 'INDICATOR'], axis=0)

In [577]:
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [578]:
def merge_columns(df1, df2, left_on_list, right_on_list, drop_list):
    """Inner-join ``df2`` onto ``df1`` and drop the given columns.

    The join must return exactly as many rows as ``df1`` has, i.e. every row
    of ``df1`` has to find exactly one partner in ``df2``.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Left frame.
    df2 : pandas.DataFrame
        Right (lookup) frame.
    left_on_list, right_on_list : list of str
        Key columns of ``df1`` and ``df2`` respectively.
    drop_list : list of str
        Columns removed from the merged frame before it is returned.

    Returns
    -------
    pandas.DataFrame
        The merged frame without the ``drop_list`` columns. Neither input is
        modified.

    Raises
    ------
    AssertionError
        If the merged row count differs from ``len(df1)``. The message lists
        the key combinations of ``df1`` that have no counterpart in ``df2``.
        Unlike a bare ``assert`` this check also runs under ``python -O``.
    """
    merged = df1.merge(df2, left_on=left_on_list, right_on=right_on_list)

    if merged.shape[0] != df1.shape[0]:
        # Build a diagnostic instead of failing silently: which left keys are
        # missing on the right? (An empty list points at duplicated keys in
        # df2 instead.)
        left_keys = [left_on_list] if isinstance(left_on_list, str) else list(left_on_list)
        right_keys = [right_on_list] if isinstance(right_on_list, str) else list(right_on_list)
        right_seen = set(df2[right_keys].itertuples(index=False, name=None))
        unmatched = [
            key
            for key in df1[left_keys].drop_duplicates().itertuples(index=False, name=None)
            if key not in right_seen
        ]
        raise AssertionError(
            f'merge returned {merged.shape[0]} rows but {df1.shape[0]} were expected; '
            f'left keys {left_keys} without a match in the right frame: {unmatched} '
            f'(an empty list suggests duplicated keys in the right frame)'
        )

    # Return a new frame rather than mutating the merge result in place.
    return merged.drop(drop_list, axis=1)

In [579]:
# Region totals need no DISTRICT column; swap the region name for the
# RegionIndex from the regions table and rotate the column order by one.
df_reg = df_reg.drop(columns=['DISTRICT']).reset_index(drop=True)
df_reg = reorder_columns(
    merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION']),
    -1,
)

In [580]:
# Sanity check: 11 distinct region indices are expected in the totals.
assert df_reg['RegionIndex'].nunique() == 11

In [581]:
# Swap the REGION name for the RegionIndex from the regions table.
df_long = merge_columns(
    df1=df_long,
    df2=regions,
    left_on_list=['REGION'],
    right_on_list=['REGION'],
    drop_list=['REGION'],
)

In [582]:
# Match each district to its cercle within the same region, drop both name
# columns, then rotate the last two columns to the front.
df_long = reorder_columns(
    merge_columns(
        df1=df_long,
        df2=cercle,
        left_on_list=['RegionIndex', 'DISTRICT'],
        right_on_list=['RegionIndex', 'CERCLE'],
        drop_list=['CERCLE', 'DISTRICT'],
    ),
    colnum=-2,
)

In [583]:
# Stamp both tables with the reporting year and fix their output column order.
df_long = df_long.assign(YEAR=2020)[
    ['RegionIndex', 'CercleIndex', 'YEAR', 'INDICATOR', 'SEMAINE', 'NUMBER']
]
df_reg = df_reg.assign(YEAR=2020)[
    ['RegionIndex', 'YEAR', 'INDICATOR', 'SEMAINE', 'NUMBER']
]

In [584]:
# Store CercleIndex with pandas' nullable integer dtype.
df_long = df_long.astype({'CercleIndex': 'Int64'})

In [585]:
# Write the region totals and the district-level rows to separate CSV files,
# without the index column.
df_reg.to_csv(path_or_buf='data/region_TNN.csv', index=False)
df_long.to_csv(path_or_buf='data/TNN.csv', index=False)

### Mort né

In [586]:
# Load the 'Mort né' sheet; header=2 uses the third row of the sheet as column labels.
df = pd.read_excel(
    'data/MLMDO_2020_S_53.xls',
    sheet_name='Mort né',
    header=2,
)

In [587]:
# Discard every column whose label starts with 'Unnamed'.
cols_to_drop = [label for label in df.columns if str(label).startswith('Unnamed')]
df = df.drop(columns=cols_to_drop)

In [588]:
# Keep the first 91 rows and all but the last 5 columns.
df = df.iloc[:91].iloc[:, :-5]

In [589]:
# Switch to the notebook's upper-case column names and run the shared
# clean_string helper over the two name columns.
df = clean_string(
    df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'}),
    ['REGION', 'DISTRICT'],
)
# Rows whose DISTRICT starts with 'TOTAL' get the placeholder district 'REGION'.
df.loc[lambda frame: frame['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
# Rows labelled 'EPH' are excluded.
df = df.loc[df['DISTRICT'].ne('EPH')]

In [590]:
# District names of this sheet that do not appear in the cercle reference
# table. The lookup array is built once instead of once per district, and the
# 'REGION' placeholder is filtered out directly, so the cell no longer raises
# ValueError when that placeholder is absent from the list.
known_cercles = cercle.CERCLE.unique()
dis = [d for d in df.DISTRICT.unique() if d != 'REGION' and d not in known_cercles]

In [591]:
# District spellings found in the 'Mort né' sheet -> spellings to replace them with.
cercle_replace_3 = {
    'ACHOURATT': 'ACHOURAT',
    'AL_OURCH': 'AL-OURCHE',
    'ARAWANE': 'ARAOUANE',
    'BARAOUELI': 'BAROUELI',
    'NIÉNA': 'NIENA',
    'FOUM_ALBA': 'FOUM-ELBA',
    'TINDERMEN': 'TIDERMENE',
    'MÉNAKA': 'MENAKA',
    'SAGABARY': 'SAGABARI',
    'BADIANGARA': 'BANDIAGARA',
    'TENINKOU': 'TENENKOU',
    'SELENGUE': 'SELINGUE',
    'NIONIO': 'NIONO',
    'ANSONGOU': 'ANSONGO',
    'TAOUDÉNI': 'TAOUDENIT',
}
df = replace_values(
    df,
    ['DISTRICT'],
    [cercle_replace_3],
)

In [592]:
# Wide -> long: the first two columns identify a row, every remaining column
# becomes one (SEMAINE, NUMBER) record.
id_var_columns = df.columns[:2].tolist()
value_var_columns = df.columns[2:].tolist()
df_long = (
    df.melt(
        id_vars=id_var_columns,
        value_vars=value_var_columns,
        var_name='SEMAINE',
        value_name='NUMBER',
    )
    .sort_values(by=['REGION', 'DISTRICT', 'SEMAINE'])
    .reset_index(drop=True)
)

In [593]:
# Give the un-suffixed labels an explicit '.0' so every label has the same
# '<indicator>.<n>' shape before it is split.
df_long['SEMAINE'] = df_long['SEMAINE'].replace({'Frais': 'Frais.0', 'Mac': 'Mac.0'})

In [594]:
# Split each '<indicator>.<n>' label at the dot into an indicator name and a
# week offset.
df_long[['INDICATOR', 'WEEK']] = df_long['SEMAINE'].str.split('.', expand=True)
df_long = df_long.drop(['SEMAINE'], axis=1)
df_long = df_long[['REGION', 'DISTRICT', 'INDICATOR', 'WEEK', 'NUMBER']]
df_long = df_long.rename(columns={'WEEK': 'SEMAINE'})
# The parsed offset is zero-based; weeks are stored one-based. astype('int')
# already yields an integer column, so no further numeric conversion is needed.
df_long['SEMAINE'] = df_long['SEMAINE'].astype('int') + 1
# Keep the sorted frame in df_long. Assigning it to `df` threw the sort away
# for everything downstream and overwrote the wide table.
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE', 'INDICATOR'], axis=0)

In [595]:
# Rows carrying the 'REGION' placeholder go to df_reg; all other rows stay in
# df_long.
df_reg = df_long.loc[df_long['DISTRICT'].eq('REGION')]
df_long = df_long.loc[df_long['DISTRICT'].ne('REGION')]

In [596]:
# Region totals need no DISTRICT column; swap the region name for the
# RegionIndex from the regions table and rotate the column order by one.
df_reg = df_reg.drop(columns=['DISTRICT']).reset_index(drop=True)
df_reg = reorder_columns(
    merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION']),
    -1,
)

In [597]:
# Sanity check: 11 distinct region indices are expected in the totals.
assert df_reg['RegionIndex'].nunique() == 11

In [598]:
# First swap the REGION name for its RegionIndex, then match each district to
# its cercle within that region and drop both name columns.
df_long = merge_columns(
    df1=df_long,
    df2=regions,
    left_on_list=['REGION'],
    right_on_list=['REGION'],
    drop_list=['REGION'],
)
df_long = reorder_columns(
    merge_columns(
        df1=df_long,
        df2=cercle,
        left_on_list=['RegionIndex', 'DISTRICT'],
        right_on_list=['RegionIndex', 'CERCLE'],
        drop_list=['CERCLE', 'DISTRICT'],
    ),
    colnum=-2,
)

In [603]:
# Store the counts with pandas' nullable integer dtype in both tables.
df_long = df_long.astype({'NUMBER': 'Int64'})
df_reg = df_reg.astype({'NUMBER': 'Int64'})

In [606]:
# Stamp both tables with the reporting year and fix their output column order.
df_long = df_long.assign(YEAR=2020)[
    ['RegionIndex', 'CercleIndex', 'YEAR', 'INDICATOR', 'SEMAINE', 'NUMBER']
]
df_reg = df_reg.assign(YEAR=2020)[
    ['RegionIndex', 'YEAR', 'INDICATOR', 'SEMAINE', 'NUMBER']
]

In [607]:
# Write the region totals and the district-level rows to separate CSV files,
# without the index column.
df_reg.to_csv(path_or_buf='data/region_mort_ne.csv', index=False)
df_long.to_csv(path_or_buf='data/mort_ne.csv', index=False)

In [608]:
# Show which indicator labels ended up in the exported table.
df_long['INDICATOR'].unique()

array(['Frais', 'Mac'], dtype=object)

### Palu

In [692]:
# Load the 'Palu' sheet with a two-row header (rows 1 and 2), which yields
# two-level column labels.
df = pd.read_excel(
    'data/MLMDO_2020_S_53.xls',
    sheet_name='Palu',
    header=[1, 2],
)

In [693]:
# Keep the first 53 rows and 56 columns, transpose so the two header levels
# become row labels, drop the first transposed row, and expose the two levels
# as the REGION and INDICATOR columns.
df = (
    df.iloc[:53, :56]
    .T
    .iloc[1:]
    .reset_index()
    .rename(columns={'level_0': 'REGION', 'level_1': 'INDICATOR'})
)

In [694]:
# Wide -> long: REGION and INDICATOR identify a row, every remaining column
# becomes one (SEMAINE, VALUE) record.
id_var_columns = df.columns[:2].tolist()
value_var_columns = df.columns[2:].tolist()
df_long = df.melt(
    id_vars=id_var_columns,
    value_vars=value_var_columns,
    var_name='SEMAINE',
    value_name='VALUE',
).sort_values(by=['REGION', 'SEMAINE'])

In [695]:
# Clean the two label columns, shift SEMAINE up by one, and remember the
# table's shape for the check after the region merge.
df_long = clean_string(df_long, ['REGION', 'INDICATOR'])
df_long['SEMAINE'] = df_long['SEMAINE'] + 1
df_long_shape = df_long.shape

In [696]:
# Region spellings found in the sheet -> spellings to replace them with.
region_replace = {
    'SÉGOU': 'SEGOU',
    'MÉNAKA': 'MENAKA',
    'TAOUDÉNI': 'TAOUDENIT',
}
df_long = replace_values(
    df_long,
    ['REGION'],
    [region_replace],
)
# The sheet must contain as many distinct regions as the regions table.
assert df_long['REGION'].nunique() == regions['REGION'].nunique()

In [697]:
# Swap the REGION name for its RegionIndex and rotate the last column to the
# front; the table's shape must match the one recorded before the merge.
df_long = reorder_columns(
    merge_columns(
        df1=df_long,
        df2=regions,
        left_on_list=['REGION'],
        right_on_list=['REGION'],
        drop_list=['REGION'],
    ),
    colnum=-1,
)
assert df_long.shape == df_long_shape

In [698]:
# Use nullable integers for both columns; VALUE entries that cannot be parsed
# as numbers are coerced to missing values first.
df_long = df_long.assign(
    SEMAINE=df_long['SEMAINE'].astype('Int64'),
    VALUE=pd.to_numeric(df_long['VALUE'], errors='coerce').astype('Int64'),
)

In [699]:
# Stamp the table with the reporting year and fix the output column order.
df_long = df_long.assign(YEAR=2020)[
    ['RegionIndex', 'YEAR', 'INDICATOR', 'SEMAINE', 'VALUE']
]

In [700]:
# Write the region-level table to CSV without the index column.
df_long.to_csv(path_or_buf='data/region_palu.csv', index=False)