In [1]:
import numpy as np
import pandas as pd
import os
from helper_functions import *
from herams_helper_functions import *

In [2]:
def reorder_columns(data, colnum):
    """Return ``data`` with its columns rotated.

    The columns from position ``colnum`` onwards are placed first, followed by
    the columns that preceded that position. With a negative ``colnum`` this
    moves the last ``abs(colnum)`` columns to the front (e.g. ``-1`` puts the
    last column first).

    Args:
        data: DataFrame whose columns are reordered.
        colnum: Slice position at which the column list is split.

    Returns:
        A DataFrame with the same columns in rotated order.
    """
    names = data.columns.tolist()
    head, tail = names[colnum:], names[:colnum]
    return data[head + tail]

### Meningite

In [3]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Meningite ', header=4)

In [4]:
df = df.iloc[:87, :-12]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [5]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)

In [6]:
cercle_replace = {'COMMUNE 1': 'COMMUNE I', 'COMMUNE 2': 'COMMUNE II', 'COMMUNE 3': 'COMMUNE III', 'COMMUNE 4': 'COMMUNE IV', 'COMMUNE 5': 'COMMUNE V', 
                  'COMMUNE 6': 'COMMUNE VI', 'ACHOURATT': 'ACHOURAT', 'AÏBEBARA': 'ABEIBARA', 'ALMOUSTRAT': 'ALMOUSTARAT', 'AL_OURCH': 'AL-OURCHE',
                  'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 'GOURMA RHAROUS': 'GOURMA-RHAROUS', 'TAOUDÉNI': 'TAOUDENIT', 'FOUM_ALBA': 'FOUM-ELBA',
                  'NIÈNA': 'NIENA', 'TINDERMEN': 'TIDERMENE', 'SÉFETO': 'SEFETO', 'MÉNAKA': 'MENAKA', 'TINESSAKO': 'TIN-ESSAKO'}
region_replace = {'SÉGOU': 'SEGOU', 'MÉNAKA': 'MENAKA', 'TAOUDÉNI': 'TAOUDENIT', 'GENERAL': 'MALI'}
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [7]:
# Reference tables used by every section below. get_serial_column is a project
# helper; it is called here with the key column(s) and the name of an index column.
regions = pd.read_csv('data/regions.csv').pipe(
    get_serial_column, ['REGION'], 'RegionIndex'
)
cercle = pd.read_csv('data/cercle.csv').pipe(
    get_serial_column, ['RegionIndex', 'CERCLE'], 'CercleIndex'
)

In [8]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [9]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [10]:
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [11]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [12]:
df_reg.to_csv('data/region_meningite.csv', index=False)
df_long.to_csv('data/meningite.csv', index=False)

### Rougeole

In [13]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Rougeole', header=3)

In [14]:
df = df.iloc[:87, :-10]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [15]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)

In [16]:
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [17]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [18]:
df_long = df_long[(df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [19]:
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [20]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [21]:
df_reg.to_csv('data/region_rougeole.csv', index=False)
df_long.to_csv('data/rougeole.csv', index=False)

### Fièvre Jaune

In [22]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Fièvre Jaune', header=4)

In [23]:
df = df.iloc[:87, :-15]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [24]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [25]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [26]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [27]:
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [28]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [29]:
df_reg.to_csv('data/region_fievre_jaune.csv', index=False)
df_long.to_csv('data/fievre_jaune.csv', index=False)

### Cholera

In [30]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Cholera ', header=4)

In [31]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [32]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])

In [33]:
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [34]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [35]:
df_reg.to_csv('data/region_cholera.csv', index=False)
df_long.to_csv('data/cholera.csv', index=False)

### Rage Humaine

In [36]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Rage Humaine ', header=4)

In [37]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [38]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [39]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [40]:
df_reg.to_csv('data/region_rage_humaine.csv', index=False)
df_long.to_csv('data/rage_humaine.csv', index=False)

### Charbon

In [41]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Charbon', header=3)

In [42]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [43]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)

In [44]:
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [45]:
df_reg.to_csv('data/region_charbon.csv', index=False)
df_long.to_csv('data/charbon.csv', index=False)

### Piqures Serpents 

In [46]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='piqures Serpents ', header=3)

In [47]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [48]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [49]:
df_reg.to_csv('data/region_piqures_serpents.csv', index=False)
df_long.to_csv('data/piqures_serpents.csv', index=False)

###  Morssures Chien 

In [50]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name=' Morssures Chien ', header=3)

In [51]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [52]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [53]:
df_reg.to_csv('data/region_morssures_chien.csv', index=False)
df_long.to_csv('data/morssures_chien.csv', index=False)

### Piqures Scorpion 

In [54]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='piqures Scorpion ', header=3)

In [55]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [56]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [57]:
df_reg.to_csv('data/region_piqures_scorpion.csv', index=False)
df_long.to_csv('data/piqures_scorpion.csv', index=False)

### Diarrhée rouge

In [58]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Diarrhée rouge', header=3)

In [59]:
df = df.iloc[:87, :-4]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [60]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [61]:
df_reg.to_csv('data/region_diarhee_rouge.csv', index=False)
df_long.to_csv('data/diarhee_rouge.csv', index=False)

### COVID-19

In [62]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='COVID19', header=3)

In [63]:
df = df.iloc[:87, :-7]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [64]:
df.loc[df['REGION'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df.loc[df['REGION'].str.startswith('TOTAL'), 'REGION'] = df['REGION'].str.slice(6,)
df = replace_values(df, ['DISTRICT', 'REGION'], [cercle_replace, region_replace])
id_var_columns = list(df.columns[:5])
value_var_columns = list(df.columns[5:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='INDICATOR', value_name='WEEKLY VALUE')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'INDICATOR'], axis=0)
df_long = df_long.reset_index(drop=True)
df_long = df_long[(df_long['INDICATOR'] != 'CAS00') & (df_long['REGION'] != 'MALI')]
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']
df_reg = df_reg.drop(['DISTRICT', 'ISOCODE'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [65]:
df_reg.to_csv('data/region_covid19.csv', index=False)
df_long.to_csv('data/covid19.csv', index=False)

### PFA

In [66]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='PFA', header=3)

In [67]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})

In [68]:
df = df.iloc[:87, :-5]
df = clean_string(df, ['REGION', 'DISTRICT'])

In [69]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'

In [70]:
cercle_replace_ = {'ACHOURATT': 'ACHOURAT', 'ALMOUSTRAT': 'ALMOUSTARAT', 'AL-OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 
                   'TAOUDÉNIT': 'TAOUDENIT', 'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 'SAGABARY': 'SAGABARI',
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU'}

In [71]:
df = replace_values(df, ['DISTRICT'], [cercle_replace_])

In [72]:
id_var_columns = list(df.columns[:3])
value_var_columns = list(df.columns[3:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='CAS')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [73]:
assert df_long.REGION.nunique() == 12
assert df_long.DISTRICT.nunique() == 76

In [74]:
df_long = df_long[df_long['REGION'] != 'MALI']
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [75]:
assert df_reg.REGION.nunique() == 11
assert df_long.REGION.nunique() == 11
assert df_long.DISTRICT.nunique() == 75

In [76]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [77]:
assert df_reg.RegionIndex.nunique() == 11

In [78]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [79]:
assert df_long.RegionIndex.nunique() == 11
assert df_long.CercleIndex.nunique() == 75
assert df_long.SEMAINE.nunique() == 53

In [80]:
df_reg.to_csv('data/region_PFA.csv', index=False)
df_long.to_csv('data/PFA.csv', index=False)

### DCD_Maternel

In [81]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='DCD_Maternel', header=3)

In [82]:
cols_to_drop = [col for col in df.columns.tolist() if str(col).startswith('Unnamed')]
df = df.drop(cols_to_drop, axis=1)

In [83]:
df = df.iloc[:86, :-1]

In [84]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])

In [85]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'

In [86]:
cercle_replace_2 = {'ACHOURATT': 'ACHOURAT', 'ALMOUSTRAT': 'ALMOUSTARAT', 'AL_OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 
                   'TAOUDÉNIT': 'TAOUDENIT', 'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 'SAGABARY': 'SAGABARI',
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU'}
df = replace_values(df, ['DISTRICT'], [cercle_replace_2])

In [87]:
id_var_columns = list(df.columns[:3])
value_var_columns = list(df.columns[3:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='CAS')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [88]:
df_long = df_long[df_long['REGION'] != 'MALI']
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [89]:
assert df_reg.REGION.nunique() == 11
assert df_long.REGION.nunique() == 11
assert df_long.DISTRICT.nunique() == 74

In [90]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [91]:
assert df_reg.RegionIndex.nunique() == 11

In [92]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [93]:
assert df_long.RegionIndex.nunique() == 11
assert df_long.CercleIndex.nunique() == 74
assert df_long.SEMAINE.nunique() == 53

In [94]:
df_reg.to_csv('data/region_DCD_Maternel.csv', index=False)
df_long.to_csv('data/DCD_Maternel.csv', index=False)

### DCD NN

In [95]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='DCD NN', header=3)

In [96]:
df = df.iloc[:95, :-6]

In [97]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])

In [98]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df = replace_values(df, ['DISTRICT'], [cercle_replace_2])

In [99]:
dis = [d for d in df.DISTRICT.unique() if d not in cercle.CERCLE.unique()]
dis.remove('REGION')

In [100]:
df = df[~df.DISTRICT.isin(dis)]

In [101]:
id_var_columns = list(df.columns[:3])
value_var_columns = list(df.columns[3:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='CAS')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [102]:
df_long = df_long[df_long['REGION'] != 'MALI']
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [103]:
assert df_reg.REGION.nunique() == 11
assert df_long.REGION.nunique() == 11
assert df_long.DISTRICT.nunique() == 74

In [104]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [105]:
assert df_reg.RegionIndex.nunique() == 11

In [106]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [107]:
assert df_long.RegionIndex.nunique() == 11
assert df_long.CercleIndex.nunique() == 74
assert df_long.SEMAINE.nunique() == 53

In [108]:
df_reg.to_csv('data/region_DCD_NN.csv', index=False)
df_long.to_csv('data/DCD_NN.csv', index=False)

### TNN

In [231]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='TNN', header=2)

In [232]:
cols_to_drop = [col for col in df.columns.tolist() if str(col).startswith('Unnamed')]
df = df.drop(cols_to_drop, axis=1)

In [233]:
df = df.iloc[:83, :-8]
df = df.drop('3-28 jours', axis=1)

In [234]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])

In [235]:
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'

In [237]:
cercle_replace_4 = {'ACHOURATT': 'ACHOURAT', 'AL_OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 
                   'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU', 'SELENGUE': 'SELINGUE', 'NIONIO': 'NIONO', 'ANSONGOU': 'ANSONGO', 'TAOUDÉNI': 'TAOUDENIT'}

In [238]:
df = replace_values(df, ['DISTRICT'], [cercle_replace_4])

In [240]:
id_var_columns = list(df.columns[:2])
value_var_columns = list(df.columns[2:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='NUMBER')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [241]:
df_long.loc[df_long['SEMAINE'] == 'Cas', 'SEMAINE'] = 'Cas.0'
df_long.loc[df_long['SEMAINE'] == 'Décè', 'SEMAINE'] = 'Décè.0'

In [242]:
df_long[['INDICATOR', 'WEEK']] = df_long['SEMAINE'].str.split('.', expand=True)

In [243]:
df_long = df_long.drop(['SEMAINE'], axis=1)
df_long = df_long[['REGION', 'DISTRICT', 'INDICATOR', 'WEEK', 'NUMBER']]
df_long = df_long.rename(columns={'WEEK': 'SEMAINE'})
df_long['SEMAINE'] = df_long['SEMAINE'].astype('int') + 1
df_long['SEMAINE'] = pd.to_numeric(df_long['SEMAINE'])
df = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE', 'INDICATOR'], axis=0)

In [244]:
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [245]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [246]:
assert df_reg.RegionIndex.nunique() == 11

In [247]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])

In [249]:
def merge_columns(df1, df2, left_on_list, right_on_list, drop_list):
    """Left-join ``df2`` onto ``df1`` and drop the given columns from the result.

    Args:
        df1: Left DataFrame; every one of its rows is kept.
        df2: Right DataFrame providing the columns to attach.
        left_on_list: Join key column names in ``df1``.
        right_on_list: Join key column names in ``df2``.
        drop_list: Column names removed from the merged frame before returning.

    Returns:
        A new DataFrame with as many rows as ``df1``; neither input is modified.

    Raises:
        AssertionError: If the join changes the row count. A left join keeps
            every left row, so this means ``df2`` has duplicate join keys.
    """
    merged = df1.merge(df2, left_on=left_on_list, right_on=right_on_list, how='left')
    assert merged.shape[0] == df1.shape[0], (
        f"left merge changed the row count from {df1.shape[0]} to {merged.shape[0]}; "
        f"check for duplicate keys {right_on_list} in the right frame"
    )
    return merged.drop(columns=drop_list)

In [250]:
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [251]:
df_reg.to_csv('data/region_TNN.csv', index=False)
df_long.to_csv('data/TNN.csv', index=False)

### Mort né

In [123]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Mort né', header=2)

In [124]:
cols_to_drop = [col for col in df.columns.tolist() if str(col).startswith('Unnamed')]
df = df.drop(cols_to_drop, axis=1)

In [125]:
df = df.iloc[:91, :-5]

In [126]:
df = df.rename(columns={'Region': 'REGION', 'Districts Sanitaires': 'DISTRICT', 'Population': 'POP'})
df = clean_string(df, ['REGION', 'DISTRICT'])
df.loc[df['DISTRICT'].str.startswith('TOTAL'), 'DISTRICT'] = 'REGION'
df = df[df['DISTRICT'] != 'EPH']

In [127]:
dis = [d for d in df.DISTRICT.unique() if d not in cercle.CERCLE.unique()]
dis.remove('REGION')

In [128]:
cercle_replace_3 = {'ACHOURATT': 'ACHOURAT', 'AL_OURCH': 'AL-OURCHE', 'ARAWANE': 'ARAOUANE', 'BARAOUELI': 'BAROUELI', 'NIÉNA': 'NIENA',
                   'FOUM_ALBA': 'FOUM-ELBA', 'TINDERMEN': 'TIDERMENE', 'MÉNAKA': 'MENAKA', 'SAGABARY': 'SAGABARI',
                   'BADIANGARA': 'BANDIAGARA', 'TENINKOU': 'TENENKOU', 'SELENGUE': 'SELINGUE', 'NIONIO': 'NIONO', 'ANSONGOU': 'ANSONGO', 'TAOUDÉNI': 'TAOUDENIT'}
df = replace_values(df, ['DISTRICT'], [cercle_replace_3])

In [129]:
id_var_columns = list(df.columns[:2])
value_var_columns = list(df.columns[2:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='NUMBER')
df_long = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE'], axis=0)
df_long = df_long.reset_index(drop=True)

In [130]:
df_long.loc[df_long['SEMAINE'] == 'Frais', 'SEMAINE'] = 'Frais.0'
df_long.loc[df_long['SEMAINE'] == 'Mac', 'SEMAINE'] = 'Mac.0'

In [131]:
df_long[['INDICATOR', 'WEEK']] = df_long['SEMAINE'].str.split('.', expand=True)
df_long = df_long.drop(['SEMAINE'], axis=1)
df_long = df_long[['REGION', 'DISTRICT', 'INDICATOR', 'WEEK', 'NUMBER']]
df_long = df_long.rename(columns={'WEEK': 'SEMAINE'})
df_long['SEMAINE'] = df_long['SEMAINE'].astype('int') + 1
df_long['SEMAINE'] = pd.to_numeric(df_long['SEMAINE'])
df = df_long.sort_values(['REGION', 'DISTRICT', 'SEMAINE', 'INDICATOR'], axis=0)

In [132]:
df_reg = df_long[df_long['DISTRICT'] == 'REGION']
df_long = df_long[df_long['DISTRICT'] != 'REGION']

In [133]:
df_reg = df_reg.drop(['DISTRICT'], axis=1)
df_reg = df_reg.reset_index(drop=True)
df_reg = merge_columns(df_reg, regions, ['REGION'], ['REGION'], ['REGION'])
df_reg = reorder_columns(df_reg, -1)

In [134]:
assert df_reg.RegionIndex.nunique() == 11

In [135]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = merge_columns(df_long, cercle, ['RegionIndex', 'DISTRICT'], ['RegionIndex', 'CERCLE'], ['CERCLE', 'DISTRICT'])
df_long = reorder_columns(df_long, -2)

In [136]:
df_reg.to_csv('data/region_mort_ne.csv', index=False)
df_long.to_csv('data/mort_ne.csv', index=False)

### Palu

In [330]:
df = pd.read_excel('data/MLMDO_2020_S_53.xls', sheet_name='Palu', header=[1,2])

In [331]:
df = df.iloc[:53, :56]
df = df.T
df = df.iloc[1:]
df.reset_index(inplace=True)
df = df.rename(columns={'level_0': 'REGION', 'level_1': 'INDICATOR'})

In [332]:
id_var_columns = list(df.columns[:2])
value_var_columns = list(df.columns[2:])
df_long = pd.melt(df, id_vars=id_var_columns, value_vars=value_var_columns, var_name='SEMAINE', value_name='VALUE')
df_long = df_long.sort_values(['REGION', 'SEMAINE'], axis=0)

In [333]:
df_long = clean_string(df_long, ['REGION', 'INDICATOR'])
df_long['SEMAINE'] += 1
df_long_shape = df_long.shape

In [334]:
# Region spelling corrections for the Palu sheet. They live under their own name
# so the notebook-wide `region_replace` (which also maps 'GENERAL' to 'MALI'
# for the disease sheets) is not overwritten: rebinding it here would make any
# earlier section re-run afterwards keep its 'GENERAL' rows.
palu_region_replace = {'SÉGOU': 'SEGOU', 'MÉNAKA': 'MENAKA', 'TAOUDÉNI': 'TAOUDENIT'}
df_long = replace_values(df_long, ['REGION'], [palu_region_replace])
# Every region in the sheet must match one entry of the reference table.
assert df_long.REGION.nunique() == regions.REGION.nunique()

In [335]:
df_long = merge_columns(df_long, regions, ['REGION'], ['REGION'], ['REGION'])
df_long = reorder_columns(df_long, -1)
assert df_long.shape == df_long_shape

In [336]:
df_long.to_csv('data/region_palu.csv', index=False)