# This Notebook is for data processing development

Here we will calculate the model parameters:
- [X] notification_rates
- [X] time_between

Sets:
- [X] medical_centers: [1, 2, ...]
- [X] medics: [1, 2, ...]
- [X] modules: [1, 2]
- [X] months: [1, 2, ..., 12]
- [X] days: [1, ..., 24]
- [X] medical_centers_boxes: {1: [1, 2, ...], 2: [1, 2, ...]}


In [2]:
import json
import os
from math import isnan

import pandas as pd
import requests

In [3]:
# Load the raw records; the CSV file uses ';' as the field separator.
data = pd.read_csv('./../data/database.csv', sep=';')

In [4]:
len(data)

27853

In [5]:
data.head()

Unnamed: 0,ID_PROBLEMA,ID_PACIENTE,ID PACIENTE,ID_MEDICO,ID_LUGAR,CENTRO,FECHA_EVOLUCION,ETIQUETA_GES,ID_AGRUPADOR,POTENCIAL_1,POTENCIAL_2,POTENCIAL_3
0,17648485,211402,1399,1,21.0,CMD PEDRO DE VALDIVIA,29/07/2019,POSIBLE,59.0,56.0,,
1,17644905,5845159,1399,1,21.0,CMD PEDRO DE VALDIVIA,29/07/2019,NO GES,77.0,56.0,,
2,17730364,10199783,1399,1,21.0,CMD PEDRO DE VALDIVIA,29/07/2019,POSIBLE,56.0,,,
3,17677869,10824784,5268,1,21.0,CMD PEDRO DE VALDIVIA,02/08/2019,,,76.0,,
4,17656698,3767945,5751,1,21.0,CMD PEDRO DE VALDIVIA,09/08/2019,NO GES,56.0,,,


# Sets

## Add id to medical centers and define **medical_centers** set

In [6]:
def add_id_to_medical_centers(data):
    """Assign a sequential id to every medical center and store it in ``data``.

    Center names are normalised by replacing non-breaking spaces ('\\xa0')
    with regular spaces *before* ids are assigned, so two spellings of the
    same center share one id and every row of ``data`` is matched against
    the normalised name.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'CENTRO' column. The 'ID_LUGAR' column is overwritten
        in place for every row with a non-null 'CENTRO' (the column is
        created if it does not exist). 'CENTRO' itself is left untouched.

    Returns
    -------
    medical_centers : array of str
        Unique normalised center names, in order of first appearance.
    enumerated : dict
        Maps each normalised center name to its id (ids start at 1).
    """
    # Normalise on the full column (not on the unique() copy) so that rows
    # still carrying '\xa0' are matched too.
    centro_clean = data['CENTRO'].str.replace('\xa0', ' ', regex=False)

    # De-duplicate after normalisation; rows without a center name get no id.
    medical_centers = centro_clean.dropna().unique()
    enumerated = {name: mc_id for mc_id, name in enumerate(medical_centers, start=1)}

    # Vectorised lookup instead of a rows x centers Python loop.
    center_ids = centro_clean.map(enumerated)
    matched = center_ids.notna()
    data.loc[matched, 'ID_LUGAR'] = center_ids[matched]
    return medical_centers, enumerated
    

In [7]:
# Build the center-name -> id mapping; the call also rewrites data['ID_LUGAR'] in place.
medical_centers, enumerated = add_id_to_medical_centers(data)

## Modules, days and months

In [8]:
# Index sets, all 1-based: 2 modules, 24 days and 12 months.
modules = list(range(1, 3))
days = [day for day in range(1, 25)]
months = list(range(1, 13))

## Medics

In [9]:
# Distinct medic ids, in order of first appearance, as a plain Python list.
medics = data['ID_MEDICO'].unique().tolist()

## Medical center boxes

In [11]:
enumerated

{'CD ELQUI': 21,
 'CD INDEPENDENCIA': 32,
 'CD KENNEDY': 24,
 'CD LA REINA': 31,
 'CD PASEO ESTACION': 33,
 'CD PEÑALOLEN': 34,
 'CD PUENTE ALTO': 30,
 'CMD ALAMEDA': 3,
 'CMD ANTOFAGASTA': 15,
 'CMD ARICA': 28,
 'CMD CALAMA': 27,
 'CMD CHILLAN': 20,
 'CMD CONCEPCION': 12,
 'CMD CONCHALI': 23,
 'CMD IQUIQUE': 2,
 'CMD LA FLORIDA': 5,
 'CMD LOS ANGELES': 25,
 'CMD MAIPU': 11,
 'CMD MUELLE BARON': 10,
 'CMD OSORNO': 13,
 'CMD PEDRO DE VALDIVIA': 1,
 'CMD PUENTE ALTO': 17,
 'CMD PUERTO MONTT': 18,
 'CMD PUNTA ARENAS': 22,
 'CMD QUILICURA': 7,
 'CMD QUILPUE': 26,
 'CMD RANCAGUA': 16,
 'CMD SAN BERNARDO': 9,
 'CMD SAN MIGUEL': 14,
 'CMD TALCA': 29,
 'CMD TEMUCO CENTRO': 19,
 'CMD VALDIVIA': 6,
 'CMD VIÑA DEL MAR': 8,
 'CMD ÑUÑOA': 4}

In [30]:
# Load the boxes table; the CSV file uses ';' as the field separator.
boxes_data = pd.read_csv('./../data/boxes-database.csv', sep=';')

In [31]:
boxes_data

Unnamed: 0,ID_LUGAR,CENTRO,N_BOXES
0,1,CMD PEDRO DE VALDIVIA,32
1,2,CMD IQUIQUE,11
2,3,CMD ALAMEDA,43
3,4,CMD ÑUÑOA,34
4,5,CMD LA FLORIDA,31
5,6,CMD VALDIVIA,19
6,7,CMD QUILICURA,21
7,8,CMD VIÑA DEL MAR,17
8,9,CMD SAN BERNARDO,12
9,10,CMD MUELLE BARON,13


In [34]:
def get_boxes(enumerated, boxes_data):
    """Map each medical center id to its 'N_BOXES' value.

    Every row of ``boxes_data`` is looked up in ``enumerated`` by its
    'CENTRO' name; a name missing from ``enumerated`` raises ``KeyError``.
    If a center appears more than once, the last row wins.
    """
    return {
        enumerated[record['CENTRO']]: record['N_BOXES']
        for _, record in boxes_data.iterrows()
    }
    
# Display the center-id -> N_BOXES mapping; the result is not stored in a variable.
get_boxes(enumerated, boxes_data)

{1: 32,
 2: 11,
 3: 43,
 4: 34,
 5: 31,
 6: 19,
 7: 21,
 8: 17,
 9: 12,
 10: 13,
 11: 42,
 12: 22,
 13: 12,
 14: 29,
 15: 30,
 16: 23,
 17: 11,
 18: 15,
 19: 23,
 20: 32,
 21: 13,
 22: 24,
 23: 14,
 24: 19,
 25: 12,
 26: 11,
 27: 17,
 28: 12,
 29: 38,
 30: 24,
 31: 13,
 32: 19,
 33: 22,
 34: 25}

# Parameters

## Query the Google Maps Distance Matrix API for travel times between medical centers

In [10]:
def get_data_from_maps_api(medical_centers, enumerated, api_key=None):
    """Fetch travel times between medical centers from the Distance Matrix API.

    One HTTP request is issued per ordered pair of centers (a center paired
    with itself included), i.e. ``len(medical_centers) ** 2`` requests.

    Parameters
    ----------
    medical_centers : iterable of str
        Center names. Each name is suffixed with ' REDSALUD, CHILE' to build
        the origin / destination search string.
    enumerated : dict
        Maps each center name to its id; used to key the result.
    api_key : str, optional
        Google Maps API key. When omitted, the ``GOOGLE_MAPS_API_KEY``
        environment variable is used. The key is deliberately not stored in
        the notebook.

    Returns
    -------
    dict
        ``{(origin_id, destination_id): travel time in hours}``.

    Raises
    ------
    ValueError
        If no API key is supplied and the environment variable is unset.
    requests.HTTPError
        If the HTTP request itself fails.
    RuntimeError
        If the API answers with a non-'OK' status for the request or for the
        origin/destination pair (e.g. the place could not be geocoded).
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError(
            'A Google Maps API key is required: pass api_key=... or set the '
            'GOOGLE_MAPS_API_KEY environment variable.'
        )

    url = 'https://maps.googleapis.com/maps/api/distancematrix/json'
    seconds_per_hour = 60 * 60

    time_between = dict()
    for origin_name in medical_centers:
        for destination_name in medical_centers:
            # Passing the query via `params` lets requests URL-encode the
            # spaces, commas and non-ASCII characters in the center names.
            params = {
                'units': 'metric',
                'origins': origin_name + ' REDSALUD, CHILE',
                'destinations': destination_name + ' REDSALUD, CHILE',
                'key': api_key,
            }
            # No browser-style headers: the JSON endpoint does not need them,
            # and advertising 'br' encoding can yield a body requests cannot
            # decode unless a brotli package is installed.
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            res = response.json()

            if res.get('status') != 'OK':
                raise RuntimeError(
                    'Distance Matrix request for {!r} -> {!r} failed: {} {}'.format(
                        origin_name, destination_name,
                        res.get('status'), res.get('error_message', '')
                    )
                )
            element = res['rows'][0]['elements'][0]
            if element.get('status') != 'OK':
                raise RuntimeError(
                    'No route found for {!r} -> {!r}: {}'.format(
                        origin_name, destination_name, element.get('status')
                    )
                )

            pair_ids = (enumerated[origin_name], enumerated[destination_name])
            # The API reports the duration in seconds; the model uses hours.
            time_between[pair_ids] = int(element['duration']['value']) / seconds_per_hour
    return time_between


In [11]:
# Issues one HTTP request per ordered pair of medical centers, so this cell needs network access.
time_between = get_data_from_maps_api(medical_centers, enumerated)

## Medic notification rate
| Case | NULL columns | NOT NULL columns |
| --- | --- | --- |
| Notified |  | ID_AGRUPADOR |
| Not notified but should have been notified | ID_AGRUPADOR | POTENCIAL_1 or POTENCIAL_2 or POTENCIAL_3 |
| Not GES | ID_AGRUPADOR and POTENCIAL_1 and POTENCIAL_2 and POTENCIAL_3 | |

In [12]:
def get_notification_rates(data):
    """Compute, per medic, the share of notifiable cases that were notified.

    Each row is classified from its null / non-null columns:

    * notified: 'ID_AGRUPADOR' is not null;
    * not notified but should have been: 'ID_AGRUPADOR' is null and at least
      one of 'POTENCIAL_1', 'POTENCIAL_2', 'POTENCIAL_3' is not null;
    * not GES: all four columns are null. These rows do not enter the rate.

    Missing values are detected with pandas, so NaN, None and pd.NA are all
    treated as null.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'ID_MEDICO', 'ID_AGRUPADOR' and the three 'POTENCIAL_*'
        columns.

    Returns
    -------
    dict
        ``{medic_id: notified / (notified + not_notified)}``, in order of
        first appearance of each medic. A medic with no "not notified" rows
        gets 1.0, including medics with no notifiable rows at all.
    """
    potential_columns = ['POTENCIAL_1', 'POTENCIAL_2', 'POTENCIAL_3']

    notified = data['ID_AGRUPADOR'].notna()
    has_potential = data[potential_columns].notna().any(axis=1)
    not_notified = ~notified & has_potential

    # Count both categories per medic in one pass; sort=False keeps the
    # medics in order of first appearance.
    medic_stats = (
        pd.DataFrame({
            'notified': notified.astype(int),
            'not_notified': not_notified.astype(int),
        })
        .groupby(data['ID_MEDICO'], sort=False)
        .sum()
    )

    notification_rates = dict()
    for med_id, n_notified, n_missed in zip(
        medic_stats.index, medic_stats['notified'], medic_stats['not_notified']
    ):
        # n_missed != 0 guarantees a non-zero denominator.
        if n_missed != 0:
            notification_rates[med_id] = n_notified / (n_notified + n_missed)
        else:
            notification_rates[med_id] = 1.0
    return notification_rates

In [13]:
# Per-medic notification rate, keyed by ID_MEDICO.
notification_rates = get_notification_rates(data)