# This Notebook is for data processing development

Here we will calculate the model parameters:
- notification_rates
- time_between
- ges_diagnostic_rates

Sets:
- medical_centers: [1, 2, ...]
- medics: [1, 2, ...]
- modules: [1, 2]
- months: [1, 2, ..., 12]
- days: [1, ..., 24]
- medical_centers_boxes: {1: [1, 2, ...], 2: [1, 2, ...]}


In [9]:
import json
import os

import pandas as pd
import requests

In [10]:
# Load the database from disk; the CSV file uses ';' as its field separator.
data = pd.read_csv('./../data/database.csv', sep=';')

In [11]:
# Number of rows read from the CSV.
len(data)

27853

In [12]:
# Preview the first rows to check how the columns were parsed.
data.head()

Unnamed: 0,ID_PROBLEMA,ID_PACIENTE,ID PACIENTE,ID MÉDICO,ID_LUGAR,CENTRO,FECHA_EVOLUCION,ETIQUETA_GES,ID_AGRUPADOR,POTENCIAL_1,POTENCIAL_2,POTENCIAL_3
0,17648485,211402,1399,1,21.0,CMD PEDRO DE VALDIVIA,29/07/2019,POSIBLE,59.0,56.0,,
1,17644905,5845159,1399,1,21.0,CMD PEDRO DE VALDIVIA,29/07/2019,NO GES,77.0,56.0,,
2,17730364,10199783,1399,1,21.0,CMD PEDRO DE VALDIVIA,29/07/2019,POSIBLE,56.0,,,
3,17677869,10824784,5268,1,21.0,CMD PEDRO DE VALDIVIA,02/08/2019,,,76.0,,
4,17656698,3767945,5751,1,21.0,CMD PEDRO DE VALDIVIA,09/08/2019,NO GES,56.0,,,


# Add id to medical centers

In [13]:
def add_id_to_medical_centers(data):
    """Assign consecutive ids (starting at 1) to the medical centers in ``data``.

    Center names are compared after replacing non-breaking spaces (``\\xa0``)
    with regular spaces, so two spellings that differ only in that character
    are treated as the same center and share one id.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``CENTRO`` column with the center names. Its
        ``ID_LUGAR`` column is overwritten **in place** with the new id for
        every row whose ``CENTRO`` is not missing; ``CENTRO`` itself is left
        untouched.

    Returns
    -------
    medical_centers : array-like of str
        The distinct, normalised center names in order of first appearance.
    enumerated : dict
        ``{center name: id}`` for every name in ``medical_centers``.
    """
    # Normalise the whole column, not just the list of unique names: otherwise
    # rows that still contain '\xa0' would never match a (cleaned) dict key and
    # would silently keep their previous ID_LUGAR.
    normalized = data['CENTRO'].str.replace('\xa0', ' ', regex=False)

    # De-duplicate *after* normalising so names that collide once cleaned do
    # not produce repeated entries or gaps in the id sequence. Missing names
    # are skipped instead of crashing on ``float.replace``.
    medical_centers = normalized.dropna().unique()
    enumerated = {name: mc_id for mc_id, name in enumerate(medical_centers, start=1)}

    # Vectorised lookup instead of a row-by-row scan over every dict entry.
    ids = normalized.map(enumerated)
    matched = ids.notna().to_numpy()
    if matched.any():
        data.loc[matched, 'ID_LUGAR'] = ids[matched].astype(int).to_numpy()
    return medical_centers, enumerated
    

In [14]:
# Build the center-name -> id mapping. Note that this call also rewrites
# data['ID_LUGAR'] in place, so the data.head() output shown earlier no longer
# reflects the current contents of that column.
medical_centers, enumerated = add_id_to_medical_centers(data)

# Query the Google Maps Distance Matrix API for travel times between medical centers

In [15]:
def get_data_from_maps_api(medical_centers, enumerated, api_key=None):
    """Fetch the travel time between every ordered pair of medical centers.

    One Google Maps Distance Matrix request is sent per ordered pair
    (including each center paired with itself).

    Parameters
    ----------
    medical_centers : iterable of str
        Center names. ``' REDSALUD, CHILE'`` is appended to each name to build
        the address that is sent to the API.
    enumerated : dict
        Maps every name in ``medical_centers`` to its numeric id.
    api_key : str, optional
        Google Maps API key. When omitted, the ``GOOGLE_MAPS_API_KEY``
        environment variable is used. The key must never be committed to the
        notebook.

    Returns
    -------
    dict
        ``{(origin_id, destination_id): travel_time}`` where ``travel_time`` is
        the API ``duration`` value (seconds, per the API docs) divided by 3600.

    Raises
    ------
    RuntimeError
        If no API key is available, or the API reports a failure for the
        request or for the origin/destination pair.
    requests.HTTPError
        If the HTTP response status is an error.
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise RuntimeError(
            'No Google Maps API key: pass api_key=... or set the '
            'GOOGLE_MAPS_API_KEY environment variable.'
        )

    url = 'https://maps.googleapis.com/maps/api/distancematrix/json'
    # Materialise once so a one-shot iterable can be traversed by both loops.
    names = list(medical_centers)

    time_between = dict()
    for origin_name in names:
        for destination_name in names:
            # Let requests URL-encode the values (spaces, accents, '&', ...).
            params = {
                'units': 'metric',
                'origins': origin_name + ' REDSALUD, CHILE',
                'destinations': destination_name + ' REDSALUD, CHILE',
                'key': api_key,
            }
            # A timeout prevents a stalled connection from hanging the kernel.
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            res = response.json()

            # The API answers HTTP 200 even on failures; the real outcome is
            # in the top-level and per-element "status" fields.
            if res.get('status') != 'OK':
                raise RuntimeError(
                    'Distance Matrix request failed for %r -> %r: %s %s' % (
                        origin_name, destination_name,
                        res.get('status'), res.get('error_message', ''),
                    )
                )
            element = res['rows'][0]['elements'][0]
            if element.get('status') != 'OK':
                raise RuntimeError(
                    'No route found for %r -> %r: %s' % (
                        origin_name, destination_name, element.get('status'),
                    )
                )

            pair_ids = (enumerated[origin_name], enumerated[destination_name])
            time_between[pair_ids] = int(element['duration']['value']) / 3600
    return time_between


In [16]:
# Needs network access: one HTTP request is sent per ordered pair of centers
# (len(medical_centers) ** 2 requests in total), so this cell can be slow.
time_between = get_data_from_maps_api(medical_centers, enumerated)

# Filter medical center combinations by travel time

In [17]:
def get_lower_equal_than(data, k):
    """Drop from ``data`` every entry whose value is greater than ``k``.

    The dictionary is modified in place and the very same object is returned,
    so the caller's original reference is filtered as well.
    """
    # Collect the offending keys first: a dict cannot be resized while it is
    # being iterated.
    above_threshold = [key for key, value in data.items() if value > k]
    for key in above_threshold:
        data.pop(key)
    return data

In [19]:
# Keep only the pairs whose travel time is at most 2 (the values are the API
# duration divided by 3600, i.e. presumably hours).
# NOTE: get_lower_equal_than deletes entries from time_between in place and
# returns that same dict, so after this cell medical_centers_filtered and
# time_between are the same (filtered) object.
medical_centers_filtered = get_lower_equal_than(time_between, 2)

# Relation between medical centers combinations and days

In [30]:
def get_medical_centers_by_day(medical_centers, days):
    """Pair every day with every medical-center combination.

    ``medical_centers`` is a dict keyed by combination; only its keys are used.
    Returns a list of ``(day, combination)`` tuples ordered by day first and,
    within each day, by the dict's key order.
    """
    return [
        (day, combination)
        for day in days
        for combination in medical_centers.keys()
    ]

In [31]:
# Days 1..6 (range's upper bound is exclusive).
# NOTE(review): the "Sets" list at the top of the notebook describes days as
# [1, ..., 24], which does not match the six days built here - confirm which
# one is intended.
days = list(range(1,7))
# Every (day, (origin_id, destination_id)) combination over the filtered pairs.
medical_centers_by_day = get_medical_centers_by_day(medical_centers_filtered, days)

# Medic notification rate
| X | NULL columns | NOT NULL columns |
| --- | --- | --- |
| Notified |  | ID_AGRUPADOR |
| Not notified but should have been notified | ID_AGRUPADOR | POTENCIAL_1 or POTENCIAL_2 or POTENCIAL_3 |
| Not GES | ID_AGRUPADOR and POTENCIAL_1 and POTENCIAL_2 and POTENCIAL_3 | |