# Liberar DNI - Notebook de pruebas

Este notebook necesita los datos de tu DNI para iniciar sesión en el portal de cita previa, y los lee de un archivo `.env`.
Simplemente crea una copia de `.env.sample` llamada `.env` y rellena los datos con los que figuran en tu DNI.

In [None]:
import pandas as pd

import geopandas as gpd
import geopy

from geopy.geocoders import Nominatim


from selenium.webdriver import Chrome
from tqdm.auto import tqdm

from utils import (extraer_datos_unidad, extract_hours,
                   get_remaining_days, get_remaining_months, login, URL_LOGIN, URL_DNI, URL_PASAPORTE)

In [None]:
# Register tqdm with pandas so that `progress_apply` is available on DataFrames.
tqdm.pandas()

In [None]:
# Start a Chrome session driven by Selenium.
driver = Chrome()

# Open the login page of the appointment portal.
driver.get(URL_LOGIN)

## Login

In [None]:
# Log in through the helper imported from utils.
# NOTE(review): tesseract_check=True presumably enables an OCR-based check
# during login - confirm against utils.login.
login(driver, tesseract_check=True)

# DNI Extract

## Provinces

In [None]:
# Navigate to the DNI page (URL_DNI is imported from utils).
driver.get(URL_DNI)

In [None]:
# Map each province name to its URL. Every element nested under the element
# with id 'Map' contributes one entry: `alt` is the key and `href` the value.
# NOTE(review): the find_element(s)_by_* helpers were removed in Selenium 4.3;
# this cell presumably targets an older Selenium - confirm the pinned version.
provinces = {}

map_children = driver.find_element_by_id('Map').find_elements_by_css_selector('*')
provinces.update(
    (child.get_attribute('alt'), child.get_attribute('href'))
    for child in map_children
)

In [None]:
# Display the scraped mapping for a visual check.
provinces

## Unidades

In [None]:
# Disabled scrape, kept for reference: it visits every province URL, runs
# extraer_datos_unidad on each <div> found inside the elements with class
# 'lista', and writes the resulting table to unidades_spain.csv.
# Uncomment the lines below to regenerate that CSV.
#
# driver.get(URL_DNI)
# 
# unidades_spain = []
# 
# for province_name, province_url in tqdm(provinces.items()):
#     
#     driver.get(province_url)
#     
#     listas = driver.find_elements_by_class_name('lista')
#     unidades_provincia = [extraer_datos_unidad(ud, province_name) for lista in listas for ud in lista.find_elements_by_tag_name('div')]
#     
#     unidades_spain.extend(unidades_provincia)
# 
# unidades_spain = pd.DataFrame(unidades_spain)    
# unidades_spain.to_csv('unidades_spain.csv', index = False)

In [None]:
# Sanity check - are all of Spain's provinces present?
# unidades_spain.province.unique()


# Load the units table from the CSV file.
unidades_spain = pd.read_csv("unidades_spain.csv")

## Geocoding

In [None]:
from geopy.extra.rate_limiter import RateLimiter

locator = Nominatim(user_agent="myGeocoder")

# The public Nominatim service allows at most one request per second, and
# this notebook geocodes one address per DataFrame row. RateLimiter spaces
# the calls out, retries on geocoder service errors and, with its default
# settings, returns None instead of raising once the retries are exhausted,
# so a single timeout no longer aborts the whole run.
_rate_limited_geocode = RateLimiter(locator.geocode, min_delay_seconds=1)


def geocode_row(row):
    """Add ``latitude`` and ``longitude`` to a row by geocoding its address.

    Args:
        row: a row exposing the address as ``row.direccion``; it is meant to
            be used with ``DataFrame.apply(..., axis=1)``.

    Returns:
        The same row with ``latitude`` and ``longitude`` set. Both are
        ``None`` when the geocoder finds no match or the request fails.
    """
    location = _rate_limited_geocode(row.direccion)

    row['latitude'] = location.latitude if location else None
    row['longitude'] = location.longitude if location else None

    return row

In [None]:
# Geocode the table row by row (axis=1); progress_apply comes from tqdm.
unidades_spain = unidades_spain.progress_apply(geocode_row, axis=1)

In [None]:
# Disabled: uncomment to write the geocoded table back to the CSV and display it.
# unidades_spain.to_csv('unidades_spain.csv', index = False)
# unidades_spain

In [None]:
# Preview the first ten rows.
unidades_spain[:10]

## Horas

El comportamiento aquí siempre es igual: el mes y día seleccionados (por defecto, el primero) no tienen enlace, los siguientes sí.

In [None]:
# Open the page of the selected unit.
# NOTE(review): `unidad` is not assigned in any cell visible here. It must be
# set before running this cell (presumably to one record of unidades_spain
# that carries a 'url' field - confirm).
driver.get(unidad['url'])

First try the current month, then any remaining months.

In [None]:
def _append_day_entries(collected, progress_label):
    """Visit each remaining day link of the page loaded in ``driver``.

    The entries that extract_hours returns for every visited day page are
    appended to ``collected`` in place, so partial results survive if a later
    page fails.
    """
    day_links = get_remaining_days(driver)
    for _day, day_link in tqdm(day_links.items(), desc=progress_label):
        driver.get(day_link)
        collected.extend(extract_hours(driver, province_name, unidad_name))


# NOTE(review): province_name and unidad_name are not assigned in any active
# cell visible here - they must be defined before running this cell.
# The day selected by default has no link of its own, so it is read directly
# from the page that is already open.
citas = extract_hours(driver, province_name, unidad_name)
_append_day_entries(citas, 'Current month')

# The month links are read only after the current month's days were visited,
# matching the order in which the pages are loaded.
for month, month_url in get_remaining_months(driver).items():
    driver.get(month_url)
    _append_day_entries(citas, month)

In [None]:
# Collect the scraped entries into a DataFrame and preview the first rows.
citas_df = pd.DataFrame(citas)
citas_df.head()

In [None]:
# Write the entries to disk without the index column.
# NOTE(review): the file name is hard-coded; presumably this run targeted a
# unit in A Coruna - confirm, or rename the file when scraping another unit.
citas_df.to_csv('citas_coruna.csv', index=False)