# Obtención de datos mediante APIs

In [27]:
import os
import time

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from unidecode import unidecode

## Scraping para obtener nombre de los municipios

In [28]:
# Set up the WebDriver (launches a Chrome session driven by Selenium)
driver = webdriver.Chrome()

# The browser must be closed even when scraping fails; otherwise the Chrome
# process started above is left running after an exception.
try:
    # Open the INEGI page that lists the municipalities of Oaxaca
    driver.get(
        "https://cuentame.inegi.org.mx/monografias/informacion/oax/territorio/div_municipal.aspx?tema=me&e=20"
    )

    # Give the page time to finish loading before querying the table
    time.sleep(5)

    # Read the second cell of table rows 1..570 of the 'keywords2' table
    oax_municipalities = []
    for i in range(1, 571):
        state = driver.find_element(
            By.XPATH, "//*[@id='keywords2']/tbody/tr[" + str(i) + "]/td[2]"
        )
        # Transliterate the cell text with unidecode (removes accents) and store it
        oax_municipalities.append(unidecode(state.text))
finally:
    # Close the browser
    driver.quit()

In [29]:
# Work with the first 30 scraped names only
# (presumably to limit the number of API requests -- confirm before widening).
municipalities = oax_municipalities[:30]
municipalities

['Abejones',
 'Acatlan de Perez Figueroa',
 'Asuncion Cacalotepec',
 'Asuncion Cuyotepeji',
 'Asuncion Ixtaltepec',
 'Asuncion Nochixtlan',
 'Asuncion Ocotlan',
 'Asuncion Tlacolulita',
 'Ayotzintepec',
 'El Barrio de la Soledad',
 'Calihuala',
 'Candelaria Loxicha',
 'Cienega de Zimatlan',
 'Ciudad Ixtepec',
 'Coatecas Altas',
 'Coicoyan de las Flores',
 'La Compania',
 'Concepcion Buenavista',
 'Concepcion Papalo',
 'Constancia del Rosario',
 'Cosolapa',
 'Cosoltepec',
 'Cuilapam de Guerrero',
 'Cuyamecalco Villa de Zaragoza',
 'Chahuites',
 'Chalcatongo de Hidalgo',
 'Chiquihuitlan de Benito Juarez',
 'Heroica Ciudad de Ejutla de Crespo',
 'Eloxochitlan de Flores Magon',
 'El Espinal']

## Envío de solicitudes a la API

In [30]:
def getApiDataOax(endpoint, payload, municipalities, getFirst, delay=5, timeout=30):
    """Query an API endpoint once per municipality and collect the JSON replies.

    Parameters
    ----------
    endpoint : str
        Full URL of the API method to call.
    payload : dict
        Base query parameters (for example the API key). It is not modified:
        a per-request copy with the ``q`` parameter added is sent instead.
    municipalities : iterable of str
        Municipality names used to build the ``q`` parameter.
    getFirst : bool
        If True, store only the first element of each decoded response;
        otherwise store the whole decoded response.
    delay : float, default 5
        Seconds to wait before each request.
    timeout : float, default 30
        Seconds after which a request is abandoned.

    Returns
    -------
    list
        Decoded responses in request order. Municipalities whose request
        fails, returns a non-200 status, or cannot be decoded/indexed are
        skipped, so the list may be shorter than ``municipalities``.
    """
    data_collected = []
    for municipality in municipalities:
        # Copy the base parameters so the caller's dict is never mutated.
        params = dict(payload)
        params["q"] = "'name':'" + municipality + "','region':'oaxaca'"
        time.sleep(delay)
        try:
            r = requests.get(url=endpoint, params=params, timeout=timeout)
            if r.status_code != 200:
                continue
            body = r.json()
            data_collected.append(body[0] if getFirst else body)
        except (requests.RequestException, ValueError, LookupError, TypeError):
            # Best effort: skip this municipality on network errors, invalid
            # JSON, or a response that cannot be indexed. KeyboardInterrupt is
            # deliberately not caught so the loop can still be interrupted.
            continue
    return data_collected

In [31]:
# Base URL of the API. HTTPS keeps the key out of clear-text traffic, and there
# is no trailing slash because the method paths below start with "/".
endpoint = "https://api.weatherapi.com/v1"
# The API key is read from the environment instead of being stored in the
# notebook; set WEATHERAPI_KEY before running (a KeyError is raised if unset).
payload = {"key": os.environ["WEATHERAPI_KEY"]}

In [32]:
# Path of the search method; it is appended to the base endpoint when calling the API
search = "/search.json"

In [33]:
# Query the search method for every municipality, keeping only the first
# element of each response.
data_search_oax = getApiDataOax(
    f"{endpoint}{search}", payload, municipalities, getFirst=True
)

In [34]:
# Path of the current-conditions method; it is appended to the base endpoint when calling the API
current = "/current.json"

In [35]:
# Query the current-conditions method for every municipality, keeping each
# full response.
data_current_oax = getApiDataOax(
    f"{endpoint}{current}", payload, municipalities, getFirst=False
)

In [36]:
# Path of the astronomy method; it is appended to the base endpoint when calling the API
astronomy = "/astronomy.json"

In [37]:
# Query the astronomy method for every municipality, keeping each full
# response.
data_astronomy_oax = getApiDataOax(
    f"{endpoint}{astronomy}", payload, municipalities, getFirst=False
)

## Normalización de datos JSON y conversión a DataFrame

In [38]:

# Flatten the search results into a table.
normalized_data_search_oax = pd.json_normalize(data_search_oax)
# Prefix every column with "location." so the names match the columns
# produced for the other two endpoints.
df_search_temp = pd.DataFrame(normalized_data_search_oax).add_prefix("location.")
# Drop the coordinates from the search table.
df_search = df_search_temp.drop(columns=["location.lat", "location.lon"])
df_search.head()

Unnamed: 0,location.id,location.name,location.region,location.country,location.url
0,3209265,Abejones,Oaxaca,Mexico,abejones-oaxaca-mexico
1,3217399,Acatlan De Perez Figueroa,Oaxaca,Mexico,acatlan-de-perez-figueroa-oaxaca-mexico
2,3209199,Asuncion Cacalotepec,Oaxaca,Mexico,asuncion-cacalotepec-oaxaca-mexico
3,3209198,Asuncion Cuyotepeji,Oaxaca,Mexico,asuncion-cuyotepeji-oaxaca-mexico
4,3217411,Asuncion Ixtaltepec,Oaxaca,Mexico,asuncion-ixtaltepec-oaxaca-mexico


In [39]:
# Flatten the nested current-conditions responses into dotted column names.
normalized_data_current_oax = pd.json_normalize(data_current_oax)
df_current = pd.DataFrame(normalized_data_current_oax)
df_current.head()

Unnamed: 0,location.name,location.region,location.country,location.lat,location.lon,location.tz_id,location.localtime_epoch,location.localtime,current.last_updated_epoch,current.last_updated,...,current.windchill_f,current.heatindex_c,current.heatindex_f,current.dewpoint_c,current.dewpoint_f,current.vis_km,current.vis_miles,current.uv,current.gust_mph,current.gust_kph
0,Abejones,Oaxaca,Mexico,17.4364,-96.6085,America/Mexico_City,1735632688,2024-12-31 02:11,1735632000,2024-12-31 02:00,...,56.2,14.1,57.4,4.1,39.4,10.0,6.0,0.0,9.4,15.1
1,Acatlan De Perez Figueroa,Oaxaca,Mexico,18.5397,-96.6057,America/Mexico_City,1735634812,2024-12-31 02:46,1735634700,2024-12-31 02:45,...,70.5,22.7,72.9,19.0,66.2,10.0,6.0,0.0,5.1,8.2
2,Asuncion Cacalotepec,Oaxaca,Mexico,17.0361,-95.955,America/Mexico_City,1735634817,2024-12-31 02:46,1735634700,2024-12-31 02:45,...,51.7,11.5,52.7,1.2,34.2,10.0,6.0,0.0,8.9,14.4
3,Asuncion Cuyotepeji,Oaxaca,Mexico,17.9167,-97.8,America/Mexico_City,1735634882,2024-12-31 02:48,1735634700,2024-12-31 02:45,...,58.0,13.9,57.1,6.6,43.9,10.0,6.0,0.0,5.6,9.1
4,Asuncion Ixtaltepec,Oaxaca,Mexico,16.5031,-95.0608,America/Mexico_City,1735634892,2024-12-31 02:48,1735634700,2024-12-31 02:45,...,70.6,24.3,75.8,18.3,65.0,10.0,6.0,0.0,7.3,11.7


In [40]:
# Flatten the nested astronomy responses into dotted column names.
normalized_data_astronomy_oax = pd.json_normalize(data_astronomy_oax)
df_astronomy = pd.DataFrame(normalized_data_astronomy_oax)
df_astronomy.head()

Unnamed: 0,location.name,location.region,location.country,location.lat,location.lon,location.tz_id,location.localtime_epoch,location.localtime,astronomy.astro.sunrise,astronomy.astro.sunset,astronomy.astro.moonrise,astronomy.astro.moonset,astronomy.astro.moon_phase,astronomy.astro.moon_illumination,astronomy.astro.is_moon_up,astronomy.astro.is_sun_up
0,Abejones,Oaxaca,Mexico,17.4364,-96.6085,America/Mexico_City,1735637124,2024-12-31 03:25,06:57 AM,06:03 PM,07:44 AM,06:56 PM,Waxing Crescent,0,0,0
1,Acatlan De Perez Figueroa,Oaxaca,Mexico,18.5397,-96.6057,America/Mexico_City,1735634985,2024-12-31 02:49,06:59 AM,06:01 PM,07:46 AM,06:54 PM,Waxing Crescent,0,0,0
2,Asuncion Cacalotepec,Oaxaca,Mexico,17.0361,-95.955,America/Mexico_City,1735634983,2024-12-31 02:49,06:53 AM,06:01 PM,07:40 AM,06:54 PM,Waxing Crescent,0,0,0
3,Asuncion Cuyotepeji,Oaxaca,Mexico,17.9167,-97.8,America/Mexico_City,1735635051,2024-12-31 02:50,07:02 AM,06:07 PM,07:50 AM,07:00 PM,Waxing Crescent,0,0,0
4,Asuncion Ixtaltepec,Oaxaca,Mexico,16.5031,-95.0608,America/Mexico_City,1735635022,2024-12-31 02:50,06:49 AM,05:59 PM,07:35 AM,06:52 PM,Waxing Crescent,0,0,0


## Unión de información

In [41]:
# Join astronomy and current-conditions rows that share the same location
# fields. Columns present in both frames but not used as keys receive the
# default "_x" / "_y" suffixes.
df_astronomy_and_current = df_astronomy.merge(
    df_current,
    on=[
        "location.name",
        "location.region",
        "location.country",
        "location.lat",
        "location.lon",
        "location.tz_id",
    ],
)
df_astronomy_and_current

Unnamed: 0,location.name,location.region,location.country,location.lat,location.lon,location.tz_id,location.localtime_epoch_x,location.localtime_x,astronomy.astro.sunrise,astronomy.astro.sunset,...,current.windchill_f,current.heatindex_c,current.heatindex_f,current.dewpoint_c,current.dewpoint_f,current.vis_km,current.vis_miles,current.uv,current.gust_mph,current.gust_kph
0,Abejones,Oaxaca,Mexico,17.4364,-96.6085,America/Mexico_City,1735637124,2024-12-31 03:25,06:57 AM,06:03 PM,...,56.2,14.1,57.4,4.1,39.4,10.0,6.0,0.0,9.4,15.1
1,Acatlan De Perez Figueroa,Oaxaca,Mexico,18.5397,-96.6057,America/Mexico_City,1735634985,2024-12-31 02:49,06:59 AM,06:01 PM,...,70.5,22.7,72.9,19.0,66.2,10.0,6.0,0.0,5.1,8.2
2,Asuncion Cacalotepec,Oaxaca,Mexico,17.0361,-95.955,America/Mexico_City,1735634983,2024-12-31 02:49,06:53 AM,06:01 PM,...,51.7,11.5,52.7,1.2,34.2,10.0,6.0,0.0,8.9,14.4
3,Asuncion Cuyotepeji,Oaxaca,Mexico,17.9167,-97.8,America/Mexico_City,1735635051,2024-12-31 02:50,07:02 AM,06:07 PM,...,58.0,13.9,57.1,6.6,43.9,10.0,6.0,0.0,5.6,9.1
4,Asuncion Ixtaltepec,Oaxaca,Mexico,16.5031,-95.0608,America/Mexico_City,1735635022,2024-12-31 02:50,06:49 AM,05:59 PM,...,70.6,24.3,75.8,18.3,65.0,10.0,6.0,0.0,7.3,11.7
5,Asuncion Nochixtlan,Oaxaca,Mexico,17.4581,-97.2233,America/Mexico_City,1735637152,2024-12-31 03:25,06:59 AM,06:05 PM,...,49.5,10.1,50.2,2.3,36.1,10.0,6.0,0.0,8.0,12.9
6,Asuncion Ocotlan,Oaxaca,Mexico,16.7619,-96.7214,America/Mexico_City,1735632886,2024-12-31 02:14,06:56 AM,06:05 PM,...,57.6,13.6,56.6,3.1,37.6,10.0,6.0,0.0,4.7,7.6
7,Asuncion Tlacolulita,Oaxaca,Mexico,16.3001,-95.7266,America/Mexico_City,1735635010,2024-12-31 02:50,06:51 AM,06:02 PM,...,67.4,19.6,67.4,14.6,58.3,10.0,6.0,0.0,8.2,13.2
8,Ayotzintepec,Oaxaca,Mexico,17.6733,-96.1288,America/Mexico_City,1735635044,2024-12-31 02:50,06:55 AM,06:01 PM,...,50.2,11.0,51.8,0.5,32.8,10.0,6.0,0.0,10.8,17.4
9,Barrio De La Soledad,Oaxaca,Mexico,15.84,-96.3233,America/Mexico_City,1735632903,2024-12-31 02:15,06:52 AM,06:05 PM,...,71.6,24.6,76.3,15.2,59.3,10.0,6.0,0.0,8.1,13.0


In [42]:
# Attach the search columns to the combined table by matching on name,
# region and country.
df_full = df_search.merge(
    df_astronomy_and_current,
    on=["location.name", "location.region", "location.country"],
)

In [46]:
# Preview the first rows of the fully merged dataset
df_full.head()

Unnamed: 0,location.id,location.name,location.region,location.country,location.url,location.lat,location.lon,location.tz_id,location.localtime_epoch_x,location.localtime_x,...,current.windchill_f,current.heatindex_c,current.heatindex_f,current.dewpoint_c,current.dewpoint_f,current.vis_km,current.vis_miles,current.uv,current.gust_mph,current.gust_kph
0,3209265,Abejones,Oaxaca,Mexico,abejones-oaxaca-mexico,17.4364,-96.6085,America/Mexico_City,1735637124,2024-12-31 03:25,...,56.2,14.1,57.4,4.1,39.4,10.0,6.0,0.0,9.4,15.1
1,3217399,Acatlan De Perez Figueroa,Oaxaca,Mexico,acatlan-de-perez-figueroa-oaxaca-mexico,18.5397,-96.6057,America/Mexico_City,1735634985,2024-12-31 02:49,...,70.5,22.7,72.9,19.0,66.2,10.0,6.0,0.0,5.1,8.2
2,3209199,Asuncion Cacalotepec,Oaxaca,Mexico,asuncion-cacalotepec-oaxaca-mexico,17.0361,-95.955,America/Mexico_City,1735634983,2024-12-31 02:49,...,51.7,11.5,52.7,1.2,34.2,10.0,6.0,0.0,8.9,14.4
3,3209198,Asuncion Cuyotepeji,Oaxaca,Mexico,asuncion-cuyotepeji-oaxaca-mexico,17.9167,-97.8,America/Mexico_City,1735635051,2024-12-31 02:50,...,58.0,13.9,57.1,6.6,43.9,10.0,6.0,0.0,5.6,9.1
4,3217411,Asuncion Ixtaltepec,Oaxaca,Mexico,asuncion-ixtaltepec-oaxaca-mexico,16.5031,-95.0608,America/Mexico_City,1735635022,2024-12-31 02:50,...,70.6,24.3,75.8,18.3,65.0,10.0,6.0,0.0,7.3,11.7


## Exportación de datos

In [43]:
# Create the output folder first: to_csv does not create missing directories,
# so the export would fail on a fresh checkout.
os.makedirs("dataset/dataset_raw", exist_ok=True)
df_full.to_csv("dataset/dataset_raw/weather.csv")