# 2.1 Obtención de datos mediante APIs

In [1]:
import os
import time

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from unidecode import unidecode

In [34]:
# Number of table rows to read (presumably one per Oaxaca municipality).
N_OAX_MUNICIPALITIES = 570

# Set up the WebDriver
driver = webdriver.Chrome()

try:
    # Open the INEGI page with the municipal division of Oaxaca
    driver.get(
        "https://cuentame.inegi.org.mx/monografias/informacion/oax/territorio/div_municipal.aspx?tema=me&e=20"
    )

    # Give the page time to finish loading before querying the table
    time.sleep(5)

    # Read the second cell of each table row and store its cleaned text
    oax_municipalities = []
    for i in range(1, N_OAX_MUNICIPALITIES + 1):
        cell = driver.find_element(
            By.XPATH, f"//*[@id='keywords2']/tbody/tr[{i}]/td[2]"
        )
        # unidecode transliterates the text to plain ASCII (e.g. drops accents)
        oax_municipalities.append(unidecode(cell.text))
finally:
    # Always close the browser, even when an element lookup raises
    driver.quit()

In [35]:
# Take a ten-item sample (positions 151-160) of the scraped names and show it.
municipalities = oax_municipalities[151:161]
municipalities

['San Francisco Tlapancingo',
 'San Gabriel Mixtepec',
 'San Ildefonso Amatlan',
 'San Ildefonso Sola',
 'San Ildefonso Villa Alta',
 'San Jacinto Amilpas',
 'San Jacinto Tlacotepec',
 'San Jeronimo Coatlan',
 'San Jeronimo Silacayoapilla',
 'San Jeronimo Sosola']

Lista de estados de donde quiero obtener la información.

In [6]:
# Names passed as `locations` to getApiData, which sends one request per
# entry using the name as the "q" query parameter.
locations = [
    "Aguascalientes",
    "Baja California",
    "Baja California Sur",
    "Campeche",
    "Chiapas",
    "Chihuahua",
    "Ciudad de México",
    "Coahuila",
    "Colima",
    "Durango",
    "Estado de México",
    "Guanajuato",
    "Guerrero",
    "Hidalgo",
    "Jalisco",
    "Michoacán",
    "Morelos",
    "Nayarit",
    "Nuevo León",
    "Oaxaca",
    "Puebla",
    "Querétaro",
    "Quintana Roo",
    "San Luis Potosí",
    "Sinaloa",
    "Sonora",
    "Tabasco",
    "Tamaulipas",
    "Tlaxcala",
    "Veracruz",
    "Yucatán",
    "Zacatecas",
]

In [36]:
# Sanity check: how many municipality names are in the sample.
len(municipalities)

10

## Envío de solicitudes a la API

In [8]:
def getApiData(endpoint, payload, locations, getFirst, delay=5, timeout=30):
    """Query an endpoint once per location and collect the decoded JSON.

    Parameters
    ----------
    endpoint : str
        URL that receives each GET request.
    payload : dict
        Base query parameters sent with every request. It is not modified;
        each request uses a copy with ``"q"`` set to the current location.
    locations : iterable of str
        Values sent as the ``q`` query parameter, one request per value.
    getFirst : bool
        If true, keep only element ``0`` of each decoded response;
        otherwise keep the whole decoded response.
    delay : float, optional
        Seconds to sleep before each request (default 5).
    timeout : float, optional
        Timeout in seconds handed to ``requests.get`` (default 30).

    Returns
    -------
    list
        Collected results in request order. A location is skipped (with a
        printed notice) when the request fails, the status is not 200, the
        body is not valid JSON, or ``getFirst`` is set and there is no
        element ``0``.
    """
    data_collected = []
    for location in locations:
        # Work on a copy so the caller's dict is not left with a stale "q".
        params = {**payload, "q": location}
        time.sleep(delay)
        try:
            r = requests.get(url=endpoint, params=params, timeout=timeout)
            if r.status_code != 200:
                print(f"Skipping {location!r}: HTTP {r.status_code}")
                continue
            # Named `data` so the standard `json` module name is not shadowed.
            data = r.json()
        except (requests.RequestException, ValueError) as error:
            # Only network/decoding problems are swallowed; anything else
            # (including KeyboardInterrupt) propagates to the caller.
            print(f"Skipping {location!r}: {error}")
            continue
        if getFirst:
            try:
                data = data[0]
            except (IndexError, KeyError, TypeError):
                print(f"Skipping {location!r}: response has no first element")
                continue
        data_collected.append(data)
    return data_collected

In [30]:
def getApiDataOax(endpoint, payload, municipalities, getFirst, delay=5, timeout=30):
    """Query an endpoint once per municipality, scoping the search to Oaxaca.

    Parameters
    ----------
    endpoint : str
        URL that receives each GET request.
    payload : dict
        Base query parameters sent with every request. It is not modified;
        each request uses a copy with ``"q"`` set to the built query string.
    municipalities : iterable of str
        Municipality names. Each one is printed and then embedded in a
        ``'name':'<municipality>','region':'oaxaca'`` query string.
    getFirst : bool
        If true, keep only element ``0`` of each decoded response;
        otherwise keep the whole decoded response.
    delay : float, optional
        Seconds to sleep before each request (default 5).
    timeout : float, optional
        Timeout in seconds handed to ``requests.get`` (default 30).

    Returns
    -------
    list
        Collected results in request order. A municipality is skipped (with
        a printed notice) when the request fails, the status is not 200, the
        body is not valid JSON, or ``getFirst`` is set and there is no
        element ``0``.
    """
    data_collected = []
    for municipality in municipalities:
        print(municipality)
        # Work on a copy so the caller's dict is not left with a stale "q".
        params = {
            **payload,
            "q": "'name':'" + municipality + "','region':'oaxaca'",
        }
        time.sleep(delay)
        try:
            r = requests.get(url=endpoint, params=params, timeout=timeout)
            if r.status_code != 200:
                print(f"Skipping {municipality!r}: HTTP {r.status_code}")
                continue
            # Named `data` so the standard `json` module name is not shadowed.
            data = r.json()
        except (requests.RequestException, ValueError) as error:
            # Only network/decoding problems are swallowed; anything else
            # (including KeyboardInterrupt) propagates to the caller.
            print(f"Skipping {municipality!r}: {error}")
            continue
        if getFirst:
            try:
                data = data[0]
            except (IndexError, KeyError, TypeError):
                print(f"Skipping {municipality!r}: response has no first element")
                continue
        data_collected.append(data)
    return data_collected

In [23]:
# =============CODE FOR DEBUG==============
# Disabled scratch code: it is wrapped in a string literal, so none of it runs.
# NOTE(review): it refers to `oax_states`, which is not defined in the visible
# part of this notebook — confirm the intended name before re-enabling it.
"""
municipalities = oax_states[80:84]
data_collected_0 = []
data_collected = []
for municipality in municipalities:
    payload["q"] = "'name':'" + municipality + "','region':'oaxaca'"
    r = requests.get(url=endpoint + search, params=payload)
    if r.status_code == 200:
        json = r.json()
        data_collected_0.append(json[0])
        data_collected.append(json)
"""

In [6]:
# Base URL of the WeatherAPI service, over HTTPS. It has no trailing slash
# because the paths appended to it (e.g. "/search.json") already start with
# one; a trailing slash here would produce ".../v1//search.json".
endpoint = "https://api.weatherapi.com/v1"

# The API key is read from the WEATHERAPI_KEY environment variable so the
# secret is never stored in the notebook. Set it before starting the kernel.
# NOTE(review): a key that was ever committed in plain text should be rotated.
payload = {"key": os.environ.get("WEATHERAPI_KEY", "")}
if not payload["key"]:
    print("WEATHERAPI_KEY is not set; WeatherAPI requests will be rejected.")

In [8]:
# Path of the search endpoint; concatenated with `endpoint` to build the URL.
search = "/search.json"

In [31]:
# Look up a four-item sample (positions 80-83) of the scraped Oaxaca
# municipalities, keeping the first match of each response.
# The list comes from `oax_municipalities`; the name `oax_states` used here
# before is not defined in the visible notebook and fails on a fresh kernel.
data_search_oax = getApiDataOax(
    endpoint=endpoint + search,
    payload=payload,
    municipalities=oax_municipalities[80:84],
    getFirst=True,
)

San Agustin Atenango
San Agustin Chayuco
San Agustin de las Juntas
San Agustin Etla


In [14]:
# Search every entry of `locations`, keeping the first match of each response.
data_search = getApiData(
    endpoint=endpoint + search, payload=payload, locations=locations, getFirst=True
)

['San Agustin Atenango',
 'San Agustin Chayuco',
 'San Agustin de las Juntas',
 'San Agustin Etla']

In [12]:
# Path of the current-conditions endpoint; concatenated with `endpoint`.
current = "/current.json"

In [13]:
# Request the current-conditions data for every entry of `locations`,
# keeping each decoded response whole (getFirst=False).
data_current = getApiData(
    endpoint=endpoint + current, payload=payload, locations=locations, getFirst=False
)

In [14]:
# Path of the astronomy endpoint; concatenated with `endpoint`.
astronomy = "/astronomy.json"

In [15]:
# Request the astronomy data for every entry of `locations`,
# keeping each decoded response whole (getFirst=False).
data_astronomy = getApiData(
    endpoint=endpoint + astronomy, payload=payload, locations=locations, getFirst=False
)

In [26]:

# Flatten the Oaxaca search results into a table.
# The input is `data_search_oax` (the result of the Oaxaca search call);
# `data_collected_0` only exists inside the disabled debug snippet.
# json_normalize already returns a DataFrame, so no extra conversion is needed.
normalized_data_search_oax = pd.json_normalize(data_search_oax)
# Stored under its own name so it does not collide with the states table,
# which is also called `df_search`.
df_search_oax = normalized_data_search_oax
df_search_oax.head()

Unnamed: 0,id,name,region,country,lat,lon,url
0,3209970,San Agustin Atenango,Oaxaca,Mexico,17.61,-98.01,san-agustin-atenango-oaxaca-mexico
1,3208245,San Agustin Chayuco,Oaxaca,Mexico,17.25,-98.02,san-agustin-chayuco-oaxaca-mexico
2,3217385,San Agustin De Las Juntas,Oaxaca,Mexico,17.0,-96.71,san-agustin-de-las-juntas-oaxaca-mexico
3,3208244,San Agustin Etla,Oaxaca,Mexico,17.19,-96.77,san-agustin-etla-oaxaca-mexico


In [None]:
# Query every scraped Oaxaca municipality (one request each, so with the
# default per-request pause this cell takes a long time to finish).
# Uses `oax_municipalities` (the scraped list; `oax_states` is not defined in
# the visible notebook) and the Oaxaca-scoped helper so that each name is
# searched together with its region instead of as a bare name.
data_search_oax = getApiDataOax(
    endpoint=endpoint + search,
    payload=payload,
    municipalities=oax_municipalities,
    getFirst=True,
)

In [18]:
# Flatten the state search results and prefix every column with "location."
normalized_data_search = pd.json_normalize(data_search)
df_search_temp = pd.DataFrame.from_dict(
    normalized_data_search, orient="columns"
).add_prefix("location.")

# Remove the coordinate columns from the search table
df_search = df_search_temp.drop(columns=["location.lat", "location.lon"])
df_search

Unnamed: 0,location.id,location.name,location.region,location.country,location.url
0,3296893,Aguascalientes,Aguascalientes,Mexico,aguascalientes-aguascalientes-mexico
1,3242121,Baja California,Chiapas,Mexico,baja-california-chiapas-mexico
2,3274846,Agrodelicias De La Baja Sur,Baja California Sur,Mexico,agrodelicias-de-la-baja-sur-baja-california-su...
3,3220121,Campeche,Campeche,Mexico,campeche-campeche-mexico
4,3241759,Chiapas Nuevo,Chiapas,Mexico,chiapas-nuevo-chiapas-mexico
5,3295284,Chihuahua,Chihuahua,Mexico,chihuahua-chihuahua-mexico
6,3198637,Ciudad Jardin,México,Mexico,ciudad-jardin-mxico-mexico
7,3182624,Coahuila,San Luis PotosÃ­,Mexico,coahuila-san-luis-potos-mexico
8,3325202,Colima,Colima,Mexico,colima-colima-mexico
9,707103,Durango,Pais Vasco,Spain,durango-pais-vasco-spain


In [19]:
# Flatten the nested current-conditions responses into a table and preview it
normalized_data_current = pd.json_normalize(data_current)
df_current = pd.DataFrame.from_dict(normalized_data_current, orient="columns")
df_current.head(5)

Unnamed: 0,location.name,location.region,location.country,location.lat,location.lon,location.tz_id,location.localtime_epoch,location.localtime,current.last_updated_epoch,current.last_updated,...,current.windchill_f,current.heatindex_c,current.heatindex_f,current.dewpoint_c,current.dewpoint_f,current.vis_km,current.vis_miles,current.uv,current.gust_mph,current.gust_kph
0,Aguascalientes,Aguascalientes,Mexico,21.8823,-102.2826,America/Mexico_City,1735588899,2024-12-30 14:01,1735588800,2024-12-30 14:00,...,76.3,23.4,74.1,-2.7,27.1,10.0,6.0,6.0,6.7,10.8
1,Baja California,Chiapas,Mexico,16.4333,-93.7953,America/Mexico_City,1735588964,2024-12-30 14:02,1735588800,2024-12-30 14:00,...,80.7,28.0,82.3,16.8,62.3,10.0,6.0,5.3,5.4,8.7
2,Agrodelicias De La Baja Sur,Baja California Sur,Mexico,23.775,-110.2737,America/Mazatlan,1735588911,2024-12-30 13:01,1735588800,2024-12-30 13:00,...,76.4,25.5,77.9,13.6,56.6,10.0,6.0,5.9,10.8,17.3
3,Campeche,Campeche,Mexico,19.838,-90.5277,America/Merida,1735588916,2024-12-30 14:01,1735588800,2024-12-30 14:00,...,82.0,31.8,89.3,24.0,75.2,11.0,6.0,4.5,9.3,15.0
4,Chiapas Nuevo,Chiapas,Mexico,16.4553,-93.8272,America/Mexico_City,1735588922,2024-12-30 14:02,1735588800,2024-12-30 14:00,...,80.7,28.0,82.3,16.8,62.3,10.0,6.0,5.3,5.4,8.7


In [20]:
# Flatten the nested astronomy responses into a table and preview it
normalized_data_astronomy = pd.json_normalize(data_astronomy)
df_astronomy = pd.DataFrame.from_dict(normalized_data_astronomy, orient="columns")
df_astronomy.head(5)

Unnamed: 0,location.name,location.region,location.country,location.lat,location.lon,location.tz_id,location.localtime_epoch,location.localtime,astronomy.astro.sunrise,astronomy.astro.sunset,astronomy.astro.moonrise,astronomy.astro.moonset,astronomy.astro.moon_phase,astronomy.astro.moon_illumination,astronomy.astro.is_moon_up,astronomy.astro.is_sun_up
0,Aguascalientes,Aguascalientes,Mexico,21.8823,-102.2826,America/Mexico_City,1735586980,2024-12-30 13:29,07:28 AM,06:16 PM,07:24 AM,06:08 PM,New Moon,1,0,1
1,Baja California,Chiapas,Mexico,16.4333,-93.7953,America/Mexico_City,1735589080,2024-12-30 14:04,06:43 AM,05:53 PM,06:34 AM,05:46 PM,New Moon,1,0,1
2,Agrodelicias De La Baja Sur,Baja California Sur,Mexico,23.775,-110.2737,America/Mazatlan,1735589086,2024-12-30 13:04,07:03 AM,05:45 PM,07:02 AM,05:36 PM,New Moon,1,0,1
3,Campeche,Campeche,Mexico,19.838,-90.5277,America/Merida,1735588370,2024-12-30 13:52,06:36 AM,05:34 PM,06:29 AM,05:24 PM,New Moon,1,0,1
4,Chiapas Nuevo,Chiapas,Mexico,16.4553,-93.8272,America/Mexico_City,1735589160,2024-12-30 14:06,06:43 AM,05:53 PM,06:34 AM,05:46 PM,New Moon,1,0,1


In [21]:
# Inner-join the astronomy and current-conditions tables on the columns that
# identify a location in both of them.
df_astronomy_current = df_astronomy.merge(
    df_current,
    how="inner",
    on=[
        "location.name",
        "location.region",
        "location.country",
        "location.lat",
        "location.lon",
        "location.tz_id",
    ],
)
df_astronomy_current

Unnamed: 0,location.name,location.region,location.country,location.lat,location.lon,location.tz_id,location.localtime_epoch_x,location.localtime_x,astronomy.astro.sunrise,astronomy.astro.sunset,...,current.windchill_f,current.heatindex_c,current.heatindex_f,current.dewpoint_c,current.dewpoint_f,current.vis_km,current.vis_miles,current.uv,current.gust_mph,current.gust_kph
0,Aguascalientes,Aguascalientes,Mexico,21.8823,-102.2826,America/Mexico_City,1735586980,2024-12-30 13:29,07:28 AM,06:16 PM,...,76.3,23.4,74.1,-2.7,27.1,10.0,6.0,6.0,6.7,10.8
1,Baja California,Chiapas,Mexico,16.4333,-93.7953,America/Mexico_City,1735589080,2024-12-30 14:04,06:43 AM,05:53 PM,...,80.7,28.0,82.3,16.8,62.3,10.0,6.0,5.3,5.4,8.7
2,Agrodelicias De La Baja Sur,Baja California Sur,Mexico,23.775,-110.2737,America/Mazatlan,1735589086,2024-12-30 13:04,07:03 AM,05:45 PM,...,76.4,25.5,77.9,13.6,56.6,10.0,6.0,5.9,10.8,17.3
3,Campeche,Campeche,Mexico,19.838,-90.5277,America/Merida,1735588370,2024-12-30 13:52,06:36 AM,05:34 PM,...,82.0,31.8,89.3,24.0,75.2,11.0,6.0,4.5,9.3,15.0
4,Chiapas Nuevo,Chiapas,Mexico,16.4553,-93.8272,America/Mexico_City,1735589160,2024-12-30 14:06,06:43 AM,05:53 PM,...,80.7,28.0,82.3,16.8,62.3,10.0,6.0,5.3,5.4,8.7
5,Chihuahua,Chihuahua,Mexico,28.6353,-106.0889,America/Chihuahua,1735587008,2024-12-30 13:30,07:57 AM,06:18 PM,...,78.7,24.4,76.0,-11.0,12.3,16.0,9.0,4.7,2.6,4.1
6,Ciudad Jardin,México,Mexico,19.4222,-99.0167,America/Mexico_City,1735589108,2024-12-30 14:05,07:10 AM,06:08 PM,...,72.9,22.2,72.0,-1.5,29.4,11.0,6.0,6.5,3.6,5.8
7,Coahuila,San Luis PotosÃ­,Mexico,21.9313,-100.7511,America/Mexico_City,1735589142,2024-12-30 14:05,07:22 AM,06:10 PM,...,75.5,23.1,73.6,-1.3,29.7,19.0,11.0,5.9,8.0,12.9
8,Colima,Colima,Mexico,19.2434,-103.7277,America/Mexico_City,1735589176,2024-12-30 14:06,07:28 AM,06:27 PM,...,90.8,31.5,88.7,10.5,50.9,10.0,6.0,6.4,5.4,8.7
9,Durango,Pais Vasco,Spain,43.1667,-2.6167,Europe/Madrid,1735589184,2024-12-30 21:06,08:42 AM,05:45 PM,...,44.8,7.5,45.4,2.7,36.8,10.0,6.0,0.0,4.8,7.7


In [22]:
# Inner-join the search table with the combined astronomy/current table on
# the location name, region and country.
df_full = df_search.merge(
    df_astronomy_current,
    how="inner",
    on=["location.name", "location.region", "location.country"],
)

In [23]:
# Create the output folder first: to_csv does not create missing directories
# and would raise OSError on a fresh checkout.
os.makedirs("dataset/dataset_raw", exist_ok=True)
df_full.to_csv("dataset/dataset_raw/weather.csv")