In [None]:
# Importamos las librerías que necesitamos
import tqdm as tddm
from bs4 import BeautifulSoup
import time
from time import sleep
import pandas as pd
import numpy as np
import random

# Importar librerías para automatización de navegadores web con Selenium
# -----------------------------------------------------------------------
from selenium import webdriver  # Selenium es una herramienta para automatizar la interacción con navegadores web.
from webdriver_manager.chrome import ChromeDriverManager  # ChromeDriverManager gestiona la instalación del controlador de Chrome.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys  # Keys es útil para simular eventos de teclado en Selenium.
from selenium.webdriver.support.ui import Select  # Select se utiliza para interactuar con elementos <select> en páginas web.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException # Excepciones comunes de selenium que nos podemos encontrar 

import aiohttp
import aiofiles
import asyncio
import nest_asyncio


In [None]:
# Load the municipality table; the first column of the CSV file is used as the DataFrame index.
csv_municipios = pd.read_csv("csv_municipios.csv", index_col = 0)
# Values of the "index" column as a plain list; main() later types each one
# into the site's history search box.
lista_codigos = csv_municipios["index"].tolist()

In [None]:
# Quick visual check of the values that were loaded from the CSV.
print(lista_codigos)

In [None]:
def crear_df(sopa, municipio, mes):
    """Build one month's daily-weather DataFrame from the scraped column tables.

    Every element of ``sopa`` is turned into one column: the text of each of
    its direct ``<tr>`` children becomes one cell. The first row of the
    combined frame (the header row) is discarded.

    Args:
        sopa: Iterable of BeautifulSoup table tags. Seven tables are expected;
            they are matched positionally to the labels ``Día``, ``Temp.``,
            ``DP``, ``Hum.``, ``WS``, ``Press.`` and ``Prec.``.
        municipio: Value written to the ``Municipio`` column of every row.
        mes: Month token as produced by ``url[-2:]`` (``"-1"`` ... ``"-9"``,
            ``"10"``, ``"11"``, ``"12"``). Known tokens are translated to the
            Spanish month name; any other value is stored unchanged.

    Returns:
        A DataFrame with the columns ``Día``, ``Mes``, ``Municipio``, three
        columns for each of ``Temp.``, ``DP``, ``Hum.``, ``WS`` and ``Press.``,
        and finally ``Prec.``; or ``None`` when ``sopa`` contains no tables.

    Raises:
        ValueError: If the number of tables is not seven, or a measurement
            cell does not split into exactly three whitespace-separated parts.
    """
    lista_df = [
        pd.DataFrame([fila.text for fila in tabla.find_all("tr", recursive=False)])
        for tabla in sopa
    ]
    if not lista_df:
        # Nothing to assemble; callers already treat None as "no table found".
        return None

    df_completo = pd.concat(lista_df, axis=1)
    df_completo.columns = ["Día", "Temp.", "DP", "Hum.", "WS", "Press.", "Prec."]
    # Work on an independent copy: assigning columns to a bare iloc slice
    # triggers pandas' chained-assignment warnings.
    df_final = df_completo.iloc[1:].copy()

    meses = {"-1": "Enero", "-2": "Febrero", "-3": "Marzo", "-4": "Abril", "-5": "Mayo", "-6": "Junio",
             "-7": "Julio", "-8": "Agosto", "-9": "Septiembre", "10": "Octubre", "11": "Noviembre", "12": "Diciembre"}
    df_final["Mes"] = meses.get(mes, mes)
    df_final["Municipio"] = municipio

    # NOTE(review): the first suffix has no leading space, so the labels come
    # out as "Temp.Max" / "Temp. Avg" / "Temp. Min". Kept unchanged because
    # code consuming this frame may rely on these exact names.
    sufijos = ["Max", " Avg", " Min"]
    for col in ["Temp.", "DP", "Hum.", "WS", "Press."]:
        # Each cell holds three whitespace-separated readings.
        df_final[[col + sufijo for sufijo in sufijos]] = df_final[col].str.split(expand=True)
        df_final.drop(columns=col, inplace=True)

    # Pop and re-insert so that "Prec." ends up as the last column.
    df_final["Prec."] = df_final.pop("Prec.")

    return df_final

In [None]:
# Patch asyncio to allow nested event loops, so that asyncio.run() below can
# be called while a loop is already running (presumably the notebook kernel's
# own loop; confirm if this is ever run as a plain script).
nest_asyncio.apply()

async def fetch_url(session, url):
    """Request ``url`` through ``session`` and return the response body as text.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the GET request.
        url: Address to download.

    Returns:
        The decoded body of the response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error
            status (400 or above).
    """
    async with session.get(url) as response:
        # Fail loudly on error statuses so an error page is never handed to
        # the HTML parser as if it were weather data.
        response.raise_for_status()
        return await response.text()

async def process_url(session, url, municipio, mes_url, lista_tablas):
    """Download one page, extract its data tables and collect the result.

    The page is fetched with ``fetch_url``, parsed with BeautifulSoup, and
    every ``<table aria-labelledby="Days data">`` found is passed to
    ``crear_df``. A successfully built DataFrame is appended to
    ``lista_tablas``; nothing is returned.

    Args:
        session: Open ``aiohttp.ClientSession`` used for the download.
        url: Page to download.
        municipio: Forwarded to ``crear_df`` as the municipality value.
        mes_url: Forwarded to ``crear_df`` as the month token.
        lista_tablas: List that receives the resulting DataFrame (mutated in
            place).

    Any exception raised while downloading or parsing is reported with
    ``print`` and swallowed, so one failing page does not abort sibling tasks.
    """
    try:
        html_table_page = await fetch_url(session, url)
        sopa = BeautifulSoup(html_table_page, "html.parser")
        # find_all is the supported spelling; findAll is a deprecated alias.
        tablas = sopa.find_all("table", {"aria-labelledby": "Days data"})
        if not tablas:
            print(f"No se encontró tabla para la URL: {url}")
            return

        tabla_pagina = crear_df(tablas, municipio, mes_url)
        if tabla_pagina is None:
            print(f"No se ha encontrado la tabla del municipio {municipio} para {mes_url}")
            return

        lista_tablas.append(tabla_pagina)
    except Exception as e:
        # Deliberately broad: this is the per-task boundary under asyncio.gather.
        print(f"Error procesando la URL {url}, motivo: {e}")

def _rechazar_cookies(driver):
    """Click the reject button of the cookie dialog (best effort, never raises)."""
    try:
        iframe = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, '//*[@id="sp_message_iframe_1165301"]')))
        driver.switch_to.frame(iframe)
        try:
            driver.find_element(By.CSS_SELECTOR, "#notice > div.message-component.message-row.cta-buttons-container > div.message-component.message-column.cta-button-column.reject-column > button").click()
        finally:
            # Always leave the iframe, otherwise later lookups on the main
            # document would be resolved inside the dialog and fail.
            driver.switch_to.default_content()
    except Exception as e:
        print(f"Error handling cookies: {e}")


async def _abrir_vista_mensual(driver, municipio):
    """Search ``municipio`` on the history page and open the third view link."""
    driver.find_element(By.CSS_SELECTOR, "#historySearch").send_keys(municipio, Keys.TAB)
    await asyncio.sleep(2)
    # The submit button is clicked twice with a pause in between; presumably
    # the first click alone does not submit reliably (TODO confirm).
    driver.find_element(By.CSS_SELECTOR, "#dateSubmit").click()
    await asyncio.sleep(2)
    driver.find_element(By.CSS_SELECTOR, "#dateSubmit").click()
    await asyncio.sleep(4)
    # Third link of the view selector (presumably the monthly view; confirm).
    driver.find_element(By.CSS_SELECTOR, "#inner-content > div.region-content-main > div.row > div:nth-child(1) > div:nth-child(1) > div > lib-link-selector > div > div > div > a:nth-child(3)").click()
    await asyncio.sleep(2)


def _urls_mensuales(url_actual):
    """Return the URLs for months 1-12 derived from a URL ending in ``-<month>``."""
    # Cut at the last "-" instead of dropping a fixed two characters, so both
    # one-digit ("...-5") and two-digit ("...-10") month suffixes are handled.
    base, separador, _ = url_actual.rpartition("-")
    if not separador:
        return []
    return [f"{base}-{mes}" for mes in range(1, 13)]


async def main():
    """Scrape the monthly tables of every entry in ``lista_codigos``.

    A Chrome session opens the Weather Underground history page, rejects the
    cookie dialog and, for each municipality, navigates to its monthly view.
    The resulting URL is used as a template for the twelve month pages, which
    are downloaded concurrently through ``process_url``.

    Returns:
        A DataFrame with the tables of all municipalities concatenated
        (months in calendar order within each municipality), or an empty
        DataFrame when nothing could be scraped.
    """
    url_wunder = "https://www.wunderground.com/history"
    dfs_municipios = []

    driver = webdriver.Chrome()
    try:
        driver.get(url_wunder)
        driver.maximize_window()
        await asyncio.sleep(2)

        _rechazar_cookies(driver)
        await asyncio.sleep(2)

        async with aiohttp.ClientSession() as session:
            for municipio in lista_codigos:
                try:
                    await _abrir_vista_mensual(driver, municipio)
                except Exception as e:
                    print(f"Error navigating URL for {municipio}: {e}")
                    # Return to the search page so the next municipality
                    # does not start from a half-navigated state.
                    driver.get(url_wunder)
                    continue

                lista_urls = _urls_mensuales(driver.current_url)

                # One private list per URL: results are gathered back in URL
                # order regardless of which download finishes first.
                tablas_por_url = [[] for _ in lista_urls]
                tasks = [
                    process_url(session, url, municipio, url[-2:], destino)
                    for url, destino in zip(lista_urls, tablas_por_url)
                ]
                await asyncio.gather(*tasks)
                lista_tablas = [tabla for destino in tablas_por_url for tabla in destino]

                if lista_tablas:
                    dfs_municipios.append(pd.concat(lista_tablas, ignore_index=True))
                else:
                    print(f"No se encontraron tablas de datos de {municipio}")
                driver.get(url_wunder)
    finally:
        # Close the browser even if the scrape is interrupted by an error.
        driver.quit()

    if not dfs_municipios:
        # pd.concat raises on an empty list; hand back an empty frame instead.
        return pd.DataFrame()
    return pd.concat(dfs_municipios, ignore_index=True)

# Run the whole scrape to completion.
# NOTE(review): the DataFrame returned by main() is not bound to a name here,
# so it is only available as this cell's output; assign it if it is needed later.
asyncio.run(main())
