<a href="https://colab.research.google.com/github/armandochernandez-ai/Curso-python-slava/blob/main/Clima/PRONOSTICO_PMAX24.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import os
from google.colab import drive

# Mount Google Drive so the CSV stored there is reachable from the Colab runtime.
drive.mount('/content/drive')

# Load the station database.
file_path = '/content/drive/My Drive/Clima_Jalisco/estaciones_jalmes_FL.csv'
df_clima = pd.read_csv(file_path)
display(df_clima.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# display() only adds a stray "None" to the cell output. Call it directly.
df_clima.info()

# Keep a single station (Codigo == 14002) as a worked example.
df_filtered = df_clima[df_clima['Codigo'] == 14002].copy()
display(df_filtered.head())
# info() prints its own report and returns None; display(None) would add noise.
df_filtered.info()

# Annual maximum of the 'pmax24' column.
df_filtered['date'] = pd.to_datetime(df_filtered['date'])
df_filtered = df_filtered.set_index('date')
# 'YS' (year start) is the supported spelling of the deprecated 'AS' alias and
# produces the same year-start bins.
pmax24_annual_max = df_filtered['pmax24'].resample('YS').max()
# Years without any valid observation yield NaN; drop them before ranking.
pmax24_annual_max = pmax24_annual_max.dropna()
display(pmax24_annual_max.head())

# Order the annual maxima from largest to smallest so that rank 1 is the biggest event.
pmax24_sorted = pmax24_annual_max.sort_values(ascending=False)
n = len(pmax24_sorted)

# Attach the rank, the empirical probability rank / (n + 1) and its reciprocal,
# the empirical return period (n + 1) / rank.
df_sorted = (
    pmax24_sorted
    .to_frame()
    .reset_index()
    .assign(
        rank=range(1, n + 1),
        probability=lambda frame: frame['rank'] / (n + 1),
        return_period=lambda frame: (n + 1) / frame['rank'],
    )
)
display(df_sorted.head())

# Regression target: the annual maxima. Single feature: natural log of the return period.
y = df_sorted['pmax24']
X = np.log(df_sorted['return_period']).to_frame(name='log_return_period')

display(X.head(), y.head())

# Fit and plot only when there is at least one annual maximum to regress on.
if not X.empty:
    # Least-squares fit of pmax24 = a + b * log(T), where T is the empirical
    # return period and log is the natural logarithm (X was built with np.log).
    modelo = LinearRegression()
    modelo.fit(X, y)
    print(f"Coeficiente: {modelo.coef_}")
    print(f"Intercepto: {modelo.intercept_}")

    # Observed maxima and fitted curve, both drawn against the return period.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.scatter(df_sorted['return_period'], y, label='Datos')
        ax.plot(df_sorted['return_period'], modelo.predict(X), color='red', label='Tendencia Lineal')
        ax.set_xlabel('Años (retorno)')
        ax.set_ylabel('pmax24 Anual')

        # Station name and code go into the title and the output file name.
        site_name = df_filtered['site'].iloc[0] if not df_filtered.empty else 'Unknown Site'
        site_code = df_filtered['Codigo'].iloc[0] if not df_filtered.empty else 'Unknown Code'
        ax.set_title(f'TENDENCIA DE pmax24 ANUAL {site_name} ({site_code})')
        ax.legend()
        ax.grid(True)

        # Annotate the plot with the fitted parameters and the resulting formula.
        intercept = modelo.intercept_
        coef = modelo.coef_[0]
        formula = f'y = {intercept:.2f} + ({coef:.2f} * log(x))'
        textstr = f'a (Intercepto): {intercept:.2f}\nb (Coeficiente): {coef:.2f}\nFórmula: {formula}'
        ax.text(0.7, 0.05, textstr, transform=ax.transAxes, fontsize=8,
                verticalalignment='bottom', horizontalalignment='left',
                bbox=dict(boxstyle='round,pad=0.5', fc='skyblue', alpha=0.5))

        fig.tight_layout()  # keep labels from overlapping

        # Output directory for the figures.
        save_dir = '/content/drive/My Drive/Clima_Jalisco/GRAFICOS_PMAX24'
        os.makedirs(save_dir, exist_ok=True)

        # A station name containing a path separator or another reserved
        # character would make savefig fail or write to an unintended location,
        # so replace those characters. Names without them are left unchanged.
        safe_site_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in str(site_name))
        file_path = os.path.join(save_dir, f'pmax24_{safe_site_name}_{site_code}.png')

        # Save the figure, then show it in the notebook.
        fig.savefig(file_path)
        plt.show()
    finally:
        # Release the figure even if saving or showing raised.
        plt.close(fig)

    print(f"Scatter plot saved to {file_path}")
else:
    print("X is empty. Cannot fit the model.")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import os
from google.colab import drive

# Mount Google Drive so the CSV stored there can be read from the Colab runtime.
drive.mount('/content/drive')

# Location of the station database inside Drive.
file_path = '/content/drive/My Drive/Clima_Jalisco/estaciones_jalmes_FL.csv'
try:
    df_clima = pd.read_csv(file_path)
except FileNotFoundError:
    # None is the sentinel the processing step checks before doing any work.
    df_clima = None
    print(
        f"Error: The file was not found at {file_path}",
        "Please ensure the file exists at the specified path in your Google Drive.",
        sep="\n",
    )
else:
    print("Data loaded successfully.")

def _safe_filename_part(value):
    """Return ``str(value)`` with path separators and other reserved file-name characters replaced by ``_``."""
    return "".join("_" if ch in '\\/:*?"<>|' else ch for ch in str(value))


# Run the per-station analysis only when the database was actually loaded.
if df_clima is not None:
    # Parse the dates once for the whole table so every station can be resampled by year.
    df_clima['date'] = pd.to_datetime(df_clima['date'])

    # One plot is produced per distinct station code.
    unique_codes = df_clima['Codigo'].unique()
    print(f"Found {len(unique_codes)} unique stations.")

    # Directory where each station's figure is written.
    save_dir = '/content/drive/My Drive/Clima_Jalisco/GRAFICOS_PMAX24'
    os.makedirs(save_dir, exist_ok=True)
    print(f"Save directory created: {save_dir}")

    for code in unique_codes:
        print(f"\nProcessing station code: {code}")
        df_filtered = df_clima[df_clima['Codigo'] == code].copy()

        if df_filtered.empty:
            print(f"  No data found for station code: {code}")
            continue

        # Station name for the title and file name; fall back to the code when
        # the table has no 'site' column.
        site_name = df_filtered['site'].iloc[0] if 'site' in df_filtered.columns and not df_filtered['site'].empty else f'Code_{code}'

        # Annual maximum of 'pmax24'. 'YS' (year start) is the supported
        # spelling of the deprecated 'AS' alias and yields the same bins.
        df_filtered = df_filtered.set_index('date')
        pmax24_annual_max = df_filtered['pmax24'].resample('YS').max().dropna()

        # At least two annual maxima are required for the regression.
        if len(pmax24_annual_max) < 2:
            print(f"  Not enough annual maximum pmax24 data for station {code} to perform trend analysis.")
            continue

        # Rank the maxima from largest to smallest and derive the empirical
        # probability rank / (n + 1) and return period (n + 1) / rank.
        pmax24_sorted = pmax24_annual_max.sort_values(ascending=False)
        df_sorted = pmax24_sorted.to_frame().reset_index()
        n = len(df_sorted)
        df_sorted['rank'] = range(1, n + 1)
        df_sorted['probability'] = df_sorted['rank'] / (n + 1)
        df_sorted['return_period'] = (n + 1) / df_sorted['rank']

        # Target: annual maxima. Feature: natural log of the return period.
        y = df_sorted['pmax24']
        X = pd.DataFrame({'log_return_period': np.log(df_sorted['return_period'])})

        if X.empty or X['log_return_period'].isnull().all():
            print(f"  Not enough valid data points after log transformation for station {code} to fit the model.")
            continue

        # fig is reset for every station so the cleanup below never touches an
        # unbound name (fit failing on the first station) or a figure left over
        # from a previous iteration.
        fig = None
        try:
            modelo = LinearRegression()
            modelo.fit(X, y)
            print(f"  Coeficiente: {modelo.coef_}")
            print(f"  Intercepto: {modelo.intercept_}")

            # Observed maxima and fitted curve, both drawn against the return period.
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.scatter(df_sorted['return_period'], y, label='Datos')
            ax.plot(df_sorted['return_period'], modelo.predict(X), color='red', label='Tendencia Lineal')
            ax.set_xlabel('Años (retorno)')
            ax.set_ylabel('pmax24 Anual')
            ax.set_title(f'TENDENCIA DE pmax24 ANUAL {site_name} ({code})')
            ax.legend()
            ax.grid(True)

            # Annotate the plot with the fitted parameters. With one feature and
            # a 1-D target, coef_ has a single element, so coef_[0] is a scalar.
            intercept = modelo.intercept_
            coef = modelo.coef_[0]
            formula = f'y = {coef:.2f} * log(x) + {intercept:.2f}'
            textstr = f'a (Intercepto): {intercept:.2f}\nb (Coeficiente): {coef:.2f}\nFórmula: {formula}'
            ax.text(0.7, 0.05, textstr, transform=ax.transAxes, fontsize=8,
                    verticalalignment='bottom', horizontalalignment='left',
                    bbox=dict(boxstyle='round,pad=0.5', fc='skyblue', alpha=0.5))

            fig.tight_layout()

            # Station names may contain characters that are invalid in file
            # names (for example '/'); names without them are left unchanged.
            file_name = f'pmax24_trend_{_safe_filename_part(site_name)}_{code}.png'
            file_path = os.path.join(save_dir, file_name)
            fig.savefig(file_path)

            print(f"  Scatter plot saved to {file_path}")

        except Exception as e:
            # Best effort: report the failure and move on to the next station.
            print(f"  Error during model fitting or plotting for station {code}: {e}")
        finally:
            # Close the figure on success and on failure so figures do not
            # accumulate across the loop.
            if fig is not None:
                plt.close(fig)

    print("\nScript finished.")