# In [3]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import seaborn as sns

def _scale_future_rows(scaler, future_temps, cluster_value, features):
    """Return one scaled feature row per future month, target slot left as a placeholder.

    Column 0 (the target) is filled with 0 before scaling and is meant to be
    overwritten by the caller with a model prediction, which is already in
    scaled space. This assumes the scaler transforms every column
    independently of the others.
    """
    raw = np.zeros((len(future_temps), len(features)), dtype=np.float32)
    raw[:, 1] = cluster_value
    raw[:, 2:] = future_temps[features[2:]].to_numpy(dtype=np.float32)
    scaled = scaler.transform(pd.DataFrame(raw, columns=features))
    return np.asarray(scaled, dtype=np.float32)


def _recursive_forecast(model, initial_window, future_scaled, n_forecasts_per_month):
    """Roll the model forward and return its scaled predictions as a 1-D array.

    For every row of ``future_scaled`` the model is called
    ``n_forecasts_per_month`` times; after each call the oldest time step is
    dropped from the window and a new one (prediction + that month's scaled
    exogenous features) is appended.
    """
    window = initial_window.copy()
    scaled_predictions = []
    for month_row in future_scaled:
        for _ in range(n_forecasts_per_month):
            pred = model.predict(window)
            scaled_value = pred[0][0]
            scaled_predictions.append(scaled_value)

            # The prediction is already scaled: insert it as-is instead of
            # passing it through scaler.transform a second time.
            next_step = month_row.copy()
            next_step[0] = scaled_value
            window = np.concatenate((window[:, 1:, :], next_step.reshape(1, 1, -1)), axis=1)
    return np.array(scaled_predictions)


def _descale_target(scaler, scaled_target, features):
    """Map scaled target values back to original units via the scaler's first column."""
    # The scaler expects every feature column, so pad the other columns with zeros.
    padded = np.zeros((len(scaled_target), len(features)))
    padded[:, 0] = scaled_target
    restored = scaler.inverse_transform(pd.DataFrame(padded, columns=features))
    return np.asarray(restored)[:, 0]


def make_predictions_with_confidence_intervals(model, scaler, group, future_temps, look_back=6, n_bootstrap=2, alpha=0.01, n_forecasts_per_month=5):
    """Forecast ``landed_w_kg`` recursively and derive percentile bands.

    The last ``look_back`` rows of ``group`` (scaled) seed the input window.
    For each row of ``future_temps`` the model is called
    ``n_forecasts_per_month`` times, feeding every prediction back into the
    window together with that month's temperature features and the
    ``Cluster_Label`` of the first row of ``group``.

    Args:
        model: Object with ``predict(X)`` returning an array whose ``[0][0]``
            element is the scaled target for a ``(1, look_back, 14)`` input.
        scaler: Fitted scaler exposing ``transform`` / ``inverse_transform``
            over the 14 feature columns (target first).
        group: DataFrame with the 14 feature columns; history of the series.
        future_temps: DataFrame with the 12 temperature columns, one row per
            future month.
        look_back: Number of trailing history rows used as the initial window.
        n_bootstrap: Number of repeated forecast runs used for the bands.
        alpha: Significance level; bands are the ``alpha/2`` and
            ``1 - alpha/2`` percentiles across runs.
        n_forecasts_per_month: Forecasts generated per row of ``future_temps``.

    Returns:
        Tuple ``(predictions, lower_bound, upper_bound)`` of 1-D arrays in
        original units, each of length
        ``len(future_temps) * n_forecasts_per_month``. Empty arrays are
        returned when there is nothing to forecast; the bounds are NaN when
        ``n_bootstrap`` is not positive.

    NOTE(review): the repeated runs start from the same window and do not
    resample anything, so with a deterministic ``model.predict`` both bounds
    coincide with the point forecast. A real bootstrap would need residuals
    or resampled inputs.
    """
    features = [
        'landed_w_kg', 'Cluster_Label', 'mean_temp_30m', 'mean_temp_10m', 'thetao_sfc=6',
        'thetao_sfc=7.92956018447876', 'thetao_sfc=9.572997093200684', 'thetao_sfc=11.40499973297119',
        'thetao_sfc=13.46714019775391', 'thetao_sfc=15.8100700378418', 'thetao_sfc=18.49555969238281',
        'thetao_sfc=21.59881973266602', 'thetao_sfc=25.21141052246094', 'thetao_sfc=29.44473075866699',
    ]

    # Nothing to forecast: return consistently shaped empty results.
    if len(future_temps) == 0 or n_forecasts_per_month <= 0:
        return np.empty(0), np.empty(0), np.empty(0)

    group_scaled = np.asarray(scaler.transform(group[features]))

    # Initial window with shape (1, look_back, n_features).
    initial_window = group_scaled[-look_back:].astype(np.float32)[np.newaxis, :, :]

    # The exogenous part of every future step is known up front; scale it once.
    future_scaled = _scale_future_rows(
        scaler, future_temps, group['Cluster_Label'].iloc[0], features
    )

    point_scaled = _recursive_forecast(model, initial_window, future_scaled, n_forecasts_per_month)
    predictions_descaled = _descale_target(scaler, point_scaled, features)

    # Repeated runs used for the percentile bands (see NOTE in the docstring).
    bootstrap_predictions = [
        _descale_target(
            scaler,
            _recursive_forecast(model, initial_window, future_scaled, n_forecasts_per_month),
            features,
        )
        for _ in range(n_bootstrap)
    ]
    if not bootstrap_predictions:
        undefined = np.full(predictions_descaled.shape, np.nan)
        return predictions_descaled, undefined, undefined.copy()

    bootstrap_predictions = np.array(bootstrap_predictions)
    lower_bound = np.percentile(bootstrap_predictions, 100 * alpha / 2, axis=0)
    upper_bound = np.percentile(bootstrap_predictions, 100 * (1 - alpha / 2), axis=0)

    return predictions_descaled, lower_bound, upper_bound

# Run configuration: species, cluster, forecasts per future month, output folder.
specie = 'ALMEJA'
cluster_label = 4
n_forecasts_per_month = 5
results_dir = 'Resultados'

# Load the model and the scaler saved for this species/cluster pair.
path_model = f'modelos_moe/{specie}_cluster_{cluster_label}_moe_model.h5'
model = tf.keras.models.load_model(path_model)
scaler_path = f'modelos_moe/{specie}_cluster_{cluster_label}_moe_scaler.pkl'
# NOTE: joblib.load unpickles the file; only load scaler files from a trusted source.
scaler = joblib.load(scaler_path)
data = pd.read_csv('data/data.csv', low_memory=False)

# The model is specific to one species and cluster, so the look-back window
# must come from that series only, not from the whole file.
group = data[(data['species'] == specie) & (data['Cluster_Label'] == cluster_label)].copy()
if group.empty:
    raise ValueError(f'No historical rows for {specie} in cluster {cluster_label}')
# Stable chronological order so the last rows are the most recent records.
group['date'] = pd.to_datetime(group['date'])
group = group.sort_values(by='date', kind='stable')

# Future temperatures for the selected cluster, ordered by month.
future_temps = pd.read_csv('future_temp.csv')
# .copy() so the new 'date' column is not written to a slice of the original frame.
future_temps = future_temps[future_temps['Cluster_Label'] == cluster_label].copy()
future_temps['date'] = pd.to_datetime(future_temps['year'].astype(str) + '-' + future_temps['month'].astype(str))
future_temps = future_temps.sort_values(by='date')

# Use the same n_forecasts_per_month here and for expanded_dates below so the
# number of predictions always matches the number of dates.
predictions, lower_bound, upper_bound = make_predictions_with_confidence_intervals(
    model, scaler, group, future_temps,
    look_back=6, n_bootstrap=1, alpha=0.01, n_forecasts_per_month=n_forecasts_per_month,
)

os.makedirs(results_dir, exist_ok=True)

# One date entry per forecast: every month is repeated n_forecasts_per_month times.
expanded_dates = future_temps['date'].repeat(n_forecasts_per_month).reset_index(drop=True)

data_to_save = pd.DataFrame({
    'date': expanded_dates,
    'predictions': predictions,
    #'lower_bound': lower_bound,  # Uncomment to also store the lower band
    #'upper_bound': upper_bound   # Uncomment to also store the upper band
})

csv_path = os.path.join(results_dir, f'predicciones_{specie}_cluster_{cluster_label}.csv')
data_to_save.to_csv(csv_path, index=False)


# Folder holding the prediction files, and path to the historical data file.
results_directory = 'Resultados'
historical_data_path = 'data/data.csv'

# Load the historical data.
historical_data = pd.read_csv(historical_data_path)
historical_data['date'] = pd.to_datetime(historical_data['date'])

# Keep only the selected species and cluster so the historical boxes are
# comparable with the per-cluster forecasts named in the plot title.
# .copy() avoids assigning new columns to a slice of the original frame.
historical_data = historical_data[
    (historical_data['species'] == specie)
    & (historical_data['Cluster_Label'] == cluster_label)
].copy()

# Tag historical rows with their year and origin.
historical_data['year'] = historical_data['date'].dt.year
historical_data['type'] = 'Historico'

# Prediction files for this species and cluster. The character right after the
# prefix must not be a digit, otherwise cluster 4 would also match cluster 40, 41, ...
file_prefix = f'predicciones_{specie}_cluster_{cluster_label}'
prediction_files = sorted(
    f for f in os.listdir(results_directory)
    if f.startswith(file_prefix)
    and f.endswith('.csv')
    and not f[len(file_prefix):len(file_prefix) + 1].isdigit()
)

# Read every prediction file and collect its rows, renamed to match the
# historical column so both sources share the 'landed_w_kg' axis.
prediction_frames = []
for file in prediction_files:
    file_path = os.path.join(results_directory, file)
    df = pd.read_csv(file_path)

    # Make sure 'date' is a datetime before extracting the year.
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year
    df['type'] = 'Pronostico'

    prediction_frames.append(
        df[['year', 'predictions', 'type']].rename(columns={'predictions': 'landed_w_kg'})
    )

# All forecasts in a single frame (empty, with the expected columns, if no file matched).
if prediction_frames:
    all_predictions = pd.concat(prediction_frames, ignore_index=True)
else:
    all_predictions = pd.DataFrame(columns=['year', 'landed_w_kg', 'type'])

# Historical rows first, then the forecasts, concatenated once.
combined_data = pd.concat(
    [historical_data[['year', 'landed_w_kg', 'type']], *prediction_frames],
    ignore_index=True,
)

# Show only years from 2020 onwards.
combined_data = combined_data[combined_data['year'] >= 2020]

# Colours that tell 'Historico' and 'Pronostico' apart.
palette = {'Historico': 'lightblue', 'Pronostico': 'lightgreen'}

# Boxplot per year, split by data origin.
plt.figure(figsize=(12, 4))
sns.boxplot(x='year', y='landed_w_kg', hue='type', data=combined_data, palette=palette, whis=1.2, showfliers=False)

# Rotate the x tick labels for readability.
plt.xticks(rotation=45, ha='right')

# Axis labels and title.
plt.title(f'Boxplot of Predictions and Historical Data for {specie} - Cluster {cluster_label}')
plt.xlabel('Year')
plt.ylabel('Landed_w_kg')
plt.grid(False)

plt.tight_layout()
# Display the figure explicitly so it also appears when run outside a notebook.
plt.show()




# Notebook output: KeyboardInterrupt