<a href="https://colab.research.google.com/github/claudialeguiza/AA1-TUIA-Kidonakis-Leguiza/blob/navegador/app.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [146]:
!pip install streamlit



In [147]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import RobustScaler, FunctionTransformer
from imblearn.over_sampling import SMOTE
from sklearn.pipeline import Pipeline
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import Adam
from keras.metrics import Precision
from keras.utils import to_categorical
import joblib
from datetime import date
import warnings
import streamlit as st
warnings.simplefilter('ignore')

In [148]:
df = pd.read_csv('/content/weatherAUS.csv', delimiter = ",")

In [149]:
def preprocesamiento(datos):

    data = datos[datos.Location\
                      .isin(( 'Sydney','SydneyAirport','Melbourne', 'MelbourneAirport',\
                             'Canberra','Adelaide', 'MountGambier','Cobar', 'Dartmoor' ))]


    data = data.drop('Unnamed: 0', axis =1)

    # Definir columnas con valores nulos
    columnas_con_nulos = ['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
                          'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm','Humidity9am',
                          'Humidity3pm', 'Pressure9am','Pressure3pm', 'Cloud9am',
                          'Cloud3pm', 'Temp9am', 'Temp3pm', 'RainfallTomorrow']

    # Rellenar valores faltantes en 'RainToday' y 'RainTomorrow'
    data['RainToday'] = data.groupby('Date')['RainToday'].transform(lambda x: x.fillna(x.mode().iloc[0]))
    data['RainTomorrow'] = data.groupby('Date')['RainTomorrow'].transform(lambda x: x.fillna(x.mode().iloc[0]))

    # Rellenar valores faltantes en direcciones del viento
    data['WindGustDir'] = data.groupby('Date')['WindGustDir'].transform(lambda x: x.fillna(x.mode().iloc[0]) if not x.isna().all() else x)
    data['WindDir9am'] = data.groupby('Date')['WindDir9am'].transform(lambda x: x.fillna(x.mode().iloc[0]) if not x.isna().all() else x)
    data['WindDir3pm'] = data.groupby('Date')['WindDir3pm'].transform(lambda x: x.fillna(x.mode().iloc[0]) if not x.isna().all() else x)

    # Rellenar valores faltantes con la media por día para las columnas especificadas
    media_por_dia = data.groupby('Date')[columnas_con_nulos].transform('mean')
    data[columnas_con_nulos] = data[columnas_con_nulos].fillna(media_por_dia)

    data['Date'] = pd.to_datetime(data['Date'])

    return data


In [150]:
def crear_columna_season(data):
   data['season'] = data['Date'].apply(asignar_estacion)
   return data

In [151]:
def asignar_estacion(fecha):
    mes = fecha.month
    if mes in [12, 1, 2]:  # Verano: Diciembre, Enero, Febrero
        return 'Summer'
    elif mes in [3, 4, 5]:  # Otoño: Marzo, Abril, Mayo
        return 'Autumn'
    elif mes in [6, 7, 8]:  # Invierno: Junio, Julio, Agosto
        return 'Winter'
    else:  # Primavera: Septiembre, Octubre, Noviembre
        return 'Spring'

In [152]:
def codificar_variables(data):
    data1 = pd.get_dummies(data, columns=['RainToday', 'RainTomorrow','season', 'Location'], drop_first=True)

    # Crear columnas para WindGustDir, WindDir9am, WindDir3pm
    wind_directions = ["SW", "S", 'SSW', 'W', 'SSE', 'E', 'SE', 'NE', 'NNE', 'WSW', 'WNW', 'NW', 'N', 'ESE', 'ENE']
    for var in wind_directions:
        data1[f'WindGustDir_{var}'] = (data['WindGustDir'] == var).astype(int)
        data1[f'WindDir9am_{var}'] = (data['WindDir9am'] == var).astype(int)
        data1[f'WindDir3pm_{var}'] = (data['WindDir3pm'] == var).astype(int)

    return data1.drop(columns=['WindGustDir', 'WindDir9am', 'WindDir3pm'])

In [153]:
def robust_df(data):
  scaler = RobustScaler()
  data_scaled = scaler.fit_transform(data)
  return data_scaled

In [154]:
def truncar_dividir_df(data):
    data = data.sort_values(["Date"])
    fecha_especifica = '2009-01-01'
    data_filtrada = data[data['Date'] >= fecha_especifica]

    data_filtrada.reset_index(drop=True, inplace=True)  # Resetea el índice y no crea uno nuevo
    data_train = data_filtrada.iloc[:21658]


    return data_train

In [155]:
def eliminar_columnas_estandarizar(data):
    # Separar variables independientes y dependientes
    X_regresion = data.drop(columns =['RainfallTomorrow','Date'])
    X_scaled = robust_df(X_regresion)
    y_regresion = data['RainfallTomorrow']
    y_scaled = robust_df(y_regresion.values.reshape(-1,1))
    return X_scaled, y_scaled

In [156]:
def estandarizar_balancear_clas(data):
    X_clasificacion = data.drop(columns=['RainTomorrow_Yes','Date','RainfallTomorrow'])
    X_scaled1 = robust_df(X_clasificacion)
    y_clasificacion = data['RainTomorrow_Yes']
    y_scaled1 =robust_df(y_clasificacion.values.reshape(-1,1))
    smote = SMOTE(random_state=42)
    X_smote_scaled, y_smote_scaled = smote.fit_resample(X_scaled1, y_scaled1)

    return X_smote_scaled, y_smote_scaled

In [157]:
def cargar_modelo_regresion():
    # Cargamos el modelo
      modelo_regresion = load_model('/content/regression_model.h5')
      return modelo_regresion

In [158]:
def cargar_modelo_clasificacion():
      modelo_clasif = load_model('/content/classification_model_optimized.h5')
      return modelo_clasif

In [159]:
pipeline_prepara_datos = Pipeline([
    ('preproceso', FunctionTransformer(preprocesamiento, validate=False)),
    ('season', FunctionTransformer(crear_columna_season, validate=False)),
    ('codificar', FunctionTransformer(codificar_variables, validate=False))
])

# Obtener datos de entrenamiento
df_procesado = pipeline_prepara_datos.fit_transform(df)

pipeline_train_split = Pipeline([
    ('split', FunctionTransformer(truncar_dividir_df, validate=False)),
    ('estandarizar', FunctionTransformer(eliminar_columnas_estandarizar, validate=False)),
    ])

# Obtener datos de entrenamiento
X_train_scaled, y_train_scaled = pipeline_train_split.fit_transform(df_procesado)

regression_model = cargar_modelo_regresion()

# Entrenar el modelo
regression_model.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7d1b6524ac50>

In [160]:
pipeline_train_split_clas = Pipeline([
    ('split', FunctionTransformer(truncar_dividir_df, validate=False)),
    ('estandarizar_clas', FunctionTransformer(estandarizar_balancear_clas, validate=False)),
    ])

classification_model = cargar_modelo_clasificacion()

# Obtener datos de entrenamiento
X_smote, y_smote = pipeline_train_split_clas.fit_transform(df_procesado)

# Entrenar el modelo
classification_model.fit(X_smote, y_smote, epochs=100, batch_size=16)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7d1b654a0160>

In [161]:
def preparar_features(df):
   df_clima = preprocesamiento(df)
   features = list(df_clima.columns[:-2])
   features.remove('Date')

   features_numericas = [f for f in features if df_clima[f].dtype in ['int64','float64']]

   features_categoricas = [f for f in features if df_clima[f].dtype == 'object']

   return features, features_numericas, features_categoricas, df_clima

In [162]:
def preparar_prediccion(df):

    # Crear columnas para WindGustDir, WindDir9am, WindDir3pm
    wind_directions = ["SW", "S", 'SSW', 'W', 'SSE', 'E', 'SE', 'NE', 'NNE', 'WSW', 'WNW', 'NW', 'N', 'ESE', 'ENE']
    for var in wind_directions:
        df[f'WindGustDir_{var}'] = (df['WindGustDir'] == var).astype(int)
        df[f'WindDir9am_{var}'] = (df['WindDir9am'] == var).astype(int)
        df[f'WindDir3pm_{var}'] = (df['WindDir3pm'] == var).astype(int)

    df[f'RainToday_Yes'] = (df['RainToday']== 'Yes').astype(int)

    df['Date'] = pd.to_datetime(df['Date'])
    df['season'] = df['Date'].apply(asignar_estacion)

    # Crear columnas para season
    season_list = ['Spring', 'Summer','Winter' ]
    for season in season_list:
        df[f'season_{season}'] = (df['season']== season).astype(int)

    #Crear columnas para Location
    Location_list= ['Canberra','Cobar', 'Dartmoor','Melbourne','MelbourneAirport',\
                    'MountGambier','Sydney','SydneyAirport']
    for ciudad in  Location_list:
         df[f'Location_{ciudad}'] = (df['Location']== ciudad).astype(int)



    df = df.drop(columns=['WindGustDir', 'WindDir9am', 'WindDir3pm','Date',\
                          'season', 'Location', 'RainToday'])

    return df

In [163]:
# Título de la app
st.title('Pronostico de lluvia para mañana')

DeltaGenerator()

In [164]:
variables, var_numericas, var_categoricas, df_clima = preparar_features(df)

In [165]:
df_prediccion = pd.DataFrame(columns=variables)

In [166]:
features_numericas = [st.slider(columna,
            df_clima[columna].min(),
            df_clima[columna].max(),
            round(df_clima[columna].mean(), 2))
            for columna in var_numericas]

In [167]:
for columna in var_numericas:
    df_prediccion[columna] = [features_numericas[var_numericas.index(columna)]]

In [168]:
features_categoricas = [st.selectbox(columna, df_clima[columna].unique())
            for columna in var_categoricas]

In [169]:
for columna in var_categoricas:
    df_prediccion[columna] = [features_categoricas[var_categoricas.index(columna)]]

In [170]:
df_prediccion['Date'] = date.today()

In [171]:
df_prediccion_filtrada = preparar_prediccion(df_prediccion)
if df_prediccion_filtrada is not None:
    df_prediccion_filtrada['Prediccion_lluvia'] = classification_model.predict(df_prediccion_filtrada)[0]
    df_prediccion_filtrada['Prediccion_lluvia'] = df_prediccion_filtrada['Prediccion_lluvia'].astype(int)

    if df_prediccion_filtrada['Prediccion_lluvia'][0] == 1:
       resultado_clas =  '**sí** 🌧️'
       df_prediccion_filtrada['Prediccion_mm'] = regression_model.predict(df_prediccion_filtrada)[0]
       resultado_reg  = round(float(df_prediccion_filtrada['Prediccion_mm'][0]), 2)
    else:
        df_prediccion_filtrada['Prediccion_mm'] = 0.0
        resultado_clas = '**no** 🌞'
        resultado_reg = 0


# Mostramos las predicciones en la app
st.markdown(f'Probablemente mañana {resultado_clas} llueva , precipitaciones: {resultado_reg} mm/h de lluvia.')



DeltaGenerator()

In [173]:
df_prediccion_filtrada

Unnamed: 0,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,...,season_Winter,Location_Canberra,Location_Cobar,Location_Dartmoor,Location_Melbourne,Location_MelbourneAirport,Location_MountGambier,Location_Sydney,Location_SydneyAirport,Prediccion_lluvia
0,11.29,21.94,2.05,4.86,6.87,41.89,15.15,20.05,68.74,50.45,...,1,0,1,0,0,0,0,0,0,0
