In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'tensorflow'

In [4]:
# Load your dataset
df = pd.read_csv('data.csv')

# 1. Data Preprocessing
print("Dataset types:", df.describe)
print("\nDataset shape:", df.shape)
print("\nFirst few rows:")
print(df.head())

# Convert date to datetime and set as index
df['fecha'] = pd.to_datetime(df['fecha'], dayfirst=True)
df = df.set_index('fecha')

# Check for missing values
print("\nMissing values:")
print(df.isnull().sum())


Dataset types: <bound method NDFrame.describe of            fecha  radiacion  precipitacion_total  viento_promedio  rocio_max  \
0     13/12/2017      19.39                  0.0              6.0        1.7   
1     14/12/2017      19.56                  0.0              6.4        1.2   
2     15/12/2017      19.26                  0.0              9.3        6.2   
3     16/12/2017      17.90                  0.0             15.8        7.7   
4     17/12/2017      14.30                  0.2              6.3        7.9   
...          ...        ...                  ...              ...        ...   
2857  09/10/2025      14.34                  7.5              3.8       14.9   
2858  10/10/2025      18.61                 21.9              4.7       14.4   
2859  11/10/2025      20.22                  9.6              4.8       13.3   
2860  12/10/2025      20.82                  0.2              4.9       13.4   
2861  13/10/2025      23.42                  0.7              5.6      

In [6]:
# Feature selection (excluding target)
features = ['temp_max', 'temp_min', 'presion_max', 'presion_min', 
        'rocio_max', 'rocio_min', 'viento_promedio', 'radiacion']
target = 'precipitacion_total'

# 2. Data Scaling
scaler_x = StandardScaler()
scaler_y = StandardScaler()

# Scale features
X_scaled = scaler_x.fit_transform(df[features])
# Scale target
y_scaled = scaler_y.fit_transform(df[[target]])

In [7]:
# 3. Create sequences for LSTM
def create_sequences(X, y, time_steps=30):
    X_seq, y_seq = [], []
    for i in range(time_steps, len(X)):
        X_seq.append(X[i-time_steps:i])
        y_seq.append(y[i])
    return np.array(X_seq), np.array(y_seq)

# Define time steps (using 30 days of historical data)
TIME_STEPS = 30
X_sequences, y_sequences = create_sequences(X_scaled, y_scaled, TIME_STEPS)

print(f"\nSequences shape: X={X_sequences.shape}, y={y_sequences.shape}")



Sequences shape: X=(2832, 30, 8), y=(2832, 1)


In [8]:
# 4. Train-Test Split (chronological split)
split_ratio = 0.8
split_index = int(len(X_sequences) * split_ratio)

X_train = X_sequences[:split_index]
X_test = X_sequences[split_index:]
y_train = y_sequences[:split_index]
y_test = y_sequences[split_index:]

print(f"Training set: {X_train.shape}, {y_train.shape}")
print(f"Test set: {X_test.shape}, {y_test.shape}")

Training set: (2265, 30, 8), (2265, 1)
Test set: (567, 30, 8), (567, 1)


In [None]:
# 5. Build LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(TIME_STEPS, len(features))),
    Dropout(0.2),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)
])

model.compile(optimizer=Adam(learning_rate=0.001), 
              loss='mse', 
              metrics=['mae'])

print("\nModel Summary:")
model.summary()

In [None]:
# 6. Train the Model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
    verbose=1,
    shuffle=False  # Important for time series data
)

In [None]:
# 7. Evaluate the Model
train_loss = model.evaluate(X_train, y_train, verbose=0)
test_loss = model.evaluate(X_test, y_test, verbose=0)

print(f"\nTraining Loss: {train_loss[0]:.4f}, MAE: {train_loss[1]:.4f}")
print(f"Test Loss: {test_loss[0]:.4f}, MAE: {test_loss[1]:.4f}")

In [None]:
# 8. Make Predictions
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_actual = scaler_y.inverse_transform(y_test)

In [None]:
# Calculate metrics
mse = mean_squared_error(y_actual, y_pred)
mae = mean_absolute_error(y_actual, y_pred)
rmse = np.sqrt(mse)

print(f"\nPerformance Metrics:")
print(f"Mean Squared Error: {mse:.4f}")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"Root Mean Squared Error: {rmse:.4f}")

In [None]:
# 9. Plot Results
plt.figure(figsize=(15, 5))

# Plot training history
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Plot predictions vs actual
plt.subplot(1, 2, 2)
plt.plot(y_actual, label='Actual Precipitation', alpha=0.7)
plt.plot(y_pred, label='Predicted Precipitation', alpha=0.7)
plt.title('Actual vs Predicted Precipitation')
plt.xlabel('Time Steps')
plt.ylabel('Precipitation')
plt.legend()

plt.tight_layout()
plt.show()



In [None]:
# 10. Feature Importance Analysis (Optional)
# Get the last layer weights to understand feature importance
print("\nFeature names:", features)

# 11. Save the Model
model.save('rainfall_prediction_lstm.h5')
print("\nModel saved as 'rainfall_prediction_lstm.h5'")


In [None]:
# Function to make future predictions
def predict_future(model, last_sequence, days=7):
    """
    Predict future precipitation for specified number of days
    """
    future_predictions = []
    current_sequence = last_sequence.copy()
    
    for _ in range(days):
        # Predict next day
        next_pred = model.predict(current_sequence.reshape(1, TIME_STEPS, len(features)))
        future_predictions.append(next_pred[0, 0])
        
        # Update sequence (remove first, add prediction)
        current_sequence = np.roll(current_sequence, -1, axis=0)
        # For the new row, we need to create a dummy row with the prediction
        # In practice, you'd need actual feature values for future dates
        new_row = np.zeros(len(features))
        # This is simplified - you'd need a better approach for real future predictions
        current_sequence[-1] = new_row
    
    future_predictions = scaler_y.inverse_transform(
        np.array(future_predictions).reshape(-1, 1)
    )
    return future_predictions

# Example of future prediction (using the last sequence from test set)
if len(X_test) > 0:
    last_sequence = X_test[-1]
    future_pred = predict_future(model, last_sequence, days=7)
    print(f"\nNext 7 days precipitation predictions:")
    for i, pred in enumerate(future_pred, 1):
        print(f"Day {i}: {pred[0]:.2f} mm")