In [10]:
CONFIG = {
    'full_dataset' : "../data/full_dataset.csv",
    'target' : 'Mean_freq'
    }
features = {"Rainfall_rolling_mean": True, "Rainfall": True, "Temp" : True, "Wind" : True, "Pressure" : True, "Humidity" : True}
CONFIG['features'] = [k for k,v in features.items() if v]

In [11]:
import random
import numpy as np
import torch

def set_seed(seed):
    # Python random module
    random.seed(seed)
    
    # Numpy random module
    np.random.seed(seed)
    
    # PyTorch random seed
    torch.manual_seed(seed)
    
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # For multi-GPU setups
    
SEED = 42
set_seed(SEED)

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load data
df = pd.read_csv(CONFIG['full_dataset'], sep=",")

print(df.columns)

df.drop(columns=['Mean_am', 'Std_am', 'Unnamed: 0'], axis=1, inplace=True)

# Define target and features
target = CONFIG['target']

#Remove strange outliers
df = df[df['Mean_freq'] > 6.3]

df['Rainfall_rolling_mean'] = df['Rainfall'].rolling(window=600).mean()

df.dropna(inplace=True)

split_date_earthquake = '2024-08-13'

normal_df = df[df['Dates UTC'] < split_date_earthquake].copy()
test_df = df[df['Dates UTC'] >= split_date_earthquake].copy()

# Split data
X = normal_df[CONFIG['features']]
y = normal_df[target]

X_test = test_df[CONFIG['features']]
y_test = test_df[target]


# Scale data
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

X_test_scaled = scaler_X.transform(X_test)
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_scaled, test_size=0.1, random_state=42, shuffle=False)

Index(['Unnamed: 0', 'Dates UTC', 'Mean_freq', 'Std_fn', 'Mean_am', 'Std_am',
       'Acc_1h', 'Temp', 'Humidity', 'Wind', 'Wind_dir', 'Wind_run',
       'Wind_max', 'Pressure', 'Rainfall', 'Solar_rad', 'Solar_energy', 'THW'],
      dtype='object')


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Convert data to PyTorch tensors and move to GPU
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float32).to(device)

# Create data loaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=2048, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)

# Define the neural network
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, dropout=0.3):
        super(NeuralNetwork, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1)
        )
        
    def forward(self, x):
        return self.layers(x)

# Initialize the model and move to GPU
input_dim = X_train.shape[1]
model = NeuralNetwork(input_dim, dropout=0.1).to(device)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



Using device: cuda


In [15]:
def train_epoch(model, loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)

def validate_epoch(model, loader, criterion):
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
    return val_loss / len(loader)

In [16]:
train_model = True
if train_model:
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_model = None
    epochs = 2000

    progress_bar = tqdm(range(epochs), desc="Training Progress", total=epochs)

    for epoch in progress_bar:
        train_loss = train_epoch(model, train_loader, criterion, optimizer)
        val_loss = validate_epoch(model, val_loader, criterion)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model.state_dict()
            torch.save(best_model, f"../models/FFNN_best_model.pth")
            print(f"Best model saved | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Update tqdm description with current losses
        progress_bar.set_description(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")


Epoch 1 | Train Loss: 1.4474, Val Loss: 0.6717:   0%|          | 0/2000 [00:00<?, ?it/s]

Best model saved | Train Loss: 1.4474, Val Loss: 0.6717


Epoch 2 | Train Loss: 0.8757, Val Loss: 0.4365:   0%|          | 2/2000 [00:00<03:50,  8.66it/s]

Best model saved | Train Loss: 0.8757, Val Loss: 0.4365


Epoch 5 | Train Loss: 0.5359, Val Loss: 0.4000:   0%|          | 4/2000 [00:00<02:46, 11.96it/s]

Best model saved | Train Loss: 0.7270, Val Loss: 0.3680


Epoch 379 | Train Loss: 0.3531, Val Loss: 0.3710:  19%|█▉        | 378/2000 [00:28<01:53, 14.32it/s]

Best model saved | Train Loss: 0.3533, Val Loss: 0.3675


Epoch 393 | Train Loss: 0.3492, Val Loss: 0.3744:  20%|█▉        | 392/2000 [00:29<01:52, 14.33it/s]

Best model saved | Train Loss: 0.3564, Val Loss: 0.3628


Epoch 413 | Train Loss: 0.3464, Val Loss: 0.3758:  21%|██        | 412/2000 [00:31<01:50, 14.36it/s]

Best model saved | Train Loss: 0.3478, Val Loss: 0.3596


Epoch 552 | Train Loss: 0.3397, Val Loss: 0.3585:  28%|██▊       | 552/2000 [00:41<01:36, 15.00it/s]

Best model saved | Train Loss: 0.3408, Val Loss: 0.3577


Epoch 559 | Train Loss: 0.3389, Val Loss: 0.3759:  28%|██▊       | 558/2000 [00:42<01:39, 14.44it/s]

Best model saved | Train Loss: 0.3397, Val Loss: 0.3573


Epoch 597 | Train Loss: 0.3347, Val Loss: 0.3849:  30%|██▉       | 596/2000 [00:45<01:41, 13.82it/s]

Best model saved | Train Loss: 0.3391, Val Loss: 0.3570


Epoch 644 | Train Loss: 0.3362, Val Loss: 0.3832:  32%|███▏      | 644/2000 [00:48<01:40, 13.48it/s]

Best model saved | Train Loss: 0.3346, Val Loss: 0.3548


Epoch 659 | Train Loss: 0.3336, Val Loss: 0.3716:  33%|███▎      | 658/2000 [00:49<01:37, 13.72it/s]

Best model saved | Train Loss: 0.3339, Val Loss: 0.3542


Epoch 698 | Train Loss: 0.3311, Val Loss: 0.3761:  35%|███▍      | 698/2000 [00:52<01:25, 15.28it/s]

Best model saved | Train Loss: 0.3324, Val Loss: 0.3539


Epoch 712 | Train Loss: 0.3247, Val Loss: 0.3956:  36%|███▌      | 712/2000 [00:53<01:41, 12.66it/s]

Best model saved | Train Loss: 0.3318, Val Loss: 0.3523


Epoch 723 | Train Loss: 0.3292, Val Loss: 0.3690:  36%|███▌      | 722/2000 [00:54<01:27, 14.57it/s]

Best model saved | Train Loss: 0.3264, Val Loss: 0.3427


Epoch 2000 | Train Loss: 0.2842, Val Loss: 0.3850: 100%|██████████| 2000/2000 [02:26<00:00, 13.64it/s]


In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error
import torch
import pandas as pd

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load("../models/FFNN_best_model.pth", weights_only=True))
model.to(device).eval()

# Move tensors to GPU
for tensor in [X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, X_test_tensor, y_test_tensor]:
    tensor.to(device)

# Predictions
train_predictions = model(X_train_tensor).detach().cpu().numpy()
val_predictions = model(X_val_tensor).detach().cpu().numpy()
test_predictions = model(X_test_tensor).detach().cpu().numpy()

# Move actual values to CPU and inverse transform
y_train_actual = scaler_y.inverse_transform(y_train_tensor.cpu().numpy())
y_val_actual = scaler_y.inverse_transform(y_val_tensor.cpu().numpy())
y_test_actual = scaler_y.inverse_transform(y_test_tensor.cpu().numpy())

train_predictions = scaler_y.inverse_transform(train_predictions)
val_predictions = scaler_y.inverse_transform(val_predictions)
test_predictions = scaler_y.inverse_transform(test_predictions)

# Define conservative recovery period
recovery_start, recovery_end = pd.to_datetime('2024-08-13'), pd.to_datetime('2024-08-13') + pd.DateOffset(days=2.5*30)

# Ensure 'Dates UTC' is a datetime object
for df in [df, normal_df, test_df]:
    df['Dates UTC'] = pd.to_datetime(df['Dates UTC'])

train_indices = normal_df['Dates UTC'][:len(X_train_tensor)]
val_indices = normal_df['Dates UTC'][len(X_train_tensor):len(X_train_tensor) + len(X_val_tensor)]
test_indices = test_df['Dates UTC'][:len(X_test_tensor)]

# Filter out recovery period from test set
mask_no_recovery = (test_indices < recovery_start) | (test_indices > recovery_end)
test_indices_no_recovery = test_indices[mask_no_recovery]
test_predictions_no_recovery = test_predictions[mask_no_recovery]
y_test_actual_no_recovery = y_test_actual[mask_no_recovery]

# Compute RMSE
train_rmse = np.sqrt(mean_squared_error(y_train_actual, train_predictions))
val_rmse = np.sqrt(mean_squared_error(y_val_actual, val_predictions))
test_rmse_no_recovery = np.sqrt(mean_squared_error(y_test_actual_no_recovery, test_predictions_no_recovery))
train_val_test_rmse = np.sqrt(mean_squared_error(
    np.concatenate([y_train_actual, y_val_actual, y_test_actual_no_recovery]),
    np.concatenate([train_predictions, val_predictions, test_predictions_no_recovery])
))

# Calculate residuals
train_residuals = np.abs(y_train_actual.flatten() - train_predictions.flatten())
val_residuals = np.abs(y_val_actual.flatten() - val_predictions.flatten())
test_residuals = np.abs(y_test_actual.flatten() - test_predictions.flatten())

# Moving averages
ma_window = 100
train_residuals_ma = np.convolve(train_residuals, np.ones(ma_window)/ma_window, mode='valid')
val_residuals_ma = np.convolve(val_residuals, np.ones(ma_window)/ma_window, mode='valid')
test_residuals_ma = np.convolve(test_residuals, np.ones(ma_window)/ma_window, mode='valid')

# Create subplots
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1)

# Plot actual vs predicted
fig.add_trace(go.Scatter(x=train_indices, y=y_train_actual.flatten(), mode='lines', line=dict(color='blue'), name='Actual', showlegend=False), row=1, col=1)
fig.add_trace(go.Scatter(x=train_indices, y=train_predictions.flatten(), mode='lines', line=dict(color='red'), name='Train Predicted', showlegend=False), row=1, col=1)
fig.add_trace(go.Scatter(x=val_indices, y=y_val_actual.flatten(), mode='lines', line=dict(color='blue'), name='Validation Actual', showlegend=False), row=1, col=1)
fig.add_trace(go.Scatter(x=val_indices, y=val_predictions.flatten(), mode='lines', line=dict(color='orange'), name='Validation Predicted', showlegend=False), row=1, col=1)
fig.add_trace(go.Scatter(x=test_indices, y=y_test_actual.flatten(), mode='lines', line=dict(color='blue'), name='Test Actual', showlegend=False), row=1, col=1)
fig.add_trace(go.Scatter(x=test_indices, y=test_predictions.flatten(), mode='lines', line=dict(color='green'), name='Test Predicted', showlegend=False), row=1, col=1)

# Highlight recovery period
fig.add_vrect(x0=recovery_start, x1=recovery_end, fillcolor="LightSalmon", opacity=0.5, layer="below", line_width=0, row=1, col=1)
fig.add_vrect(x0=recovery_start, x1=recovery_end, fillcolor="LightSalmon", opacity=0.5, layer="below", line_width=0, row=2, col=1)

# Plot residuals as lines
fig.add_trace(go.Scatter(x=train_indices, y=train_residuals, mode='lines', line=dict(color='red'), name='Train Residuals', showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=val_indices, y=val_residuals, mode='lines', line=dict(color='orange'), name='Validation Residuals', showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=test_indices, y=test_residuals, mode='lines', line=dict(color='green'),name='Test Residuals', showlegend=False), row=2, col=1)

# Plot moving averages of residuals
fig.add_trace(go.Scatter(x=train_indices[ma_window-1:], y=train_residuals_ma, mode='lines', line=dict(color='black'), name='Train Residuals MA', showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=val_indices[ma_window-1:], y=val_residuals_ma, mode='lines', line=dict(color='black'), name='Validation Residuals MA', showlegend=False), row=2, col=1)
fig.add_trace(go.Scatter(x=test_indices[ma_window-1:], y=test_residuals_ma, mode='lines', line=dict(color='black'), name='Residuals<br>Moving Average', showlegend=False), row=2, col=1)

# Update layout
# Update subplot titles' font size
fig.update_layout(
    title={
        "text": f"Actual vs Predicted <br>RMSE Train: {train_rmse:.4f}, RMSE Validation:{val_rmse:.4f}, RMSE Test: {test_rmse_no_recovery:.4f}, RMSE Overall:{train_val_test_rmse:.4f}",
        "font": {"size": 24}  # Increase title font size
    },
    xaxis2={  # X-axis for the second subplot
        "title": "Date",
        "titlefont": {"size": 22},  
        "tickfont": {"size": 16}  
    },
    yaxis={
        "title": "Frequency",
        "titlefont": {"size": 22},  # Increase Y-axis label font size (for the first subplot)
        "tickfont": {"size": 16}  
    },
    yaxis2={  # Y-axis for the second subplot
        "title": "Residuals",
        "titlefont": {"size": 22},  
        "tickfont": {"size": 16}  
    },
    legend={
        "font": {"size": 24}  # Increase legend font size
    },
    height=600
)

# Show the figure
fig.show()