In [1]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Add the parent directory to the Python path
sys.path.append('..')

from src.data import load_data
from src.models import ModelWrapper
from src.optimizers import SGDOptimizer, GeneticAlgorithmOptimizer
from src.utils import set_seeds, create_results_directory, extract_data_from_generator
from src.visualization import plot_predictions, plot_fitness_history, plot_time_series_predictions

# Single named seed for the run; handed to the project helper before any data/model code runs
SEED = 42
set_seeds(SEED)

# Ask the project helper for the output directory and report where artifacts will go
results_dir = create_results_directory()
print(f"Results will be saved in: {results_dir}")

2024-10-21 11:50:38.121911: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-21 11:50:38.128834: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-21 11:50:38.136613: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-21 11:50:38.138786: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-21 11:50:38.145145: I tensorflow/core/platform/cpu_feature_guar

Results will be saved in: /home/dkat/computational-intelligence/src/../results/20241021_115038


In [2]:
# load_data() returns three generators (train / validation / test) and the two scalers
train_generator, val_generator, test_generator, scaler_X, scaler_y = load_data()

# Take the first training batch; its per-sample shape is what the model is built with later
x_batch, y_batch = train_generator[0]
input_shape = x_batch.shape[1:]  # everything after the leading batch axis

# Report len() of each generator
print("Data sequences:")
print(f"Train sequences: {len(train_generator)}")
print(f"Validation sequences: {len(val_generator)}")
print(f"Test sequences: {len(test_generator)}")

# Report the per-sample input shape and the full target-batch shape
print(f"Input shape: {input_shape}")
print(f"Output shape: {y_batch.shape}")

2024-10-21 11:50:40,736 - INFO - Loading data from ../data/raw/Train.csv
2024-10-21 11:50:40,811 - INFO - Train data shape: (140160, 12)
2024-10-21 11:50:40,814 - INFO - Input columns: ['Temp_2m', 'RelHum_2m', 'DP_2m', 'WS_10m', 'WS_100m', 'WD_10m', 'WD_100m', 'WG_10m']
2024-10-21 11:50:40,814 - INFO - Output column: Power
2024-10-21 11:50:40,823 - INFO - Training set shape: (98112, 9)
2024-10-21 11:50:40,823 - INFO - Validation set shape: (21024, 9)
2024-10-21 11:50:40,824 - INFO - Test set shape: (21024, 9)
2024-10-21 11:50:40,831 - INFO - Data scaling and sequence generation completed


Data sequences:
Train sequences: 3066
Validation sequences: 657
Test sequences: 657
Input shape: (12, 8)
Output shape: (32, 1)


In [3]:
# Define model architecture
# These sizes are passed to ModelWrapper as-is; how they map onto layers is decided in src.models
# (presumably one layer per entry, in this order; confirm against ModelWrapper).
layer_sizes = [256, 128, 64]

# Create the model
# input_shape is the per-sample shape taken from the first training batch (batch axis removed).
model_wrapper = ModelWrapper(input_shape, layer_sizes)
# Print the summary of the underlying model object that the wrapper exposes as `.model`.
model_wrapper.model.summary()

I0000 00:00:1729511443.318848  591815 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729511443.331265  591815 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729511443.331297  591815 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729511443.332921  591815 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1729511443.332948  591815 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [4]:
# Stage 1: pretrain with SGD + momentum.
# NOTE(review): the output recorded under this cell ends with
# "NameError: name 'GeneticAlgorithmOptimizer' is not defined", even though the first
# cell imports that name from src.optimizers. The "Optimizer set to" log line appears
# before the error, so the failure is presumably raised from within train() (i.e. inside
# src, not in this cell). The traceback is truncated, so confirm and fix at the source.
sgd_optimizer = SGDOptimizer(
    learning_rate=0.001,
    momentum=0.9,
    epochs=20,
)
model_wrapper.set_optimizer(sgd_optimizer)

# Keep whatever train() returns so the SGD stage can be inspected later
sgd_history = model_wrapper.train(
    train_generator,
    val_generator,
    results_dir=results_dir,
)

2024-10-21 11:50:48,485 - INFO - Optimizer set to: SGDOptimizer


NameError: name 'GeneticAlgorithmOptimizer' is not defined

In [None]:
# Loss reported by the wrapper on the test generator right after SGD pretraining
test_loss_sgd = model_wrapper.evaluate(test_generator)
print(f"Test Loss after SGD pretraining: {test_loss_sgd:.4f}")

# Baseline for the GA stage: predict on the validation data pulled out of its generator
val_X, val_y = extract_data_from_generator(val_generator)
y_pred_val = model_wrapper.predict(val_X)

# Map both predictions and targets back through the target scaler before scoring
y_pred_val_actual = scaler_y.inverse_transform(y_pred_val)
val_y_actual = scaler_y.inverse_transform(val_y)

# MSE over the flattened arrays
initial_mse = mean_squared_error(
    y_true=val_y_actual.flatten(),
    y_pred=y_pred_val_actual.flatten(),
)

print(f"Initial MSE on validation set before GA optimization: {initial_mse:.4f}")

In [None]:
# Stage 2 setup: hyperparameters forwarded verbatim as keyword arguments to the GA optimizer
ga_optimizer_params = dict(
    population_size=25,
    generations=100,
    tournament_size=3,
    mutation_rate=0.01,
    mutation_scale=0.1,
    patience=10,
)

# Build the optimizer and swap it into the same wrapper that was used for the SGD stage
ga_optimizer = GeneticAlgorithmOptimizer(**ga_optimizer_params)
model_wrapper.set_optimizer(ga_optimizer)

In [None]:
# Second training run on the same wrapper, now driven by the GA optimizer set in the
# previous cell; unlike the SGD call, this one also passes the target scaler.
model_wrapper.train(
    train_generator,
    val_generator,
    scaler_y=scaler_y,
    results_dir=results_dir,
)

In [None]:
# Evaluate the model after GA optimization
# Same evaluate() call on the test generator as after SGD pretraining, so the two
# printed losses can be compared directly.
test_loss_ga = model_wrapper.evaluate(test_generator)
print(f"Test Loss after GA optimization: {test_loss_ga:.4f}")

In [None]:
# Test-set metrics after GA optimization.
# The test loss itself is evaluated and printed by the preceding cell, so that call is
# not repeated here. The sklearn metrics come from the import cell at the top of the
# notebook rather than from a mid-notebook import.

# Generate predictions for the test data pulled out of its generator
test_X, test_y = extract_data_from_generator(test_generator)
y_pred_scaled = model_wrapper.predict(test_X)

# Map predictions and targets back through the target scaler
test_predictions = scaler_y.inverse_transform(y_pred_scaled)
test_actual = scaler_y.inverse_transform(test_y)

# Flatten to 1-D; these two arrays are reused by the plotting cell below
test_actual_flat = test_actual.flatten()
test_predictions_flat = test_predictions.flatten()

# Calculate evaluation metrics (RMSE is derived from the MSE)
mse = mean_squared_error(test_actual_flat, test_predictions_flat)
rmse = np.sqrt(mse)
r2 = r2_score(test_actual_flat, test_predictions_flat)

print("\nEvaluation Results:")
print(f"Test MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")
print(f"Test R2 Score: {r2:.4f}")

In [None]:
# Write the wrapped model's current weights into the results directory
model_save_path = os.path.join(results_dir, 'best_model.weights.h5')
model_wrapper.model.save_weights(model_save_path)
print(f"Model weights saved to: {model_save_path}")

# Destinations of the three figures, all inside the results directory
predictions_png = os.path.join(results_dir, 'hybrid_sgd_ga_predictions.png')
fitness_png = os.path.join(results_dir, 'hybrid_sgd_ga_fitness_history.png')
time_series_png = os.path.join(results_dir, 'hybrid_sgd_ga_time_series_predictions.png')

# Actual vs. predicted values on the test data
plot_predictions(
    test_actual_flat,
    test_predictions_flat,
    "Hybrid SGD-GA Model Predictions",
    save_path=predictions_png,
)

# Fitness history recorded on the GA optimizer object
fitness_history = ga_optimizer.fitness_history
plot_fitness_history(fitness_history, save_path=fitness_png)

# Same two arrays again, drawn as a time series
plot_time_series_predictions(
    test_actual_flat,
    test_predictions_flat,
    title="Hybrid SGD-GA Model Time Series Predictions",
    save_path=time_series_png,
)