In [2]:
# Import necessary libraries
import os
import sys
import json
import numpy as np
import pandas as pd
from datetime import datetime
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Add parent directory to Python path
sys.path.append('..')

# Import our modules
from src.data import DataProcessor
from src.models import ModelWrapper
from src.visualization import plot_training_history, plot_power_predictions
from src.utils import set_seeds

In [3]:
# Set random seed for reproducibility
set_seeds(42)

2024-11-04 14:58:16,812 - INFO - Random seeds set to 42


In [None]:
# Create results directory
results_dir = os.path.join('..', 'results', datetime.now().strftime('%Y%m%d_%H%M%S'))
os.makedirs(results_dir, exist_ok=True)

In [6]:
# Step 1: Load and Process Data
file_path = '../data/raw/Train.csv'
target_column = 'Power'

# Initialize Data Processor
data_processor = DataProcessor(file_path, target_column)
train_data, val_data, test_data = data_processor.prepare_data()

# Print data shapes for verification
print("\nData Shapes:")
print(f"Training data: {train_data.element_spec[0].shape}")
print(f"Validation data: {val_data.element_spec[0].shape}")
print(f"Test data: {test_data.element_spec[0].shape}")


Data Ranges (scaled):
Training   - X: (98101, 15, 11), y: min=0.0000, max=1.0000
Validation - X: (28029, 15, 11), y: min=0.0000, max=0.9971
Test      - X: (14015, 15, 11), y: min=0.0000, max=0.9969

Temporal Split Check:
Training period: 2013-01-02 00:00:00 to 2015-10-20 21:00:00
Validation period: 2015-10-20 21:00:00 to 2016-08-07 20:00:00

Data Shapes:
Training data: (None, 15, 11)
Validation data: (None, 15, 11)
Test data: (None, 15, 11)


In [8]:
# Step 2: Initialize Model
model_wrapper = ModelWrapper(
    input_shape=(train_data.element_spec[0].shape[1], train_data.element_spec[0].shape[2])
)

# Display model architecture
model_wrapper.model.summary()

In [9]:
# Step 3: Train Model
print("\nTraining model...")
history = model_wrapper.fit(train_data, val_data)


Training model...
Epoch 1/50


2024-11-04 14:58:49.475147: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m3066/3066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 6ms/step - loss: 0.1330 - mae: 0.1887 - val_loss: 0.0461 - val_mae: 0.1738 - learning_rate: 0.0010
Epoch 2/50
[1m3066/3066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - loss: 0.0436 - mae: 0.1649 - val_loss: 0.0427 - val_mae: 0.1633 - learning_rate: 0.0010
Epoch 3/50
[1m3066/3066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - loss: 0.0413 - mae: 0.1600 - val_loss: 0.0409 - val_mae: 0.1581 - learning_rate: 0.0010
Epoch 4/50
[1m3066/3066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - loss: 0.0403 - mae: 0.1583 - val_loss: 0.0404 - val_mae: 0.1560 - learning_rate: 0.0010
Epoch 5/50
[1m3066/3066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - loss: 0.0394 - mae: 0.1559 - val_loss: 0.0399 - val_mae: 0.1551 - learning_rate: 0.0010
Epoch 6/50
[1m3066/3066[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - loss: 0.0384 - mae: 0.153

In [10]:
# Step 4: Visualize Training History
print("\nPlotting training history...")
plot_training_history(
    history.history, 
    save_path=os.path.join(results_dir, 'training_history.png')
)

2024-11-04 15:13:12,424 - INFO - Plot style set
2024-11-04 15:13:12,544 - INFO - Training history plot saved to results/20241104_145819/training_history.png



Plotting training history...


In [11]:
# Step 5: Evaluate on Test Set
print("\nEvaluating model on test set...")
test_loss = model_wrapper.model.evaluate(test_data, verbose=1)
print(f"Test Loss: {test_loss}")


Evaluating model on test set...
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0257 - mae: 0.1179
Test Loss: [0.034322433173656464, 0.13584671914577484]


In [None]:
# Step 6: Generate Predictions
print("\nGenerating predictions...")
predictions = model_wrapper.model.predict(test_data)

# Extract actual values from test dataset
test_actual = np.concatenate([y for x, y in test_data], axis=0)

# Get corresponding timestamps
test_timestamps = data_processor.get_test_timestamps()

# Inverse transform predictions and actual values for proper comparison
predictions_unscaled = data_processor.scaler.inverse_transform(predictions)
actual_unscaled = data_processor.scaler.inverse_transform(test_actual.reshape(-1, 1))


Generating predictions...
[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


2024-11-04 15:13:29.903201: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


NameError: name 'config' is not defined

In [None]:
# Step 7: Visualize Predictions
print("\nPlotting predictions...")
plot_power_predictions(
    actual=actual_unscaled.flatten(),
    predicted=predictions_unscaled.flatten(),
    timestamps=test_timestamps,
    save_path=os.path.join(results_dir, 'predictions.png')
)

In [None]:
# Step 8: Save Model and Results
print("\nSaving model and results...")

# Save model weights
model_weights_path = os.path.join(results_dir, 'model_weights.h5')
model_wrapper.model.save_weights(model_weights_path)

# Save model architecture
model_json = model_wrapper.model.to_json()
with open(os.path.join(results_dir, 'model_architecture.json'), 'w') as f:
    f.write(model_json)

# Save training history
with open(os.path.join(results_dir, 'training_history.json'), 'w') as f:
    json.dump(history.history, f)

# Calculate comprehensive metrics
mse = mean_squared_error(actual_unscaled, predictions_unscaled)
rmse = np.sqrt(mse)
r2 = r2_score(actual_unscaled, predictions_unscaled)
mae = np.mean(np.abs(predictions_unscaled - actual_unscaled))

# Save all results
results = {
    'model_config': config,
    'test_metrics': {
        'loss': float(test_loss),
        'mse': float(mse),
        'rmse': float(rmse),
        'r2': float(r2),
        'mae': float(mae)
    },
    'data_info': {
        'train_size': len(train_data),
        'val_size': len(val_data),
        'test_size': len(test_data),
        'features': list(data_processor.data.columns),
        'target': target_column
    },
    'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}

with open(os.path.join(results_dir, 'results.json'), 'w') as f:
    json.dump(results, f, indent=4)

# Print final metrics
print("\nFinal Test Metrics:")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")
print(f"MAE: {mae:.4f}")

print(f"\nResults saved in: {results_dir}")