# In [None]:
# Import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.layers import Dropout
from keras import regularizers
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import joblib
import colour
import random

# Read the full dataset from disk.
data = pd.read_csv('training.csv')

# The first three columns are the model inputs (X); every remaining column is a target (y).
X, y = data.iloc[:, :3], data.iloc[:, 3:]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the inputs. The scaler is fitted on the training rows only and
# then reused unchanged for the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the model architecture
def create_model(hidden_size=10, num_layers=1, learning_rate=0.0001, dropout_rate=0.0, weight_decay=0.0, activation='relu', optimizer='Adam'):
    """Build and compile a fully connected regression network.

    The network takes 3 input features and produces 3 linear outputs. It
    consists of one input-facing hidden layer followed by ``num_layers``
    additional hidden layers, i.e. ``num_layers + 1`` hidden layers in total.
    Every hidden layer is followed by a dropout layer.

    Args:
        hidden_size: Number of units in every hidden Dense layer.
        num_layers: Number of hidden layers added *after* the first one.
        learning_rate: Learning rate used when the optimizer is built here
            (i.e. when ``optimizer`` is a name or a class).
        dropout_rate: Dropout rate applied after every hidden layer.
        weight_decay: L2 regularization factor for every hidden layer kernel.
        activation: Activation used by every hidden layer.
        optimizer: Optimizer to compile with. May be an optimizer name
            (e.g. ``'Adam'``), an optimizer class, or an already constructed
            optimizer instance. An instance is used as-is, so its own
            learning rate takes precedence over ``learning_rate``.

    Returns:
        The compiled ``keras.Sequential`` model (loss: mean squared error).
    """
    model = keras.Sequential()

    # First hidden layer declares the input width.
    model.add(layers.Dense(hidden_size, activation=activation, input_dim=3, kernel_regularizer=regularizers.l2(weight_decay)))
    model.add(layers.Dropout(dropout_rate))

    # Remaining hidden layers.
    for _ in range(num_layers):
        model.add(layers.Dense(hidden_size, activation=activation, kernel_regularizer=regularizers.l2(weight_decay)))
        model.add(layers.Dropout(dropout_rate))

    # Linear output layer for the 3 regression targets.
    model.add(layers.Dense(3))

    # Honor the requested optimizer instead of always falling back to Adam.
    if isinstance(optimizer, str):
        # Resolve the name through Keras and inject the learning rate.
        compiled_optimizer = keras.optimizers.get(
            {'class_name': optimizer, 'config': {'learning_rate': learning_rate}}
        )
    elif isinstance(optimizer, type):
        compiled_optimizer = optimizer(learning_rate=learning_rate)
    else:
        compiled_optimizer = optimizer

    model.compile(optimizer=compiled_optimizer, loss='mse')
    return model

# Wrap the model builder in a scikit-learn compatible regressor so that it can
# be tuned with GridSearchCV. The values given here are only starting values;
# the grid below overrides the ones it lists.
model = KerasRegressor(
    model=create_model,
    epochs=100,
    batch_size=10,
    verbose=0,
    hidden_size=50,
    num_layers=1,
    learning_rate=0.0001,
    dropout_rate=0.0,
    weight_decay=0.0,
    activation='relu',
    optimizer='Adam',
)

# Candidate values for every tuned hyperparameter.
hidden_size = [500, 600, 700]
num_layers = [1, 2, 3, 4, 5]  # passed to create_model as num_layers
learning_rate = [0.0001, 0.001, 0.01]
batch_size = [32, 64, 128]
dropout_rate = [0.0, 0.05, 0.1, 0.2]
weight_decay = [0.0, 0.01, 0.001]
activations = ['relu']
param_grid = {
    'hidden_size': hidden_size,
    'num_layers': num_layers,
    'learning_rate': learning_rate,
    'batch_size': batch_size,
    'dropout_rate': dropout_rate,
    'weight_decay': weight_decay,
    'activation': activations,
}

# Exhaustive 3-fold cross-validated search, using all available cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)
grid_result = grid.fit(X_train_scaled, y_train)

# Report the winning configuration and its cross-validation score.
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")

# Split the training data into training and validation sets.
# This is done BEFORE adding noise so that the validation set stays unperturbed
# and the validation loss measures performance on clean inputs.
X_train_scaled, X_val_scaled, y_train, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=42)

# Data Augmentation - adding Gaussian noise to the training inputs only.
# NOTE: the noise is drawn from NumPy's global, unseeded generator, so it
# differs from run to run.
noise_std = 0.01
noise = np.random.normal(0, noise_std, X_train_scaled.shape)
X_train_scaled = X_train_scaled + noise

# Train an ensemble of 5 networks. All members are built from the best
# hyperparameters found by the grid search; create_model is called afresh for
# each member.
best = grid_result.best_params_
models = []
history = []  # one Keras History object per ensemble member
for i in range(5):
    model = create_model(
        hidden_size=best['hidden_size'],
        num_layers=best['num_layers'],
        learning_rate=best['learning_rate'],
        dropout_rate=best['dropout_rate'],
        weight_decay=best['weight_decay'],
        activation=best['activation'],
    )
    hist = model.fit(
        X_train_scaled,
        y_train,
        validation_data=(X_val_scaled, y_val),
        epochs=100,
        batch_size=best['batch_size'],
        verbose=0,
    )
    history.append(hist)
    models.append(model)
    # Persist this member so it can be reloaded without retraining.
    model.save(f'model_{i}.h5')

# Persist the fitted input scaler.
joblib.dump(scaler, 'scaler.pkl')

# Persist the winning hyperparameters from the grid search.
joblib.dump(grid_result.best_params_, 'best_params.pkl')

# Write one loss-curve image per ensemble member (training vs. validation loss).
for i, hist in enumerate(history):
    fig, ax = plt.subplots(figsize=(10, 6))
    losses = hist.history
    ax.plot(losses['loss'])
    ax.plot(losses['val_loss'])
    ax.set_title(f'Model {i+1} loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper right')
    fig.savefig(f'loss_{i+1}.png')
    # Close the figure so figures do not accumulate across iterations.
    plt.close(fig)

# Collect the test-set predictions of every ensemble member.
predictions = [member.predict(X_test_scaled) for member in models]

# The ensemble prediction is the element-wise mean over all members.
y_pred_ensemble = np.mean(predictions, axis=0)

# Scatter the ensemble predictions against the true values and save the figure.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred_ensemble)
ax.set_xlabel('True Values')
ax.set_ylabel('Predictions')
ax.set_title('Prediction by Ensemble Model')
ax.grid(True)
fig.savefig('scatter_predictions.png')
plt.close(fig)

# Report the mean squared error of the ensemble on the held-out test set.
mse_ensemble = mean_squared_error(y_test, y_pred_ensemble)
print(f"Mean Squared Error of the ensemble model: {mse_ensemble}")