In [None]:
#Import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from xgboost.sklearn import XGBRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.gaussian_process.kernels import Matern
from sklearn.gaussian_process.kernels import ExpSineSquared
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.gaussian_process.kernels import RBF
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Dropout
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import sklearn.metrics as metrics
import seaborn as sns
from keras import backend as K

In [None]:
def regression_results(y_true, y_pred):
    """Print a summary of standard regression metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    None
        The metrics are written to stdout, each rounded to four decimals, in
        this order: explained variance, R^2, MAE, MSE and RMSE (the square
        root of the MSE).
    """
    # The MSE is needed twice (as-is and under the square root), so compute it once.
    mse = metrics.mean_squared_error(y_true, y_pred)

    # (label, value) pairs in print order; only metrics that are actually
    # reported are computed.
    report = (
        ('explained_variance: ', metrics.explained_variance_score(y_true, y_pred)),
        ('r2: ', metrics.r2_score(y_true, y_pred)),
        ('MAE: ', metrics.mean_absolute_error(y_true, y_pred)),
        ('MSE: ', mse),
        ('RMSE: ', np.sqrt(mse)),
    )
    for label, value in report:
        print(label, round(value, 4))
    
def coeff_determination(y_true, y_pred):
    """Coefficient of determination (R^2) built from Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)``, where ``SS_res`` is the sum of
    squared residuals and ``SS_tot`` is the sum of squared deviations of
    ``y_true`` from its mean. ``K.epsilon()`` is added to the denominator so a
    constant ``y_true`` does not cause a division by zero.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    centered_truth = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(centered_truth))
    unexplained_ratio = residual_ss / (total_ss + K.epsilon())
    return 1 - unexplained_ratio

In [None]:
# Load the training set: columns 0-5 are used as input features, column 6 as the target

data1 = pd.read_csv("training_dataset.csv")
features = data1.iloc[:, list(range(6))]
output = data1.iloc[:, 6]

In [None]:
# Train, validation, test partition, then feature scaling
#
# The raw features are partitioned first and the scaler is fitted on the training
# rows only, so validation/test statistics never leak into the preprocessing step.

y = output

X_train_raw, X_test_raw, y_train, y_test = train_test_split(features, y, test_size=0.15, random_state=1)
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X_train_raw, y_train, test_size=0.15, random_state=1)

# Scale the feature vectors: learn mean/std from the training partition and
# apply that same transformation to every other partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix on the training scale
scaled_features = scaler.transform(features)
X = scaled_features

In [None]:
# ANN model construction
# Fully connected regressor: 6 inputs -> 10 hidden ReLU layers of 45 units -> 1 linear output.

model = keras.Sequential()
model.add(Dense(45, activation='relu', input_shape=[6]))
for _ in range(9):  # the remaining nine identical hidden layers
    model.add(Dense(45, activation='relu'))
model.add(Dense(1))

# NOTE(review): the `decay` argument may be rejected by the non-legacy Adam
# optimizer in recent TensorFlow releases — confirm against the installed version.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-4, decay=1e-4),
              loss='mae',
              metrics=[coeff_determination])

# fit() returns a History object (the per-epoch training log), not a model,
# so predictions must be requested from `model` itself.
ann_model = model.fit(X_train, y_train, batch_size=128, epochs=2500, validation_data=(X_val, y_val))
y_pred_ann = model.predict(X_test)

In [None]:
# Evaluate the ANN predictions against the test partition

regression_results(y_true=y_test, y_pred=y_pred_ann)

In [None]:
# Making new predictions

test_data = pd.read_csv("Prediction_matrix.csv")

# Reuse the already-fitted scaler. Calling fit_transform here would re-estimate
# mean/std from the prediction matrix, so the model would receive inputs on a
# different scale than the one it was trained on.
# Values are passed positionally: assumes the CSV columns are in the same order
# as the six training feature columns (TODO confirm). scikit-learn may warn
# about missing feature names because of this.
scaled_test_data = scaler.transform(test_data.to_numpy())
scaled_test_data = pd.DataFrame(scaled_test_data)

# Predictions are collected in a list; here it holds a single prediction array.
ternary = []
ternary.append(model.predict(scaled_test_data))

In [None]:
# Writing the predictions into a CSV file

print(ternary)
import csv

# newline='' is the csv-module recommended mode; without it, extra blank lines
# can appear between rows on platforms that translate line endings.
with open('Predictions.csv', 'w', newline='') as f:

    # using csv.writer method from CSV package
    writer = csv.writer(f)

    # `ternary` is a list of prediction arrays. Iterating over the list itself
    # would emit a single CSV row whose cells are array reprs such as "[0.12]",
    # so each array is unpacked and written as one row per sample instead.
    for batch in ternary:
        rows = np.asarray(batch)
        if rows.ndim == 1:
            # 1-D predictions: turn each value into its own single-cell row
            rows = rows[:, np.newaxis]
        writer.writerows(rows)

In [None]:
# XGBoost: gradient-boosted tree regressor, trained on the training partition
# and used to predict the test partition

xgb_model = XGBRegressor(
    n_estimators=150,
    booster='gbtree',
    learning_rate=0.2,
    random_state=0,
).fit(X_train, y_train)

y_pred_xgboost = xgb_model.predict(X_test)

In [None]:
# Evaluate the XGBoost predictions against the test partition

regression_results(y_true=y_test, y_pred=y_pred_xgboost)

In [None]:
# Candidate GPR kernels, each multiplied by a constant factor of 1.0.
# Only `quadkernel` is used for the fit below; the others are kept for comparison.

rbfkernel = 1.0 * RBF(
    length_scale=0.1,
    length_scale_bounds=(1e-1, 10.0),
)
expkernel = 1.0 * ExpSineSquared(length_scale=1.0, periodicity=3.0,
                                 length_scale_bounds=(0.1, 10.0),
                                 periodicity_bounds=(1.0, 10.0))
quadkernel = 1.0 * RationalQuadratic(
    length_scale=0.1,
    alpha=0.5,
    alpha_bounds=(1e-5, 1e15),
)
maternkernel = 1.0 * Matern(
    length_scale=0.1,
    length_scale_bounds=(1e-1, 10.0),
    nu=1.5,
)

# Fit the Gaussian process on the training partition and predict the test partition

gpr_model = GaussianProcessRegressor(kernel=quadkernel, random_state=0)
gpr_model.fit(X_train, y_train)
y_pred_gpr = gpr_model.predict(X_test)

In [None]:
# Evaluate the GPR predictions against the test partition

regression_results(y_true=y_test, y_pred=y_pred_gpr)

# Predictions on new data can be produced in the same way as for the ANN

In [None]:
# Final ensemble averaged model predictions

# Make predictions using each model.
# The trained network is `model`; `ann_model` holds the return value of
# model.fit(), which is a training History and has no predict() method.
# Every output is flattened to 1-D so the sum below is element-wise: adding a
# column-shaped (n, 1) array to (n,) arrays would broadcast to an (n, n) matrix.
preds1 = np.ravel(model.predict(scaled_test_data))
preds2 = np.ravel(xgb_model.predict(scaled_test_data))
preds3 = np.ravel(gpr_model.predict(scaled_test_data))

# Average the predictions (equal weight for each of the three models)
final_preds = (preds1 + preds2 + preds3) / 3