In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Load the data

In [2]:
# Read the raw training and testing splits (semicolon-separated CSV files)
training_set, testing_set = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ('../Dataset/training_set.csv', '../Dataset/testing_set.csv')
)

In [None]:
# Preview the first rows of the raw training set
training_set.head()

In [None]:
# Preview the first rows of the raw testing set
testing_set.head()

In [3]:
# Read the normalized training and testing splits (semicolon-separated CSV files)
training_set_norm, testing_set_norm = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in (
        '../Dataset/training_set_normalized_y.csv',
        '../Dataset/testing_set_normalized_y.csv',
    )
)

In [None]:
# Preview the first rows of the normalized training set
training_set_norm.head()

In [None]:
# Preview the first rows of the normalized testing set
testing_set_norm.head()

### Visualize the data

In [None]:
# Plot of each feature by y (raw training set): one scatter panel per input
# column, laid out on a 3 x 3 grid in the order listed below.
raw_feature_names = ['hw1', 'hw2', 'tw', 'bf1', 'bf2', 'tf1', 'tf2', 'psi', 'L']

raw_fig, raw_axes = plt.subplots(3, 3, figsize=(20, 10))
for panel, feature in zip(raw_axes.flat, raw_feature_names):
    panel.scatter(training_set[feature], training_set['y'])
    panel.set_xlabel(feature)
    panel.set_ylabel('y')
# Keep the x-labels of one row from running into the panels of the next row
raw_fig.tight_layout()
plt.show()

In [None]:
# Plot of each feature by y (normalized training set): one scatter panel per
# input column, laid out on a 3 x 3 grid in the order listed below.
norm_feature_names = ['hw1', 'hw2', 'tw', 'bf1', 'bf2', 'tf1', 'tf2', 'psi', 'L']

norm_fig, norm_axes = plt.subplots(3, 3, figsize=(20, 10))
for panel, feature in zip(norm_axes.flat, norm_feature_names):
    panel.scatter(training_set_norm[feature], training_set_norm['y'])
    panel.set_xlabel(feature)
    panel.set_ylabel('y')
# Keep the x-labels of one row from running into the panels of the next row
norm_fig.tight_layout()
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of the
# normalized training set (values rounded to two decimals before plotting).
import seaborn as sns

corr = training_set_norm.corr()
f, ax = plt.subplots(figsize=(10, 10))
hm = sns.heatmap(
    corr.round(2),
    annot=True,
    fmt='.2f',
    cmap="coolwarm",
    linewidths=.05,
    ax=ax,
)
# Leave headroom above the axes for the figure-level title
f.subplots_adjust(top=0.93)
t = f.suptitle('Correlation Heatmap', fontsize=14)

### Neural Network Model

* Activation function is the hyperbolic tangent function

* Loss function is the mean squared error

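* Optimizer is Adam (learning rate 0.001) for the Keras model; the pyrenn networks are trained with the Levenberg–Marquardt algorithm (`train_LM`)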

* Network architecture: 9 x 9 x 128 x 16 x 1 for the Keras model; 9 x 18 x 1 for the first pyrenn network

In [4]:
from keras import backend

def r_squared(y_true, y_pred):
    """Coefficient of determination (R^2) built from Keras backend ops.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values.

    Returns:
        ``1 - SS_res / (SS_tot + epsilon)``, where ``SS_res`` is the sum of
        squared residuals and ``SS_tot`` is the sum of squared deviations of
        ``y_true`` from its mean. ``backend.epsilon()`` is added to the
        denominator so the division stays finite when ``SS_tot`` is zero.
    """
    residuals = y_true - y_pred
    deviations = y_true - backend.mean(y_true)
    ss_residual = backend.sum(backend.square(residuals))
    ss_total = backend.sum(backend.square(deviations))
    return 1 - ss_residual / (ss_total + backend.epsilon())

2023-12-28 12:37:43.619752: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-28 12:37:43.972302: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-28 12:37:44.038421: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-12-28 12:37:44.038447: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudar

In [7]:
# Build the neural network model
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers
from keras import optimizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Fully connected regressor: 9 inputs -> 9 -> 128 -> 16 (tanh) -> 1 (linear)
model = Sequential()
model.add(Dense(9, input_dim=9, activation='tanh'))
model.add(Dense(128, activation='tanh'))
model.add(Dense(16, activation='tanh'))
model.add(Dense(1, activation='linear'))

# Compile once: MSE loss, Adam with an explicit learning rate, R^2 as the metric
optimizer = optimizers.Adam(learning_rate=0.001)
model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=[r_squared])

# Callbacks only take effect when handed to fit(); both watch the validation loss.
reduce_learning_rate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001, verbose=1)
# Stop well before the 10000-epoch ceiling once val_loss stops improving and
# roll back to the best weights seen (patience is a tunable choice).
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True, verbose=1)

x_train = training_set_norm.drop(['y'], axis=1)
y_train = training_set_norm['y']
x_val = testing_set_norm.drop(['y'], axis=1)
y_val = testing_set_norm['y']

# Train the model
# NOTE(review): the testing set doubles as validation data here, so the learning-rate
# schedule and the stopping point are tuned on the same data used for the final score.
history = model.fit(
    x_train,
    y_train,
    epochs=10000,
    batch_size=32,
    verbose=1,
    validation_data=(x_val, y_val),
    callbacks=[reduce_learning_rate, early_stopping],
)

# Save the trained model
model.save('model.h5')

Epoch 1/2
Epoch 2/2


In [8]:
# load the model
from keras.models import load_model

# The custom metric must be supplied by name so the saved model can be rebuilt
model = load_model('model.h5', custom_objects={'r_squared': r_squared})

# Evaluate the model on the normalized testing set.
# score[0] is the loss; score[1] is the r_squared metric, which is not an accuracy.
# NOTE(review): Keras appears to average function metrics over batches, so this value
# may differ from an R^2 computed over the whole set at once — confirm if it matters.
score = model.evaluate(testing_set_norm.drop(['y'], axis=1), testing_set_norm['y'], verbose=0)
print('Test loss:', score[0])
print('Test R^2:', score[1])


Test loss: 2.708162784576416
Test accuracy: -0.020226847380399704


In [None]:
# NOTE: this whole cell is disabled (every statement below is commented out).
# If it is re-enabled it reads `history`, the object returned by `model.fit`
# in the training cell, so that cell must have run in the same kernel session.
# NOTE(review): the final print formats scores[1] as a percentage, but the model
# is compiled with the r_squared metric — confirm the intended formatting.
# print(history.history.keys())
# # Plot the training history
# plt.figure(figsize=(20, 10))
# plt.subplot(1, 2, 1)
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend(['Train', 'Validation'], loc='upper right')
# plt.subplot(1, 2, 2)
# plt.plot(history.history['r_squared'])
# plt.plot(history.history['val_r_squared'])
# plt.title('R^2')
# plt.xlabel('Epoch')
# plt.ylabel('R^2')
# plt.legend(['Train', 'Validation'], loc='upper right')
# plt.show()

# # Evaluate the model
# scores = model.evaluate(testing_set_norm.drop(['y'], axis=1), testing_set_norm['y'])
# print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

In [None]:
import pyrenn
from sklearn.metrics import mean_squared_error, r2_score

# Create a new 9-18-1 pyrenn network (this does not convert the Keras model)
pyrenn_model = pyrenn.CreateNN([9, 18, 1])

# Arrange the data with one sample per column: P is (features x samples), Y is (1 x samples)
P = training_set_norm.drop(['y'], axis=1).values.T
Y = training_set_norm['y'].values.reshape(1, -1)

# Levenberg-Marquardt training; keep the network that train_LM returns
# instead of relying on the input object being updated in place.
pyrenn_model = pyrenn.train_LM(P, Y, pyrenn_model, E_stop=1e-5, k_max=500, verbose=True)

# save the model
pyrenn.saveNN(pyrenn_model, 'pyrenn_model')

# Predict the output of the testing set
P_test = testing_set_norm.drop(['y'], axis=1).values.T
y_pred = pyrenn.NNOut(P_test, pyrenn_model)

# Plot the predicted output of the testing set against the true values, sample by sample
plt.figure(figsize=(20, 10))
plt.plot(testing_set_norm['y'], label='Testing set')
plt.plot(y_pred, label='Prediction')
plt.title('Prediction')
plt.xlabel('Sample')
plt.ylabel('y')
plt.legend()
plt.show()

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(testing_set_norm['y'], y_pred))
print('Coefficient of determination: %.2f' % r2_score(testing_set_norm['y'], y_pred))


In [None]:
# Scatter plot of the predicted output on the training set against the real output,
# with R^2 shown in the legend
y_train_real = training_set_norm['y']
# Run the network once and reuse the result for both the plot and the score
y_train_pred = pyrenn.NNOut(training_set_norm.drop(['y'], axis=1).values.T, pyrenn_model)
r2 = r2_score(y_train_real, y_train_pred)

plt.scatter(y_train_real, y_train_pred)
plt.title('Prediction')
plt.xlabel('Real output')
plt.ylabel('Predicted output')
plt.legend(['R^2 = ' + str(r2)])
plt.show()

In [None]:
# Predicted vs. real output on the testing set; the legend carries the R^2 score
r2 = r2_score(testing_set_norm['y'], y_pred)
test_legend_label = 'R^2 = ' + str(r2)

test_fig, test_ax = plt.subplots()
test_ax.scatter(testing_set_norm['y'], y_pred)
test_ax.set_title('Prediction')
test_ax.set_xlabel('Real')
test_ax.set_ylabel('Predicted')
test_ax.legend([test_legend_label])
plt.show()


In [None]:
import pyrenn
from sklearn.metrics import mean_squared_error, r2_score

# Create a new 9-64-64-1 pyrenn network (this does not convert the Keras model)
pyrenn_model1 = pyrenn.CreateNN([9, 64, 64, 1])

# Arrange the data with one sample per column: P is (features x samples), Y is (1 x samples)
P = training_set_norm.drop(['y'], axis=1).values.T
Y = training_set_norm['y'].values.reshape(1, -1)

# Levenberg-Marquardt training; keep the network that train_LM returns
# instead of relying on the input object being updated in place.
pyrenn_model1 = pyrenn.train_LM(P, Y, pyrenn_model1, E_stop=1e-5, k_max=500, verbose=True)

# save the model
pyrenn.saveNN(pyrenn_model1, 'pyrenn_model1')

# Predict the output of the testing set
P_test1 = testing_set_norm.drop(['y'], axis=1).values.T
y_pred1 = pyrenn.NNOut(P_test1, pyrenn_model1)

# Plot the predicted output of the testing set against the true values, sample by sample
plt.plot(testing_set_norm['y'], label='Testing set')
plt.plot(y_pred1, label='Prediction')
plt.title('Prediction')
plt.xlabel('Sample')
plt.ylabel('y')
plt.legend()
plt.show()

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(testing_set_norm['y'], y_pred1))
print('Coefficient of determination: %.2f' % r2_score(testing_set_norm['y'], y_pred1))

In [None]:
# Scatter plot of the predicted output on the training set against the real output,
# with R^2 shown in the legend
y_train_real1 = training_set_norm['y']
# Run the network once and reuse the result for both the plot and the score
y_train_pred1 = pyrenn.NNOut(training_set_norm.drop(['y'], axis=1).values.T, pyrenn_model1)
r2 = r2_score(y_train_real1, y_train_pred1)

plt.scatter(y_train_real1, y_train_pred1)
plt.title('Prediction')
plt.xlabel('Real output')
plt.ylabel('Predicted output')
plt.legend(['R^2 = ' + str(r2)])
plt.show()

In [None]:
# Predicted vs. real output on the testing set; the legend carries the R^2 score
r2 = r2_score(testing_set_norm['y'], y_pred1)
test_legend_label1 = 'R^2 = ' + str(r2)

test_fig1, test_ax1 = plt.subplots()
test_ax1.scatter(testing_set_norm['y'], y_pred1)
test_ax1.set_title('Prediction')
test_ax1.set_xlabel('Real')
test_ax1.set_ylabel('Predicted')
test_ax1.legend([test_legend_label1])
plt.show()

In [None]:
import pickle
from pathlib import Path

import pyrenn
from sklearn.metrics import mean_squared_error, r2_score

# Artifacts of the 9-128-16-1 network. Training is expensive, so it only runs when
# they are missing; otherwise the cell reloads them. Either way `pyrenn_model2`
# and `train2` are defined on a fresh kernel.
MODEL2_FILE = Path('pyrenn_model2')  # network written by pyrenn.saveNN
TRAIN2_FILE = Path('train2.pkl')     # pickled object returned by train_LM (holds 'ErrorHistory')

if MODEL2_FILE.exists() and TRAIN2_FILE.exists():
    pyrenn_model2 = pyrenn.loadNN(str(MODEL2_FILE))
    # pickle can execute arbitrary code on load: only read files this notebook wrote itself
    with TRAIN2_FILE.open('rb') as fh:
        train2 = pickle.load(fh)
else:
    # Create a new 9-128-16-1 pyrenn network (this does not convert the Keras model)
    pyrenn_model2 = pyrenn.CreateNN([9, 128, 16, 1])

    # Arrange the data with one sample per column: P is (features x samples), Y is (1 x samples)
    P = training_set_norm.drop(['y'], axis=1).values.T
    Y = training_set_norm['y'].values.reshape(1, -1)

    train2 = pyrenn.train_LM(P, Y, pyrenn_model2, E_stop=1e-5, k_max=500, verbose=True)
    # Use the network returned by train_LM as the trained model
    pyrenn_model2 = train2

    # save the model
    pyrenn.saveNN(pyrenn_model2, str(MODEL2_FILE))

    # save the train2 object (the context manager closes the file handle)
    with TRAIN2_FILE.open('wb') as fh:
        pickle.dump(train2, fh)

# Predict the output of the testing set
# NOTE: this rebinds `y_pred`, which the 9-18-1 cell also assigns; re-run that cell
# before re-running its scatter plot.
y_pred = pyrenn.NNOut(testing_set_norm.drop(['y'], axis=1).values.T, pyrenn_model2)

# Plot the predicted output of the testing set against the true values, sample by sample
plt.figure(figsize=(20, 10))
plt.plot(testing_set_norm['y'], label='Testing set')
plt.plot(y_pred, label='Prediction')
plt.title('Prediction')
plt.xlabel('Sample')
plt.ylabel('y')
plt.legend()
plt.show()

# Evaluate the model
print('Mean squared error: %.2f' % mean_squared_error(testing_set_norm['y'], y_pred))
print('Coefficient of determination: %.2f' % r2_score(testing_set_norm['y'], y_pred))



In [None]:
# Load the train2 object and plot the error history
import pickle

# The context manager closes the file handle once the object has been read.
# pickle can execute arbitrary code on load: only read files this notebook wrote itself.
with open('train2.pkl', 'rb') as train2_file:
    train2 = pickle.load(train2_file)

# Plot the error history recorded under the 'ErrorHistory' key of the loaded object
plt.figure(figsize=(20, 10))
plt.plot(train2['ErrorHistory'])
plt.title('Error history')
plt.xlabel('Epoch')
plt.ylabel('Error')
plt.show()