In [13]:
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.models import Sequential

from CustomPreProcessor import CustomPreProcessor

In [14]:
"""
Using the Custom Pre Processor module to load the csv and produce the pre-processed dataset, the scaled inputs
and the targets
"""
# Build the project-local pre-processor on top of the raw CSV file.
PP = CustomPreProcessor('dataset.csv')

# pre_process hands back three objects, unpacked here in order.
# NOTE(review): the meaning of the first argument (3) is defined inside
# CustomPreProcessor.pre_process and cannot be read from this notebook -- confirm it.
(
    dataset_preprocessed,
    scaled_inputs,
    targets,
) = PP.pre_process(3, StandardScaler())

In [15]:
"""
We split the data in 3 parts:
- 80% : Training data
- 10% : Validation Data
- 10% : Testing Data
"""
# Row boundaries of the 80% / 10% / 10% split, computed once from the number of rows.
n_rows = len(dataset_preprocessed)
train_end = int(.8 * n_rows)
validation_end = int(.9 * n_rows)

# Positional slicing with .iloc replaces np.split(DataFrame, ...): NumPy implements that call
# through DataFrame.swapaxes, which pandas has deprecated. The three resulting frames cover the
# same consecutive row ranges as before.
train_data = dataset_preprocessed.iloc[:train_end]
validation_data = dataset_preprocessed.iloc[train_end:validation_end]
test_data = dataset_preprocessed.iloc[validation_end:]

# NOTE(review): rows are split in their stored order, without shuffling -- confirm that the
# dataset has no meaningful row ordering.
# NOTE(review): the splits are taken from dataset_preprocessed, not from scaled_inputs; confirm
# that its feature columns are already scaled, otherwise the scaler output is never used.

# Last column -> target (kept 2-D through the list indexer), every other column -> input.
train_data_inputs, train_data_targets = train_data.iloc[:, :-1], train_data.iloc[:, [-1]]
validation_data_inputs, validation_data_targets = validation_data.iloc[:, :-1], validation_data.iloc[:, [-1]]
test_data_inputs, test_data_targets = test_data.iloc[:, :-1], test_data.iloc[:, [-1]]

In [16]:
"""
We convert the pandas objects into numpy arrays, in order to feed them into the model
"""
# Whole training frame (input columns and target column together); kept because this cell
# has always defined it.
np_train_data = train_data.to_numpy()

# Inputs and targets converted separately for every split, so training, validation and testing
# can all be fed with the same kind of object.
np_train_inputs = train_data_inputs.to_numpy()
np_train_targets = train_data_targets.to_numpy()
np_val_inputs = validation_data_inputs.to_numpy()
np_val_targets = validation_data_targets.to_numpy()
np_test_inputs = test_data_inputs.to_numpy()
np_test_targets = test_data_targets.to_numpy()

In [17]:
# number of epochs the model is trained for (the fit call uses no early-stopping callback,
# so every epoch is run)
NUM_EPOCHS = 6

# Seed NumPy and TensorFlow before the model is built, so that weight initialisation and batch
# shuffling are repeatable from one run to the next (as far as the backend is deterministic).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [18]:
"""
After having specified the size of the inputs, the outputs and the hidden layers, we build the model sequentially
"""
# The input width is read from the training inputs instead of being hard-coded (it was 59), so
# the model keeps matching the data if pre-processing produces a different number of columns.
input_size = train_data_inputs.shape[1]
output_size = 1

hidden_layer_size = 75
relu_layer_count = 5

model = Sequential(
    # First layer: declares the input shape. No activation argument is given, so it is linear.
    # NOTE(review): a linear Dense layer placed directly before another Dense layer adds
    # parameters but no extra expressive power -- confirm whether an activation was intended.
    [Dense(hidden_layer_size, input_shape=(input_size,))]
    # 1st to 5th hidden layers, all with ReLU activation
    + [Dense(hidden_layer_size, activation='relu') for _ in range(relu_layer_count)]
    # output layer: a single sigmoid unit
    + [Dense(output_size, activation='sigmoid')]
)


In [19]:
"""
We compile the model using the well-known 'adam' optimizer and using the binary crossentropy loss function
since the output has only 2 classes (0 and 1)
"""
# Configure training: binary cross-entropy loss, 'adam' optimizer, accuracy as reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
"""
We fit the model using the training data extracted before and we check it against the validation data after every epoch
"""
# validation_steps is deliberately not passed: Keras documents it as relevant only when
# validation_data is a tf.data dataset. Here validation_data is a tuple of NumPy arrays, and the
# previous value (the number of validation rows) asked for far more batches than the arrays hold.
# Without it the whole validation set is evaluated after each epoch.
# The training frames are converted to NumPy so that training and validation data have the same type.
model.fit(
    x=train_data_inputs.to_numpy(),
    y=train_data_targets.to_numpy(),
    epochs=NUM_EPOCHS,
    validation_data=(np_val_inputs, np_val_targets),
    verbose=2,
)

Train on 14808 samples, validate on 1851 samples
Epoch 1/6
14808/14808 - 7s - loss: 0.6875 - accuracy: 0.5387 - val_loss: 0.6873 - val_accuracy: 0.5429
Epoch 2/6
14808/14808 - 7s - loss: 0.6830 - accuracy: 0.5510 - val_loss: 0.6908 - val_accuracy: 0.5370
Epoch 3/6
14808/14808 - 9s - loss: 0.6820 - accuracy: 0.5577 - val_loss: 0.6889 - val_accuracy: 0.5402
Epoch 4/6
14808/14808 - 8s - loss: 0.6813 - accuracy: 0.5590 - val_loss: 0.6912 - val_accuracy: 0.5500
Epoch 5/6
14808/14808 - 9s - loss: 0.6798 - accuracy: 0.5652 - val_loss: 0.6881 - val_accuracy: 0.5359
Epoch 6/6
14808/14808 - 9s - loss: 0.6784 - accuracy: 0.5676 - val_loss: 0.6899 - val_accuracy: 0.5397


<tensorflow.python.keras.callbacks.History at 0x264ae690508>

In [22]:
"""
Testing the model on the testing data
"""
# Score the trained model on the test split; evaluate returns the loss followed by the
# compiled metric (accuracy).
loss_value, accuracy = model.evaluate(
    x=test_data_inputs,
    y=test_data_targets,
    verbose=2,
)
# Bare last expression so the notebook displays the test accuracy.
accuracy

1851/1851 - 0s - loss: 0.6838 - accuracy: 0.5586


0.55861694