In [1]:
import csv

import numpy as np
import pandas as pd
import tensorflow as tf
np.set_printoptions(precision=3, suppress=True)

In [2]:
def drop_columns(data: pd.DataFrame, columns: list) -> None:
    """
    Drop redundant columns (noise) from the dataframe to make it easier for the model to learn.

    All requested columns are removed with a single ``DataFrame.drop`` call, so the
    operation is all-or-nothing: if any column is missing, ``KeyError`` is raised and
    the dataframe is left exactly as it was (no partially-dropped state).

    :param data: the dataframe to drop columns from (modified in place)
    :param columns: the column labels to drop; any iterable of labels is accepted
    :raises KeyError: if at least one of the requested columns is not in the dataframe
    :return: None. It is a side-effect and merely modifies the dataframe without returning it.
    """
    # list(...) keeps every element a separate label; pandas would otherwise treat a
    # bare tuple as one single (multi-level) label instead of several columns.
    data.drop(columns=list(columns), inplace=True)

In [3]:
# Load both raw datasets from CSV files in the working directory.
data_1 = pd.read_csv('data_1.csv')
data_2 = pd.read_csv('data_2.csv')

# Remove the columns that should not be fed to the model (drop_columns mutates
# data_1 in place). Because data_1 is re-read above, re-running this cell is safe.
drop_columns(data_1, ['ReadIn_ID', 'USER_ID'])

# Row count, followed by a preview of the cleaned frame. Only the last expression
# of a cell is rendered, so the preview is placed here and nowhere else.
print(len(data_1))
data_1.head()


1165


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.755,0.515,0.286,1.739,0
1,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,5.0,0.384,0.522,1.5,0
2,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.459,0.636,0.375,0.747,0
3,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.392,0.46,0.353,1.727,0
4,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.7,0.546,0.316,1.137,0


In [4]:
# Remove the same non-feature columns from the second dataset (mutates data_2 in place).
# NOTE: data_2 is loaded in an earlier cell, so running this cell a second time without
# re-loading raises KeyError because the columns are already gone.
drop_columns(data_2, ['ReadIn_ID', 'USER_ID'])

# Row count, followed by a preview of the cleaned frame (the last expression is what
# the notebook displays).
print(len(data_2))
data_2.head()

715


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,0,0,0,1,1,0,0,0,0,0,...,0,1,0,0,0,4.954,-0.74,0.0,1.489,1
1,2,0,0,1,3,0,0,0,0,0,...,0,1,0,0,0,5.209,0.343,0.444,1.65,1
2,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,0,5.392,-0.346,0.364,1.913,1
3,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,0,5.044,0.309,0.5,1.416,1
4,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,0,5.087,0.327,0.48,0.912,1


In [6]:
# Stack data_1 on top of data_2 into one frame; ignore_index=True renumbers the
# rows 0..n-1 instead of keeping each frame's own index.
frames = [data_1, data_2]
data = pd.concat(frames, axis=0, ignore_index=True)

print(len(data))
data.head()

1880


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.755,0.515,0.286,1.739,0
1,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,5.0,0.384,0.522,1.5,0
2,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.459,0.636,0.375,0.747,0
3,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.392,0.46,0.353,1.727,0
4,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.7,0.546,0.316,1.137,0


In [7]:
# Shuffle the rows: sampling 100% of the frame without replacement is a permutation,
# and the fixed random_state makes that permutation repeatable.
shuffled = data.sample(frac=1, random_state=1)
# Discard the pre-shuffle index so rows are numbered 0..n-1 in their new order.
data = shuffled.reset_index(drop=True)

print(len(data))
data.head()

1880


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,1,0,0,2,2,0,0,0,0,0,...,0,1,0,0,0,5.285,-0.306,0.2,2.292,1
1,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.644,-0.763,0.316,2.232,1
2,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,4.807,-0.296,0.571,2.132,0
3,2,0,0,1,4,0,0,0,0,0,...,0,1,0,0,0,5.392,-0.355,0.375,3.423,0
4,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,5.322,0.357,0.4,2.288,0


In [8]:
# Separate the label column (y) from the feature columns (x).
target_column = 'is_activator'
y = data[target_column]
x = data.drop(columns=target_column)

In [15]:
# Hyperparameters for this run; every value here is also logged to performance.csv.

# -- data --
train_test_ratio = 0.8  # fraction of rows used for training; the remainder is the test set

# -- architecture --
num_hidden_layers = 2  # number of Dense hidden layers
num_nodes_per_layer = 48  # units in each hidden layer
activation_function_hidden = tf.nn.relu  # activation of every hidden layer
activation_function_output = tf.nn.sigmoid  # activation of the output layer

# -- training --
optimizing_algorithm = 'adam'  # optimizer name passed to model.compile
loss_function = 'sparse_categorical_crossentropy'  # loss name passed to model.compile
epochs = 500  # number of passes over the training data

In [16]:
# Split into training and testing data: the first `train_test_ratio` share of the
# rows is used for training, the tail for testing.
split_at = int(len(x) * train_test_ratio)

# Features -> numpy arrays.
x_train = np.asarray(x.iloc[:split_at])
x_test = np.asarray(x.iloc[split_at:])
print(f'shape is {x_train.shape}')

# Normalize along axis=1, i.e. each sample (row) is scaled independently, so no
# statistics are shared between the training and the test set.
x_train = tf.keras.utils.normalize(x_train, axis=1)
x_test = tf.keras.utils.normalize(x_test, axis=1)

# Labels -> numpy arrays, cut at the same position as the features.
y_train = np.asarray(y.iloc[:split_at])
y_test = np.asarray(y.iloc[split_at:])
print(f'shape is {y_train.shape}')



shape is (1504, 777)
shape is (1504,)


In [17]:
# Seed TensorFlow's global random generator so that weight initialisation and batch
# shuffling are repeatable on a fresh "Restart & Run All" (the data shuffle is already
# seeded with the same value). Bit-exact results on GPU are still not guaranteed.
tf.random.set_seed(1)

# Fully-connected classifier: Flatten -> num_hidden_layers x Dense -> Dense(2).
model = tf.keras.models.Sequential()
# Flatten collapses every non-batch dimension; the input here is already 2-D.
model.add(tf.keras.layers.Flatten())
for _ in range(num_hidden_layers):
    model.add(tf.keras.layers.Dense(num_nodes_per_layer, activation=activation_function_hidden))
# Two output units, one per class; the sparse loss expects integer class labels in y.
model.add(tf.keras.layers.Dense(2, activation=activation_function_output))

# TODO: also try a single sigmoid output unit compiled with binary cross-entropy and
# compare it against this 2-unit / sparse categorical cross-entropy setup.
model.compile(optimizer=optimizing_algorithm,
              loss=loss_function,
              metrics=['accuracy'])

# Last expression of the cell: fit() returns the Keras History object.
model.fit(x_train, y_train, epochs=epochs)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x2436dfa1630>

In [18]:
# Score the trained model on the held-out test rows. With metrics=['accuracy'],
# evaluate() returns the loss followed by the accuracy.
evaluation = model.evaluate(x_test, y_test)
val_loss, val_acc = evaluation
print(f'Loss: {val_loss}, Accuracy: {val_acc}')

Loss: 0.34716546535491943, Accuracy: 0.8723404407501221


In [19]:
# Append this run's hyperparameters and evaluation results to performance.csv.
#
# The rows are produced by csv.writer, so fields are separated by a bare comma.
# (Backslash-continued string literals would embed the indentation of every
# continuation line into the column names and values.)
# NOTE: a performance.csv written by an earlier version of this cell still has the
# whitespace-padded header; delete it or read it with skipinitialspace=True.
performance_header = [
    'train_test_ratio',
    'num_hidden_layers',
    'num_nodes_per_layer',
    'activation_function_hidden',
    'activation_function_output',
    'optimizing_algorithm',
    'loss_function',
    'epochs',
    'val_loss',
    'val_acc',
]
performance_row = [
    train_test_ratio,
    num_hidden_layers,
    num_nodes_per_layer,
    # Log the function's name rather than its repr, which contains a memory address
    # that differs on every run; fall back to str() for objects without a name.
    getattr(activation_function_hidden, '__name__', str(activation_function_hidden)),
    getattr(activation_function_output, '__name__', str(activation_function_output)),
    optimizing_algorithm,
    loss_function,
    epochs,
    val_loss,
    val_acc,
]

# Mode 'a' creates the file if it does not exist and otherwise appends to it;
# newline='' is what the csv module requires for files it writes to.
with open('performance.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    # Position 0 right after opening in append mode means the file is new or empty,
    # so the header row has to be written first.
    if f.tell() == 0:
        writer.writerow(performance_header)
    writer.writerow(performance_row)