In [24]:
import tensorflow as tf
import pandas as pd
import numpy as np
np.set_printoptions(precision=3, suppress=True)

In [79]:
def drop_columns(data: pd.DataFrame, columns: list):
    """
    Drop redundant columns (noise) from the dataframe to make it easier for the model to learn
    :param data: the dataframe to drop columns from
    :param columns: a list that contains the columns to drop
    :return: None. It is a side-effect and merely modifies the dataframe without returning it.
    """
    for col in columns:
        data.drop(col, axis=1, inplace=True)

In [80]:
# loading the datasets
data_1 = pd.read_csv('data_1.csv')
data_2 = pd.read_csv('data_2.csv')

data_1.head()
drop_columns(data_1, ['ReadIn_ID', 'USER_ID'])
print(len(data_1))
data_1.head()


1165


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.755,0.515,0.286,1.739,0
1,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,5.0,0.384,0.522,1.5,0
2,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.459,0.636,0.375,0.747,0
3,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.392,0.46,0.353,1.727,0
4,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.7,0.546,0.316,1.137,0


In [81]:
data_2.head()
drop_columns(data_2, ['ReadIn_ID', 'USER_ID'])
print(len(data_2))
data_2.head()

715


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,0,0,0,1,1,0,0,0,0,0,...,0,1,0,0,0,4.954,-0.74,0.0,1.489,1
1,2,0,0,1,3,0,0,0,0,0,...,0,1,0,0,0,5.209,0.343,0.444,1.65,1
2,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,0,5.392,-0.346,0.364,1.913,1
3,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,0,5.044,0.309,0.5,1.416,1
4,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,0,5.087,0.327,0.48,0.912,1


In [82]:
# combining data from both dataframes data_1 and data_2
data = pd.concat([data_1, data_2], ignore_index=True)
print(len(data))
data.head()

1880


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.755,0.515,0.286,1.739,0
1,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,5.0,0.384,0.522,1.5,0
2,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,4.459,0.636,0.375,0.747,0
3,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.392,0.46,0.353,1.727,0
4,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.7,0.546,0.316,1.137,0


In [83]:
# shuffling the data
data = data.sample(frac=1, random_state=1).reset_index(drop=True)
print(len(data))
data.head()

1880


Unnamed: 0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D769,D770,D771,D772,D773,D774,D775,D776,D777,is_activator
0,1,0,0,2,2,0,0,0,0,0,...,0,1,0,0,0,5.285,-0.306,0.2,2.292,1
1,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,4.644,-0.763,0.316,2.232,1
2,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,4.807,-0.296,0.571,2.132,0
3,2,0,0,1,4,0,0,0,0,0,...,0,1,0,0,0,5.392,-0.355,0.375,3.423,0
4,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,0,5.322,0.357,0.4,2.288,0


In [84]:
# seperating the data into X and y
x = data.drop('is_activator', axis=1)
y = data['is_activator']

In [85]:
# splitting the data into training and testing data
x_train = np.asarray(x[:int(len(x)*0.8)])
x_test = np.asarray(x[int(len(x)*0.8):])
print(f'shape is {x_train.shape}')

x_train = tf.keras.utils.normalize(x_train, axis=1)
x_test = tf.keras.utils.normalize(x_test, axis=1)

y_train = np.asarray(y[:int(len(y)*0.8)])
y_test = np.asarray(y[int(len(y)*0.8):])
print(f'shape is {y_train.shape}')



shape is (1504, 777)
shape is (1504,)


In [86]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax))

# do another compile using binary categorical crossentropy
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=450)

Epoch 1/450
Epoch 2/450
Epoch 3/450
Epoch 4/450
Epoch 5/450
Epoch 6/450
Epoch 7/450
Epoch 8/450
Epoch 9/450
Epoch 10/450
Epoch 11/450
Epoch 12/450
Epoch 13/450
Epoch 14/450
Epoch 15/450
Epoch 16/450
Epoch 17/450
Epoch 18/450
Epoch 19/450
Epoch 20/450
Epoch 21/450
Epoch 22/450
Epoch 23/450
Epoch 24/450
Epoch 25/450
Epoch 26/450
Epoch 27/450
Epoch 28/450
Epoch 29/450
Epoch 30/450
Epoch 31/450
Epoch 32/450
Epoch 33/450
Epoch 34/450
Epoch 35/450
Epoch 36/450
Epoch 37/450
Epoch 38/450
Epoch 39/450
Epoch 40/450
Epoch 41/450
Epoch 42/450
Epoch 43/450
Epoch 44/450
Epoch 45/450
Epoch 46/450
Epoch 47/450
Epoch 48/450
Epoch 49/450
Epoch 50/450
Epoch 51/450
Epoch 52/450
Epoch 53/450
Epoch 54/450
Epoch 55/450
Epoch 56/450
Epoch 57/450
Epoch 58/450
Epoch 59/450
Epoch 60/450
Epoch 61/450
Epoch 62/450
Epoch 63/450
Epoch 64/450
Epoch 65/450
Epoch 66/450
Epoch 67/450
Epoch 68/450
Epoch 69/450
Epoch 70/450
Epoch 71/450
Epoch 72/450
Epoch 73/450
Epoch 74/450
Epoch 75/450
Epoch 76/450
Epoch 77/450
Epoch 78

<keras.callbacks.History at 0x1d5cf9164d0>

In [87]:
val_loss, val_acc = model.evaluate(x_test, y_test)
print(f'Loss: {val_loss}, Accuracy: {val_acc}')

Loss: 0.390193372964859, Accuracy: 0.8909574747085571
