In [1]:
import pandas as pd

In [2]:
# Load the training split; the 'class' column is separated out as the target
# in the next cell.
train_csv_path = 'data/p1_train.csv'
df = pd.read_csv(train_csv_path)

In [3]:
# Separate the feature columns from the 'class' target column.
X = df.drop(columns=['class'])
Y = df['class']

In [4]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [5]:
# Fit the preprocessing objects directly from `df` instead of from the X / Y
# names that this cell itself overwrites. Reading X / Y here made the cell
# unsafe to re-run: a second execution would fit the scaler on data that is
# already standardised (so it would no longer scale the test set correctly)
# and would re-fit the encoder on integer codes rather than the raw labels.
train_features = df.drop(columns=['class'])

# Standardise features using statistics learned from the training data only;
# the same fitted `scaler` is reused later for the test set.
scaler = StandardScaler().fit(train_features)
X = scaler.transform(train_features)

# Map the raw class labels to integer codes; `encoder` is reused later to
# encode the test labels with the same mapping.
encoder = LabelEncoder()
Y = encoder.fit_transform(df['class'])

In [6]:
import keras

In [7]:
# Single-hidden-layer binary classifier: 32 ReLU units with an L1 weight
# penalty, followed by one sigmoid output unit.
# NOTE(review): no random seed is set in this notebook, so the initial weights
# (and therefore the reported metrics) will presumably vary between runs.
n_features = X.shape[-1]
model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),
    keras.layers.Dense(
        32,
        activation='relu',
        kernel_regularizer=keras.regularizers.l1(0.01),
    ),
    keras.layers.Dense(units=1, activation='sigmoid'),
])
model.summary()


In [8]:
# Plain stochastic gradient descent. Use the documented public namespace
# (`keras.optimizers`) rather than `keras.api.optimizers`, which is a
# packaging detail and not a path callers should rely on.
sgd_optimizer = keras.optimizers.SGD(learning_rate=0.01)

In [9]:
# Binary cross-entropy matches the single sigmoid output; accuracy, precision
# and recall are reported per epoch during training.
model.compile(
    loss='binary_crossentropy',
    optimizer=sgd_optimizer,
    metrics=['accuracy', 'precision', 'recall'],
)

In [10]:
# Stop once the monitored 'loss' has not improved by at least `min_delta` for
# `patience` consecutive epochs, then roll back to the best weights seen.
early_stop = keras.callbacks.EarlyStopping(
    monitor='loss',
    mode='min',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
)

In [11]:
# Train on the full training set; 2000 epochs is only an upper bound because
# the early-stopping callback ends training sooner.
model.fit(
    X,
    Y,
    batch_size=64,
    epochs=2000,
    shuffle=True,
    callbacks=[early_stop],
)

Epoch 1/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5184 - loss: 2.2383 - precision: 0.6832 - recall: 0.4738  
Epoch 2/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5742 - loss: 2.1750 - precision: 0.7194 - recall: 0.6020 
Epoch 3/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6365 - loss: 2.1027 - precision: 0.7035 - recall: 0.7288 
Epoch 4/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6481 - loss: 2.0670 - precision: 0.6958 - recall: 0.7857 
Epoch 5/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6777 - loss: 2.0271 - precision: 0.7170 - recall: 0.8309 
Epoch 6/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7154 - loss: 1.9781 - precision: 0.7470 - recall: 0.8707 
Epoch 7/2000
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1444e48c0>

In [12]:
# Load the held-out split and separate the raw 'class' labels from the features.
df_test = pd.read_csv('data/p1_test_student.csv')
Y_test = df_test['class']

# Scale the test features with the scaler fitted on the training data, then
# get the model's sigmoid outputs for them.
X_test = scaler.transform(df_test.drop(columns=['class']))
Y_predict = model.predict(X_test)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 


In [13]:
# Encode the test labels with the encoder fitted on the training labels and
# reshape to a column vector so it lines up with the (N, 1) model output.
# Read from `df_test['class']` rather than from `Y_test` itself so this cell
# can be re-run safely: re-encoding an already-encoded `Y_test` would pass
# integer codes to the encoder instead of the raw labels.
Y_test = encoder.transform(df_test['class']).reshape(-1, 1)
Y_test.shape

(305, 1)

In [14]:
# Turn sigmoid outputs into hard 0/1 labels: strictly above 0.5 counts as 1.
above_threshold = Y_predict > 0.5
Y_predict_labels = above_threshold.astype(int)

In [15]:
# Number of predictions per label, shown as (count of 1s, count of 0s).
tuple((Y_predict_labels == label).sum() for label in (1, 0))

(np.int64(204), np.int64(101))

In [16]:
# Number of true labels per class, shown as (count of 1s, count of 0s).
tuple((Y_test == label).sum() for label in (1, 0))

(np.int64(196), np.int64(109))

In [17]:
# Hand-computed binary classification metrics, treating encoded label 1 as the
# positive class.

# Both arrays must have the same shape: comparing an (N, 1) array with an (N,)
# array would broadcast to (N, N) and silently inflate every count below.
assert Y_predict_labels.shape == Y_test.shape, "prediction/label shape mismatch"

# Confusion-matrix cells.
TP = ((Y_predict_labels == 1) & (Y_test == 1)).sum()
TN = ((Y_predict_labels == 0) & (Y_test == 0)).sum()
TF = TN  # original (misnamed) variable for true negatives, kept as an alias
FP = ((Y_predict_labels == 1) & (Y_test != 1)).sum()
FN = ((Y_predict_labels == 0) & (Y_test == 1)).sum()

accuracy = (Y_predict_labels == Y_test).sum() / Y_test.shape[0]

# Fall back to 0.0 when a denominator is zero (e.g. the model never predicts
# the positive class) instead of producing NaN and a runtime warning.
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
if (precision + recall) > 0:
    f1_score = 2 * precision * recall / (precision + recall)
else:
    f1_score = 0.0
print(f"accuracy: {accuracy}, precision: {precision}, recall: {recall}, f1_score: {f1_score}")

accuracy: 0.8819672131147541, precision: 0.8921568627450981, recall: 0.9285714285714286, f1_score: 0.9099999999999999
