# Monk's Problem

Import our own neural network code aswell as numpy and pandas for data handling and loading.

In [1]:
import numpy as np
import pandas as pd
from network import *

Read the data into pandas dataframe, remove empty first column, remove ids, split into variables and class labels.

In [2]:
X1_df = pd.read_csv("data/monks/monks-1.train", sep=" ", header=None)
XT1_df = pd.read_csv("data/monks/monks-1.test", sep=" ", header=None)
X2_df = pd.read_csv("data/monks/monks-2.train", sep=" ", header=None)
XT2_df = pd.read_csv("data/monks/monks-2.test", sep=" ", header=None)
X3_df = pd.read_csv("data/monks/monks-3.train", sep=" ", header=None)
XT3_df = pd.read_csv("data/monks/monks-3.test", sep=" ", header=None)

Y1_df = X1_df.pop(1)
X1_df = X1_df.drop(columns=[0, 8])
YT1_df = XT1_df.pop(1)
XT1_df = XT1_df.drop(columns=[0, 8])
Y2_df = X2_df.pop(1)
X2_df = X2_df.drop(columns=[0, 8])
YT2_df = XT2_df.pop(1)
XT2_df = XT2_df.drop(columns=[0, 8])
Y3_df = X3_df.pop(1)
X3_df = X3_df.drop(columns=[0, 8])
YT3_df = XT3_df.pop(1)
XT3_df = XT3_df.drop(columns=[0, 8])

print(X1_df.shape)
print(X1_df.head())
print(Y1_df.shape)
print(Y1_df.head())

(124, 6)
   2  3  4  5  6  7
0  1  1  1  1  3  1
1  1  1  1  1  3  2
2  1  1  1  3  2  1
3  1  1  1  3  3  2
4  1  1  2  1  2  1
(124,)
0    1
1    1
2    1
3    1
4    1
Name: 1, dtype: int64


Apply one-hot encoding to each variable and convert to numpy arrays.

In [3]:
def one_hot_encode_feature(df, feature_to_encode):
    dummies = pd.get_dummies(df[feature_to_encode], dtype="int32", prefix=feature_to_encode)
    result_df = pd.concat([df, dummies], axis=1)
    return result_df.drop(columns=feature_to_encode)

for feature in X1_df.columns:
    X1_df = one_hot_encode_feature(X1_df, feature)
    XT1_df = one_hot_encode_feature(XT1_df, feature)
    X2_df = one_hot_encode_feature(X2_df, feature)
    XT2_df = one_hot_encode_feature(XT2_df, feature)
    X3_df = one_hot_encode_feature(X3_df, feature)
    XT3_df = one_hot_encode_feature(XT3_df, feature)

X1 = X1_df.to_numpy()
Y1 = Y1_df.to_numpy().reshape(-1, 1)
XT1 = XT1_df.to_numpy()
YT1 = YT1_df.to_numpy().reshape(-1, 1)
X2 = X2_df.to_numpy()
Y2 = Y2_df.to_numpy().reshape(-1, 1)
XT2 = XT2_df.to_numpy()
YT2 = YT2_df.to_numpy().reshape(-1, 1)
X3 = X3_df.to_numpy()
Y3 = Y3_df.to_numpy().reshape(-1, 1)
XT3 = XT3_df.to_numpy()
YT3 = YT3_df.to_numpy().reshape(-1, 1)
print(X1.shape)
print(Y1.shape)
print(X1)

(124, 17)
(124, 1)
[[1 0 0 ... 0 1 0]
 [1 0 0 ... 0 0 1]
 [1 0 0 ... 0 1 0]
 ...
 [0 0 1 ... 0 0 1]
 [0 0 1 ... 0 0 1]
 [0 0 1 ... 1 0 1]]


Split data into train and validation sets, using 80% of the data for train and 20% for validation. The test set is already given as part of the dataset.

In [4]:
def split_data(X, Y, train_fraction=0.8):
    n = X.shape[0]
    indices = np.arange(n)

    train_size = int(n * train_fraction)
    train_indices = indices[:train_size]
    test_indices = indices[train_size:]
    X_train = X[train_indices]
    Y_train = Y[train_indices]
    X_test = X[test_indices]
    Y_test = Y[test_indices]
    return X_train, Y_train, X_test, Y_test

XTr1, YTr1, XVl1, YVl1 = split_data(X1, Y1)
XTr2, YTr2, XVl2, YVl2 = split_data(X2, Y2)
XTr3, YTr3, XVl3, YVl3 = split_data(X3, Y3)

print(XTr1.shape, YTr1.shape, XVl1.shape, YVl1.shape)

(99, 17) (99, 1) (25, 17) (25, 1)


Define datasets and dataloaders.

In [5]:
XTr1_dl = DataLoader(Dataset(XTr1, YTr1), batch_size=8, shuffle=True)
XTr2_dl = DataLoader(Dataset(XTr2, YTr2), batch_size=8, shuffle=True)
XTr3_dl = DataLoader(Dataset(XTr3, YTr3), batch_size=8, shuffle=True)
XVl1_dl = DataLoader(Dataset(XVl1, YVl1), batch_size=8, shuffle=False)
XVl2_dl = DataLoader(Dataset(XVl2, YVl2), batch_size=8, shuffle=False)
XVl3_dl = DataLoader(Dataset(XVl3, YVl3), batch_size=8, shuffle=False)
XT1_dl = DataLoader(Dataset(XT1, YT1), batch_size=8, shuffle=False)
XT2_dl = DataLoader(Dataset(XT2, YT2), batch_size=8, shuffle=False)
XT3_dl = DataLoader(Dataset(XT3, YT3), batch_size=8, shuffle=False)

Train a small model to see if training works at all.

In [21]:
model = Model(
    LinearLayer(17, 3),
    ReLU(),
    LinearLayer(3, 1),
    Sigmoid(),
)
loss_fn = BCELoss()
optimizer = AdamWOptimizer(model, learning_rate=0.01, weight_decay=0.001)


for epoch in range(25):
    # Train
    train_results = []
    train_losses = []
    for x_batch, y_batch in XTr1_dl:
        y_pred = model.forward(x_batch)
        loss = loss_fn.forward(y_pred, y_batch)
        grad_loss = loss_fn.backward()
        model.backward(grad_loss)
        optimizer.step()
        train_losses.append(loss)
        y_hat = (y_pred >= 0.5).astype(int)
        result = np.mean(y_hat == y_batch)
        train_results.append(result)

    # Validate
    val_losses = []
    val_results = []
    for x_batch, y_batch in XVl1_dl:
        y_pred = model.forward(x_batch)
        loss = loss_fn.forward(y_pred, y_batch)
        val_losses.append(loss)
        y_hat = (y_pred >= 0.5).astype(int)
        result = np.mean(y_hat == y_batch)
        val_results.append(result)

    print(f"Epoch {epoch+1:03d} | Train Loss: {np.mean(train_losses):.4f} | Train Acc: {np.mean(train_results):.4f} | Val Loss: {np.mean(val_losses):.4f} | Val Acc: {np.mean(val_results):.4f}")

# Test
test_losses = []
test_results = []
for x_batch, y_batch in XT1_dl:
    y_pred = model.forward(x_batch)
    loss = loss_fn.forward(y_pred, y_batch)
    test_losses.append(loss)
    y_hat = (y_pred >= 0.5).astype(int)
    result = np.mean(y_hat == y_batch)
    test_results.append(result)

print(f"Test Loss: {np.mean(test_losses):.4f} | Test Acc: {np.mean(test_results):.4f}")


Epoch 001 | Train Loss: 0.7253 | Train Acc: 0.5705 | Val Loss: 0.7037 | Val Acc: 0.2500
Epoch 002 | Train Loss: 0.6671 | Train Acc: 0.6571 | Val Loss: 0.7343 | Val Acc: 0.2500
Epoch 003 | Train Loss: 0.6605 | Train Acc: 0.6058 | Val Loss: 0.7616 | Val Acc: 0.2188
Epoch 004 | Train Loss: 0.6301 | Train Acc: 0.6731 | Val Loss: 0.7689 | Val Acc: 0.2188
Epoch 005 | Train Loss: 0.6187 | Train Acc: 0.6731 | Val Loss: 0.7725 | Val Acc: 0.2500
Epoch 006 | Train Loss: 0.6064 | Train Acc: 0.6731 | Val Loss: 0.7788 | Val Acc: 0.2812
Epoch 007 | Train Loss: 0.5852 | Train Acc: 0.7019 | Val Loss: 0.8020 | Val Acc: 0.2812
Epoch 008 | Train Loss: 0.5492 | Train Acc: 0.7244 | Val Loss: 0.8588 | Val Acc: 0.3438
Epoch 009 | Train Loss: 0.4936 | Train Acc: 0.7885 | Val Loss: 0.9464 | Val Acc: 0.2812
Epoch 010 | Train Loss: 0.5057 | Train Acc: 0.7756 | Val Loss: 1.1178 | Val Acc: 0.3125
Epoch 011 | Train Loss: 0.4319 | Train Acc: 0.8077 | Val Loss: 1.2845 | Val Acc: 0.3438
Epoch 012 | Train Loss: 0.3968 |

Test accuracy is okay, validation loss is increasing but it could be due to the very small validation set.