In [None]:
%load_ext autoreload
%autoreload 2

# Imports

In [2]:
import numpy as np
import pandas as pd
import PreClf, MultiPreClf
from sklearn.datasets import fetch_openml
from sklearn.datasets import make_moons

import matplotlib.pyplot as plt 
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# MLP from sklearn
from sklearn.neural_network import MLPClassifier

# MNIST 
X_, y_ = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False, parser="pandas")
# Preprocessing normalisation 
X_ = X_ / 255

y_ = y_.astype(int)

# On-hot encoding
encoder = OneHotEncoder(sparse=False)
y_one_hot = encoder.fit_transform(y_.reshape(-1, 1))


X_train, X_test, y_train, y_test = train_test_split(X_, y_one_hot, test_size=0.2, random_state=42)
y_train_index = np.argmax(y_train, axis=1)


# X_train, X_test, y_train, y_test = train_test_split(X_, y_one_hot, test_size=0.2, random_state=42)





# MLP

In [149]:
model_ = MultiPreClf.MLP(alpha=0.00001, n_iterations=30, hidden_layers_size=[128])

In [150]:
model_.train(X_train, y_train, batch_size=10, verbose=True)

Epoch: 1, Training loss: 2.1392833205091835
Epoch: 1, Training accuracy: 0.2
Epoch: 2, Training loss: 2.2054113476795107
Epoch: 2, Training accuracy: 0.2
Epoch: 3, Training loss: 2.0605320935877267
Epoch: 3, Training accuracy: 0.4
Epoch: 4, Training loss: 2.108185335417319
Epoch: 4, Training accuracy: 0.5
Epoch: 5, Training loss: 1.886107941848478
Epoch: 5, Training accuracy: 0.5
Epoch: 6, Training loss: 2.138928403890501
Epoch: 6, Training accuracy: 0.3
Epoch: 7, Training loss: 1.8728679653570783
Epoch: 7, Training accuracy: 0.4
Epoch: 8, Training loss: 1.8694318537899697
Epoch: 8, Training accuracy: 0.5
Epoch: 9, Training loss: 1.8213820135791774
Epoch: 9, Training accuracy: 0.5
Epoch: 11, Training loss: 1.9026339335837767
Epoch: 11, Training accuracy: 0.6
Epoch: 12, Training loss: 1.869810169383681
Epoch: 12, Training accuracy: 0.6
Epoch: 13, Training loss: 1.860156159502548
Epoch: 13, Training accuracy: 0.8
Epoch: 14, Training loss: 1.5424914526482518
Epoch: 14, Training accuracy: 

([2.033082685425633,
  2.3034813160157435,
  2.3039553722571506,
  2.3624594431206214,
  2.295555816431912,
  2.3527618024819335,
  2.2927134640375897,
  2.3423789069127148,
  2.2723775207054766,
  2.312004725879236,
  2.3321624961662657,
  2.293375490879847,
  2.256672688042801,
  2.3222092958629545,
  2.256692268744358,
  2.236410141151096,
  2.3430037585787082,
  2.312657167163267,
  2.388456352710697,
  2.408766976164523,
  2.42478702608811,
  2.345099055033735,
  2.3849463588595166,
  2.2120943411033798,
  2.287739486741674,
  2.3291840510512802,
  2.207698465694936,
  2.359362764176468,
  2.347065822291213,
  2.330104071675902,
  2.3048101547251707,
  2.219821027550811,
  2.2372323162749135,
  2.1659425579964617,
  2.27379399310894,
  2.368700365517884,
  2.2577035485615644,
  2.4295710354652176,
  2.4422285028851394,
  2.256456396469941,
  2.41672882660906,
  2.3383672753197238,
  2.383337496794275,
  2.383217819550401,
  2.2953151736875537,
  2.271304357524217,
  2.228388826947

# MLP (torch)

In [5]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

In [609]:
# Turn data into torch tensors
x_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_index, dtype=torch.long)

In [611]:
train_dataset = torch.utils.data.TensorDataset(x_train_tensor, y_train_tensor)

In [612]:
class MLP_DR(nn.Module):
    def __init__(self, input_size, hidden_layers, output_size):
        super(MLP_DR, self).__init__()
        self.input_size = input_size
        self.hidden_layers = hidden_layers
        self.output_size = output_size

        # layer 1
        self.fc1 = nn.Linear(input_size, hidden_layers[0])

        # layer 2
        self.fc2 = nn.Linear(hidden_layers[0], hidden_layers[1])

        # layer 3
        self.fc3 = nn.Linear(hidden_layers[1], output_size)

    def forward(self, x):

        linear_1 = self.fc1(x)
        act_1 = F.relu(linear_1)

        linear_2 = self.fc2(act_1)
        act_2 = F.relu(linear_2)

        linear_3 = self.fc3(act_2)
        act_3 = F.softmax(linear_3, dim=1)

        return act_3

In [616]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MLP_DR(784, [20, 10], 10).to(device)

In [614]:
criterion = nn.CrossEntropyLoss()

In [615]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [617]:
num_epochs = 5  # Number of epochs can be adjusted

for epoch in range(num_epochs):
    model.train()
    for data, targets in train_loader:
        data, targets = data.to(device), targets.to(device)  # Move data to GPU

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/5], Loss: 2.3043
Epoch [2/5], Loss: 2.3011
Epoch [3/5], Loss: 2.3036
Epoch [4/5], Loss: 2.3028
Epoch [5/5], Loss: 2.3038
