In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import pandas as pd

BATCH_SIZE = 32
EPOCHS = 1

class Network(nn.Module):
    def __init__(self, input_size, hidden_size_1,hidden_size_2, output_size):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.fc3 = nn.Linear(hidden_size_2, output_size)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        #x = self.dropout(x)
        x = F.relu(self.fc2(x))
        #x = self.dropout(x)
        x = F.relu(self.fc3(x))
        return x
    
    def fit(self, X, y):
        optimizer = optim.SGD(self.parameters(), lr=0.01)
        #criterion = nn.MSELoss()
        criterion = nn.CrossEntropyLoss()
        
        optimizer.zero_grad()
        output = self(X)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
    
    def predict(self, X):
        return self(X)

In [2]:
digits = fetch_openml('mnist_784')

target = pd.get_dummies(digits.target).astype(int)
target = target.to_numpy()
target = torch.tensor(target, dtype=torch.float32)

data = digits.data.to_numpy()
data = data / 255
data = torch.tensor(data, dtype=torch.float32)

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [5]:
network = Network(784, 64, 32, 10)
network.train()
for epoch in range(EPOCHS):
    for i in range(0, len(X_train), BATCH_SIZE):
        batch_X = X_train[i:i+BATCH_SIZE]
        batch_y = y_train[i:i+BATCH_SIZE]
        network.fit(batch_X, batch_y)

In [6]:
network.eval()
y_pred = network.predict(X_test)
pred_argmax = torch.argmax(y_pred, dim=1)
true_argmax = torch.argmax(y_test, dim=1)
pred_argmax == true_argmax
result = (pred_argmax == true_argmax).int()
result
torch.mean(result.float())

tensor(0.7034)

In [11]:
y_pred[:10]

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 8.1553,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0981,
         0.0000],
        [0.0000, 0.0000, 0.1028, 2.1515, 0.0000, 0.0000, 0.0000, 5.5115, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 6.7445, 0.0000,
         0.0000],
        [8.3331, 0.0000, 4.3794, 0.0000, 0.0000, 0.0000, 0.4915, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 5.2436, 0.0000, 0.0798,
         0.0000],
        [0.0000, 0.0000, 5.2332, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 2.3691,
         0.0000],
        [0.0000, 1.1204, 0.1120, 1.8463, 0.0000, 0.0000, 0.0000, 5.2361, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.5172, 0.0000,
         0.0000]], grad_fn=<