### Import Packages

In [83]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras import datasets as tf_datasets
from sklearn.metrics import confusion_matrix
from IPython.display import clear_output

### Import Data

In [56]:
def one_hot(y, num_classes=None):
    """Convert integer class labels into a one-hot encoded matrix.

    Parameters
    ----------
    y : array-like of int, shape (n_samples,)
        Class labels. Each label is used directly as a column index, so
        labels are expected to be non-negative integers.
    num_classes : int, optional
        Number of columns in the result. Defaults to ``max(y) + 1`` so every
        label has a column even when some classes are missing from ``y``.
        (Sizing by the number of *distinct* labels fails for e.g. ``[0, 2]``.)

    Returns
    -------
    numpy.ndarray of float64, shape (n_samples, num_classes)
        Row ``i`` is all zeros except for a 1 in column ``y[i]``.
    """
    y = np.asarray(y, dtype=np.int64).ravel()
    if num_classes is None:
        # An empty label vector yields an empty (0, 0) matrix.
        num_classes = int(y.max()) + 1 if y.size else 0

    y_onehot = np.zeros([len(y), num_classes])
    # Pairing each row index with its label sets one cell per row in one step.
    y_onehot[np.arange(len(y)), y] = 1

    return y_onehot

# Load the MNIST digit images and labels through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf_datasets.mnist.load_data()

# Only a subset of each split is used.
training_rows = 10000
testing_rows = 1000

# Flatten every image into one feature vector. Each split is reshaped from its
# own dimensions (-1 infers the per-image size) instead of borrowing the
# training set's shape for the test set.
X_train = x_train.reshape(x_train.shape[0], -1)[:training_rows, :]
X_test = x_test.reshape(x_test.shape[0], -1)[:testing_rows, :]

y_train = y_train[:training_rows]
y_test = y_test[:testing_rows]

# Rescale pixel values by the 8-bit maximum.
X_train = X_train / 255.0
X_test = X_test / 255.0

# The label vectors are already truncated above, so the one-hot matrices need
# no further slicing.
y_train_oh = one_hot(y_train)
y_test_oh = one_hot(y_test)

# The DataLoader accepts any indexable sequence of (features, label) pairs.
train_data = list(zip(X_train, y_train))

trainloader = torch.utils.data.DataLoader(train_data, batch_size=4, shuffle=True)

### Define the Neural Network

In [81]:
class Net(nn.Module):
    """Fully connected classifier with layers 784 -> 784 -> 80 -> 20 -> 10.

    The three hidden layers use tanh activations. The output layer returns
    raw, unnormalized class scores (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 784)
        self.fc2 = nn.Linear(784, 80)
        self.fc3 = nn.Linear(80, 20)
        self.fc4 = nn.Linear(20, 10)

    def forward(self, X):
        """Compute class scores for a batch of flattened inputs.

        Defining the standard ``forward`` hook lets the module be invoked as
        ``net(X)``, which is what ``nn.Module.__call__`` dispatches to.

        Parameters
        ----------
        X : torch.Tensor
            Input whose last dimension has 784 features.

        Returns
        -------
        torch.Tensor
            Scores with 10 entries along the last dimension.
        """
        X = torch.tanh(self.fc1(X))
        X = torch.tanh(self.fc2(X))
        X = torch.tanh(self.fc3(X))
        return self.fc4(X)

    def Forward(self, X):
        """Backward-compatible alias for :meth:`forward`."""
        return self.forward(X)
    
# Instantiate the model, cast its floating-point parameters to float32, and
# print the layer summary.
net = Net().float()
print(net)

Net(
  (fc1): Linear(in_features=784, out_features=784, bias=True)
  (fc2): Linear(in_features=784, out_features=80, bias=True)
  (fc3): Linear(in_features=80, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=10, bias=True)
)


### Define Loss Function and Optimizer

In [82]:
# Step size for plain stochastic gradient descent.
learning_rate = 0.0005

# Cross-entropy loss on the network's raw class scores.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learning_rate)

### Train the Model

In [86]:
N_EPOCHS = 300     # passes over the training loader
LOG_EVERY = 2000   # mini-batches between progress reports

# Mean mini-batch loss of each reporting window, in chronological order.
losses = []

for epoch in range(N_EPOCHS):
    running_loss = 0.0

    for step, (inputs, labels) in enumerate(trainloader, start=1):
        optimizer.zero_grad()

        # Cast to the dtypes the layers and CrossEntropyLoss work with:
        # float32 features and int64 class indices.
        outputs = net.Forward(inputs.float())
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if step % LOG_EVERY == 0:
            mean_loss = running_loss / LOG_EVERY
            print('[%d, %5d] loss: %.3f' % (epoch + 1, step, mean_loss))
            # Record the same averaged value that is printed, so the stored
            # history and the log agree (previously the raw sum was stored).
            losses.append(mean_loss)
            running_loss = 0.0

    # Periodically wipe the cell output so the log does not grow unbounded.
    if epoch % 10 == 0:
        clear_output()

[292,  2000] loss: 0.004
[293,  2000] loss: 0.004
[294,  2000] loss: 0.004
[295,  2000] loss: 0.004
[296,  2000] loss: 0.004
[297,  2000] loss: 0.004
[298,  2000] loss: 0.004
[299,  2000] loss: 0.004
[300,  2000] loss: 0.004


In [87]:
# Score the test subset. Inference needs no gradients, so autograd tracking is
# switched off to avoid building a graph over the entire test set.
with torch.no_grad():
    test_logits = net.Forward(torch.from_numpy(X_test).float())

# The predicted class is the index of the largest score in each row.
preds = test_logits.argmax(dim=1)
test_labels = torch.from_numpy(y_test).long()

# Fraction of test samples whose prediction matches the true label.
print(torch.mean((preds == test_labels).float()).item())

0.9340000152587891
