In [1]:
#Load MNIST Data
import numpy as np
from layers import *
import solver

import torch
import torchvision

# Preprocessing applied to every image: convert to a tensor, then normalize
# with mean 0.5 and std 0.5. Each sample holds 28*28 values (see the flattening
# below), i.e. a single channel, so Normalize gets exactly one mean and one std.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (0.5,)),
])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                      download=True, transform=transform)
# One batch spans the whole training set, so a single next() call loads it all.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                     download=True, transform=transform)
# The test loader must wrap `testset`; wrapping `trainset` here would make the
# reported "test" accuracy a measurement on training samples.
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# Pull the full training set out of the loader as numpy arrays.
# The builtin next() is used because loader iterators in current PyTorch
# releases no longer expose a .next() method.
dataiter = iter(trainloader)
X_train_full, y_train_full = next(dataiter)
X_train_full = X_train_full.numpy()
y_train_full = y_train_full.numpy()
# Flatten every image into one feature row: (num_samples, 28*28).
X_train_full = X_train_full.reshape(X_train_full.shape[0], -1)

# 90% of the (already shuffled) samples train the model, the rest validate it.
n_train = int(np.round(0.9*X_train_full.shape[0]))
# Feature dimension, used by later cells as the models' input_dim.
d = X_train_full.shape[1]

X_train = X_train_full[:n_train]
X_val = X_train_full[n_train:]
y_train = y_train_full[:n_train]
y_val = y_train_full[n_train:]
data_mnist = {'X_train':X_train, 'X_val':X_val, 'y_train':y_train, 'y_val':y_val}

# Same extraction for the held-out test set.
dataiter = iter(testloader)
X_test, y_test = next(dataiter)
X_test = X_test.numpy()
y_test = y_test.numpy()
X_test = X_test.reshape(X_test.shape[0], -1)



In [2]:
#Sketch the training data
# Left-multiply features and targets by the same random Gaussian matrix A, so
# the 2000 sketched rows are random linear combinations of the original samples.
# A private, seeded generator keeps the sketch reproducible across kernel
# restarts without touching numpy's global random state.
sketch_rng = np.random.RandomState(0)
A = sketch_rng.randn(2000, X_train_full.shape[0])
X_train_full_s = A @ X_train_full
y_train_full_s = A @ y_train_full
# NOTE(review): the sketched targets are real-valued combinations of the labels,
# not class indices — confirm the downstream model/solver expects that.

# Hold out the last two sketched rows for validation. Slicing with [-2:] keeps
# the validation features 2-D and the targets 1-D, and no sketched row is
# silently dropped between the training and validation parts.
X_train_s = X_train_full_s[:-2]
X_val_s = X_train_full_s[-2:]
y_train_s = y_train_full_s[:-2]
y_val_s = y_train_full_s[-2:]
data_mnist_s = {'X_train':X_train_s, 'X_val':X_val_s, 'y_train':y_train_s, 'y_val':y_val_s}
# Sketching mixes samples, not features, so the feature dimension is unchanged.
d = X_train_full_s.shape[1]

In [3]:
# fc_relu Classifier - Single Layer
import fc_relu

# Build the model on the flattened feature dimension `d`; hidden_dim=None is
# presumably how the single-layer variant is selected (per the heading above).
fc_relu_model = fc_relu.fc_relu_Classifier(
    input_dim=d,
    hidden_dim=None,
    weight_scale=1e-3,
    reg=0.0,
)

# Optimizer settings handed to the solver.
# NOTE(review): the recorded run of this cell shows the loss climbing from ~34
# to above 1e11 while val_acc stays at 0.117333 — training diverges with these
# settings; TODO revisit learning_rate / weight_scale.
config = dict(learning_rate=1e-2)

fc_relu_solv = solver.Solver(
    fc_relu_model,
    data_mnist,
    optim_config=config,
    num_epochs=100,
    batch_size=64,
    print_every=100,
)
fc_relu_solv.train()

(Iteration 1 / 84300) loss: 34.562966
(Epoch 0 / 100) train acc: 0.105000; val_acc: 0.117333
(Iteration 101 / 84300) loss: 259881.220379
(Iteration 201 / 84300) loss: 1055579.485308
(Iteration 301 / 84300) loss: 2339215.731934
(Iteration 401 / 84300) loss: 4249701.282463
(Iteration 501 / 84300) loss: 6717560.132882
(Iteration 601 / 84300) loss: 9656308.503826
(Iteration 701 / 84300) loss: 13342785.214187
(Iteration 801 / 84300) loss: 16776874.567699
(Epoch 1 / 100) train acc: 0.112000; val_acc: 0.117333
(Iteration 901 / 84300) loss: 21751634.489095
(Iteration 1001 / 84300) loss: 26802301.128454
(Iteration 1101 / 84300) loss: 32374332.571673
(Iteration 1201 / 84300) loss: 37780063.041291
(Iteration 1301 / 84300) loss: 45047550.491834
(Iteration 1401 / 84300) loss: 51452017.970362
(Iteration 1501 / 84300) loss: 61412138.794138
(Iteration 1601 / 84300) loss: 68039058.021851
(Epoch 2 / 100) train acc: 0.099000; val_acc: 0.117333
(Iteration 1701 / 84300) loss: 77745260.623011
(Iteration 180

(Iteration 15101 / 84300) loss: 6101433819.797058
(Epoch 18 / 100) train acc: 0.105000; val_acc: 0.117333
(Iteration 15201 / 84300) loss: 6126162217.614614
(Iteration 15301 / 84300) loss: 6275892439.426982
(Iteration 15401 / 84300) loss: 6467352691.695419
(Iteration 15501 / 84300) loss: 6484111284.493578
(Iteration 15601 / 84300) loss: 6408674720.342392
(Iteration 15701 / 84300) loss: 6569520822.596554
(Iteration 15801 / 84300) loss: 6689876309.164126
(Iteration 15901 / 84300) loss: 6834642906.828927
(Iteration 16001 / 84300) loss: 7109219421.220366
(Epoch 19 / 100) train acc: 0.102000; val_acc: 0.117333
(Iteration 16101 / 84300) loss: 6992296582.158972
(Iteration 16201 / 84300) loss: 7229858774.966612
(Iteration 16301 / 84300) loss: 6969196543.629671
(Iteration 16401 / 84300) loss: 7223676926.187672
(Iteration 16501 / 84300) loss: 7184640355.597754
(Iteration 16601 / 84300) loss: 7451331387.319847
(Iteration 16701 / 84300) loss: 7541614663.954403
(Iteration 16801 / 84300) loss: 729712

(Iteration 29601 / 84300) loss: 23412671228.851814
(Iteration 29701 / 84300) loss: 23304692106.623596
(Iteration 29801 / 84300) loss: 24567555429.480156
(Iteration 29901 / 84300) loss: 23607823879.754074
(Iteration 30001 / 84300) loss: 24675536565.005524
(Iteration 30101 / 84300) loss: 23883787362.904060
(Iteration 30201 / 84300) loss: 24822694830.336548
(Iteration 30301 / 84300) loss: 24989815073.638714
(Epoch 36 / 100) train acc: 0.112000; val_acc: 0.117333
(Iteration 30401 / 84300) loss: 24841678639.822601
(Iteration 30501 / 84300) loss: 24892378929.280914
(Iteration 30601 / 84300) loss: 25126681158.050041
(Iteration 30701 / 84300) loss: 25988939061.125431
(Iteration 30801 / 84300) loss: 24671046574.262360
(Iteration 30901 / 84300) loss: 25366561645.165337
(Iteration 31001 / 84300) loss: 25674818091.841396
(Iteration 31101 / 84300) loss: 25474288045.430565
(Epoch 37 / 100) train acc: 0.121000; val_acc: 0.117333
(Iteration 31201 / 84300) loss: 26122674596.984444
(Iteration 31301 / 84

(Iteration 44301 / 84300) loss: 52270834129.520752
(Iteration 44401 / 84300) loss: 53269321883.007538
(Iteration 44501 / 84300) loss: 52855833177.178413
(Iteration 44601 / 84300) loss: 54261193579.001266
(Epoch 53 / 100) train acc: 0.131000; val_acc: 0.117333
(Iteration 44701 / 84300) loss: 52946396387.200180
(Iteration 44801 / 84300) loss: 53844114585.429321
(Iteration 44901 / 84300) loss: 54021688257.624199
(Iteration 45001 / 84300) loss: 54730460766.021729
(Iteration 45101 / 84300) loss: 55393257946.282639
(Iteration 45201 / 84300) loss: 54867102477.820663
(Iteration 45301 / 84300) loss: 56023571164.921982
(Iteration 45401 / 84300) loss: 55585116278.191910
(Iteration 45501 / 84300) loss: 54206425952.295937
(Epoch 54 / 100) train acc: 0.096000; val_acc: 0.117333
(Iteration 45601 / 84300) loss: 55693306286.601028
(Iteration 45701 / 84300) loss: 54813667955.305176
(Iteration 45801 / 84300) loss: 56991011771.969147
(Iteration 45901 / 84300) loss: 54925160522.630081
(Iteration 46001 / 84

(Iteration 59001 / 84300) loss: 93824677977.589249
(Epoch 70 / 100) train acc: 0.123000; val_acc: 0.117333
(Iteration 59101 / 84300) loss: 94740499281.901764
(Iteration 59201 / 84300) loss: 95285708027.851547
(Iteration 59301 / 84300) loss: 95370370923.471680
(Iteration 59401 / 84300) loss: 95427294977.771210
(Iteration 59501 / 84300) loss: 95133691380.605377
(Iteration 59601 / 84300) loss: 95790182746.431274
(Iteration 59701 / 84300) loss: 94792287437.555603
(Iteration 59801 / 84300) loss: 96308039454.901627
(Epoch 71 / 100) train acc: 0.118000; val_acc: 0.117333
(Iteration 59901 / 84300) loss: 95500988690.800476
(Iteration 60001 / 84300) loss: 98834053435.092682
(Iteration 60101 / 84300) loss: 95524640100.393616
(Iteration 60201 / 84300) loss: 95426590548.117661
(Iteration 60301 / 84300) loss: 97248863305.763336
(Iteration 60401 / 84300) loss: 95986888261.251846
(Iteration 60501 / 84300) loss: 95246173720.498230
(Iteration 60601 / 84300) loss: 95650103529.536209
(Epoch 72 / 100) trai

(Iteration 73501 / 84300) loss: 145067573133.774292
(Iteration 73601 / 84300) loss: 144220621124.377625
(Iteration 73701 / 84300) loss: 148237177559.542664
(Iteration 73801 / 84300) loss: 147700672108.989838
(Iteration 73901 / 84300) loss: 144743345976.487579
(Iteration 74001 / 84300) loss: 146770132830.192505
(Iteration 74101 / 84300) loss: 149071772741.924133
(Epoch 88 / 100) train acc: 0.108000; val_acc: 0.117333
(Iteration 74201 / 84300) loss: 145835262788.168030
(Iteration 74301 / 84300) loss: 143456838632.971252
(Iteration 74401 / 84300) loss: 150438526768.124084
(Iteration 74501 / 84300) loss: 147089773744.769623
(Iteration 74601 / 84300) loss: 147200983517.056946
(Iteration 74701 / 84300) loss: 150756316968.691162
(Iteration 74801 / 84300) loss: 152889426337.953217
(Iteration 74901 / 84300) loss: 148817249161.325317
(Iteration 75001 / 84300) loss: 154757979464.159363
(Epoch 89 / 100) train acc: 0.125000; val_acc: 0.117333
(Iteration 75101 / 84300) loss: 148213673607.355804
(Ite

In [None]:
# test accuracy for the single layer fc_relu model
# Round the model output to the nearest integer and compare with the labels.
# NOTE(review): assumes loss(X_test) called without labels returns one predicted
# value per sample, shaped like y_test — TODO confirm against fc_relu.
fc_relu_rounded = np.round(fc_relu_model.loss(X_test)).astype(int)
fc_relu_hits = np.sum(fc_relu_rounded == y_test)
fc_relu_hits / y_test.size

In [None]:
# fc_relu Classifier - Single Layer - sketched
import fc_relu
fc_relu_model = fc_relu.fc_relu_Classifier(input_dim=d, hidden_dim=None, num_classes=10,
                 weight_scale=1e-3, reg=0.0)
config = {'learning_rate':1e-2}

# Train on the sketched data set; passing data_mnist here would merely repeat
# the unsketched experiment under a "sketched" heading.
# The validation entries are coerced to 2-D features / 1-D targets so the solver
# receives the same array ranks as in data_mnist even when the sketch cell
# stored a single validation row.
data_sketched = dict(data_mnist_s)
data_sketched['X_val'] = np.atleast_2d(data_sketched['X_val'])
data_sketched['y_val'] = np.atleast_1d(data_sketched['y_val'])
# NOTE(review): the sketched targets are real-valued (A @ labels); confirm that
# fc_relu_Classifier with num_classes=10 and the solver accept such targets.

fc_relu_solv = solver.Solver(fc_relu_model, data_sketched, optim_config = config,
                  num_epochs=100, batch_size=64, print_every=100)
fc_relu_solv.train()

In [None]:
# test accuracy for the single layer fc_relu model trained on the sketched data
# Evaluates `fc_relu_model`, the model trained in the cell above; no
# `softmax_model` is ever created in this notebook, so referencing it raises a
# NameError on a fresh kernel.
# NOTE(review): assumes loss(X_test) without labels returns per-class scores of
# shape (num_samples, num_classes) — TODO confirm against fc_relu.
test_acc = np.sum(np.argmax(fc_relu_model.loss(X_test),axis=1) == y_test) / len(y_test)
test_acc

In [None]:
# Softmax Classifier - Double Layer
import softmax

# Same feature dimension `d` as the other models; hidden_dim=500 is presumably
# the width of the added hidden layer (per the heading above).
softmax_double_model = softmax.SoftmaxClassifier(
    input_dim=d,
    hidden_dim=500,
    num_classes=10,
    weight_scale=1e-3,
    reg=0.0,
)

# Optimizer settings handed to the solver.
config = dict(learning_rate=1e-2)

# print_every=1000 keeps the training log shorter than in the fc_relu runs.
softmax_double_solv = solver.Solver(
    softmax_double_model,
    data_mnist,
    optim_config=config,
    num_epochs=100,
    batch_size=64,
    print_every=1000,
)
softmax_double_solv.train()

In [None]:
# test accuracy for the double layer Softmax
# Predicted class = index of the largest value along axis 1 of the model output;
# accuracy = fraction of predictions that equal the test labels.
double_scores = softmax_double_model.loss(X_test)
double_pred = np.argmax(double_scores, axis=1)
test_acc = np.sum(double_pred == y_test) / len(y_test)
test_acc


In [None]:
#Load MNIST Data
import numpy as np
from layers import *
import solver

import torch
import torchvision

# Reload MNIST for the convolutional model: same preprocessing as the earlier
# loading cell, but the arrays keep the per-image layout produced by the loader
# (no flattening into feature rows).
# Normalize gets exactly one mean and one std because the images have a single
# channel.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (0.5,)),
])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                      download=True, transform=transform)
# One batch spans the whole training set, so a single next() call loads it all.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=True)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                     download=True, transform=transform)
# The test loader must wrap `testset`; wrapping `trainset` here would make the
# reported "test" accuracy a measurement on training samples.
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)

classes = ('0', '1', '2', '3',
           '4', '5', '6', '7', '8', '9')

# Pull the full training set out of the loader as numpy arrays.
# The builtin next() is used because loader iterators in current PyTorch
# releases no longer expose a .next() method.
dataiter = iter(trainloader)
X_train_full, y_train_full = next(dataiter)
X_train_full = X_train_full.numpy()
y_train_full = y_train_full.numpy()

# 90% of the (already shuffled) samples train the model, the rest validate it.
n_train = int(np.round(0.9*X_train_full.shape[0]))

X_train = X_train_full[:n_train]
X_val = X_train_full[n_train:]
y_train = y_train_full[:n_train]
y_val = y_train_full[n_train:]
data_mnist = {'X_train':X_train, 'X_val':X_val, 'y_train':y_train, 'y_val':y_val}

# Same extraction for the held-out test set.
dataiter = iter(testloader)
X_test, y_test = next(dataiter)
X_test = X_test.numpy()
y_test = y_test.numpy()

In [None]:
# CNN - no batch norm or dropout
import cnn

# Per-sample input shape, read off the training sample at index 1.
d = X_train_full[1].shape

cnn_model = cnn.ConvNet(
    input_dim=d,
    num_filters=8,
    filter_size=3,
    hidden_dim=100,
    num_classes=10,
    weight_scale=1e-3,
    reg=0.0,
    dtype=np.float32,
)

# Optimizer settings handed to the solver.
config = dict(learning_rate=1e-2)

# Short run: 5 epochs, with the loss reported every 2 iterations.
cnn_solv = solver.Solver(
    cnn_model,
    data_mnist,
    optim_config=config,
    num_epochs=5,
    batch_size=64,
    print_every=2,
)
cnn_solv.train()