In [1]:
# Neural Network Practice
# Classify between 0s and 8s in MNIST
# Can only look at fast.ai notebook for how the data is originally loaded

In [34]:
# fastbook re-exports the torch / fastai / PIL names used below (fb.torch, fb.tensor, fb.Image, ...)
import fastbook as fb

In [35]:
# Display the URL of the MNIST PNG archive that is downloaded in the next cell
fb.URLs.MNIST

'https://s3.amazonaws.com/fast-ai-imageclas/mnist_png.tgz'

In [37]:
# Fetch and unpack the MNIST archive with fastai's untar_data; `path` is the
# extracted dataset root. Listing it shows the `training` and `testing` folders.
path = fb.untar_data(fb.URLs.MNIST)
path.ls()

(#2) [Path('/Users/sinany/.fastai/data/mnist_png/training'),Path('/Users/sinany/.fastai/data/mnist_png/testing')]

In [68]:
# load data
def _load_digit(split, digit):
    """Stack every image under path/split/digit into one float tensor.

    split: dataset sub-folder, "training" or "testing".
    digit: class sub-folder name, e.g. '0' or '8'.
    Returns a float tensor with one image per index along dim 0, with pixel
    values divided by 255.
    """
    files = (path/split/digit).ls()
    return fb.torch.stack([fb.tensor(fb.Image.open(img)) for img in files]).float()/255

train_0s = _load_digit("training", '0')
train_8s = _load_digit("training", '8')

test_0s = _load_digit("testing", '0')
test_8s = _load_digit("testing", '8')

In [None]:
# baseline model: compare distance to "mean" 0 or 8

# "average" digit of each class: per-pixel mean over dim 0 (the image index)
mean_0 = train_0s.mean(dim=0)
mean_8 = train_8s.mean(dim=0)

def baseline(x):
    """Label each image by whichever mean digit it is closer to.

    x: batch of images; dims 1 and 2 are averaged away, so x needs the image
       index in dim 0 and the pixel grid in dims 1 and 2.
    Returns a float tensor with one entry per image: 0.0 means "0", 1.0 means "8".
    Distance is the mean absolute pixel difference; a tie is labelled 0.0.
    """
    dist_to_0 = (x - mean_0).abs().mean(dim=(1, 2))
    dist_to_8 = (x - mean_8).abs().mean(dim=(1, 2))
    # farther from the mean 0 than from the mean 8 -> call it an 8
    return (dist_to_0 > dist_to_8).float()

y_base0 = baseline(test_0s)
y_base8 = baseline(test_8s)

# baseline() outputs 0.0 for "0" and 1.0 for "8", so the mean of its output is
# the fraction of images labelled "8". .item() prints a plain number instead of
# a tensor repr, matching what accuracy() prints for the neural model.
print(f"baseline accuracy for 0s is {1 - y_base0.mean().item()}")
print(f"baseline accuracy for 8s is {y_base8.mean().item()}")

In [201]:
# neural model: learn weights for a two layer model that is coded from scratch (everything except gradients)

# this model is not supposed to be elegant, extensible, or efficient. it is meant to be crafted from memory
# this is an exercise in first-principles understanding of machine learning, not engineering

mid_size = 256  # width of the hidden layer

# Every parameter starts as a standard-normal draw.
# Order: layer-0 weights, layer-0 biases, layer-1 weights, layer-1 bias.
params = [
    fb.torch.randn(shape)
    for shape in [(28 * 28, mid_size), (mid_size,), (mid_size, 1), (1,)]
]

# switch on gradient tracking for each parameter tensor
for p in params:
    p.requires_grad_()

def model(x):
    """Two-layer fully connected network: linear -> ReLU -> linear.

    x: batch of images; everything after the batch dimension is flattened.
    Returns the raw output of the second linear layer, one score per image.
    The score is unbounded, not a probability: `loss` applies the sigmoid.
    A larger score means "more likely an 8".
    """
    flat = x.flatten(1)

    # layer 0: affine map into the hidden layer
    hidden = flat @ params[0] + params[1]

    # non-linearity (ReLU): negative pre-activations are clipped to zero
    hidden = fb.torch.maximum(hidden, fb.tensor(0))

    # layer 1: affine map from the hidden layer to a single output
    return hidden @ params[2] + params[3]

def accuracy():
    """Print overall, 0-class and 8-class accuracy of `model`.

    Accuracy is measured on the training images (train_0s / train_8s), not on
    the held-out test images. Prints three numbers on one line:
    overall accuracy, accuracy on the 0s, accuracy on the 8s.
    """
    def predict(images):
        # model() returns raw scores; a sigmoid above 0.5 means "8" (label 1.0)
        return (model(images).sigmoid() > 0.5).float().reshape(-1)

    # Evaluation needs no gradients; no_grad avoids building an autograd graph
    # over the whole training set on every call.
    with fb.torch.no_grad():
        y_nn0 = predict(train_0s)
        y_nn8 = predict(train_8s)

    n0, n8 = len(y_nn0), len(y_nn8)
    acc_0 = 1 - y_nn0.sum() / n0  # 0s are correct when predicted 0.0
    acc_8 = y_nn8.sum() / n8      # 8s are correct when predicted 1.0
    acc = ((n0 - y_nn0.sum()) + y_nn8.sum()) / (n0 + n8)
    print(acc.item(), acc_0.item(), acc_8.item())

In [None]:
# our NN has random performance: ~50% most of the time
    
# define loss function
def loss(preds, truths):
    """Mean distance between the predicted probability and the 0/1 label.

    preds:  raw model scores, one per sample; a sigmoid is applied here.
    truths: labels, one per sample; 1 marks an 8, any other value is treated as 0.
    Returns a scalar tensor in [0, 1]; lower is better.

    Both inputs are flattened to 1-D before they are compared. Without that,
    scores of shape (batch, 1) and labels of shape (batch,) broadcast to a
    (batch, batch) matrix that pairs every score with every label, so the mean
    no longer depends on which sample carries which label.
    """
    probs = preds.sigmoid().reshape(-1)
    is_eight = truths.reshape(-1) == 1
    # per-sample error: 1 - p where the label is 1, p otherwise
    return (1 - probs).where(is_eight, probs).mean()

# create our training dataset
train_x = fb.torch.cat([train_0s, train_8s]).view(-1,28,28)
# Labels are shaped (N, 1) so each batch lines up element-for-element with the
# model's (batch, 1) output. A flat (N,) label vector would broadcast against
# that output into a (batch, batch) matrix inside the loss.
train_y = fb.tensor([0] * len(train_0s) + [1] * len(train_8s)).unsqueeze(1)

NUM_EPOCHS = 1000
BATCH_SIZE = 64
LEARNING_R = 0.1

for epoch in range(NUM_EPOCHS):
    # visit the samples in a fresh random order every epoch
    indices = fb.torch.randperm(len(train_x))
    for i in range(0, len(train_x), BATCH_SIZE):
        batch_idx = indices[i:i + BATCH_SIZE]
        batch_x = train_x[batch_idx]
        batch_y = train_y[batch_idx]

        preds = model(batch_x)
        lossv = loss(preds, batch_y)
        lossv.backward()
        # Plain SGD step. no_grad keeps the in-place update out of the autograd
        # graph without going through the legacy `.data` attribute.
        with fb.torch.no_grad():
            for param in params:
                param -= param.grad * LEARNING_R
                param.grad.zero_()  # gradients accumulate, so reset after each step

    accuracy()

0.5901987552642822 0.9800776839256287 0.19552212953567505
0.5927467346191406 0.9807530045509338 0.19996581971645355
0.6065908074378967 0.9760256409645081 0.2326098084449768
0.6082045435905457 0.9775451421737671 0.23431892693042755
0.6155087351799011 0.9768698215484619 0.2497009038925171
0.6144046187400818 0.978220522403717 0.24611178040504456
0.631136417388916 0.9733242988586426 0.28473764657974243
0.6316459774971008 0.9753503203392029 0.2837121784687042
0.6462544798851013 0.9724801778793335 0.3160143494606018
0.6385255455970764 0.9755191802978516 0.2973850667476654
0.6258705854415894 0.9787269830703735 0.26867201924324036
0.6334295868873596 0.9777140021324158 0.28490856289863586
0.6595889329910278 0.9731554985046387 0.34216374158859253
0.6874468922615051 0.9631943106651306 0.4083062708377838
0.7100390791893005 0.9552591443061829 0.4618014097213745
0.7080855965614319 0.958129346370697 0.4549649655818939
0.7105486392974854 0.9593111872673035 0.4587250053882599
0.6991676688194275 0.96673

0.9482758641242981 0.9643761515617371 0.9319774508476257
0.9501444101333618 0.9588046669960022 0.9413775205612183
0.949889600276947 0.9616748094558716 0.9379593133926392
0.9513334631919861 0.958129346370697 0.9444539546966553
0.9492101073265076 0.9638696908950806 0.934370219707489
0.9492950439453125 0.9648826718330383 0.9335156679153442
0.946832001209259 0.9684281349182129 0.9249700903892517
0.9511635899543762 0.9618436694145203 0.9403520822525024
0.9503991603851318 0.9633631706237793 0.9372757077217102
0.9527773261070251 0.9603241682052612 0.9451375603675842
0.9526074528694153 0.9625189900398254 0.9425739049911499
0.949974536895752 0.9684281349182129 0.931293785572052
0.9490402340888977 0.9716359972953796 0.9261664748191833
0.9456429481506348 0.9760256409645081 0.9148863554000854
0.9455580115318298 0.9758568406105042 0.9148863554000854
0.9467470645904541 0.9748438596725464 0.9183045625686646
0.9464073181152344 0.9755191802978516 0.9169372916221619
0.9463223814964294 0.9758568406105042