In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import os
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset
from torchvision.transforms import ToTensor
from pathlib import Path
import requests
import torch.nn.functional as F

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


# Applying Introduction of PyTorch Tutorial to Fashion.MNist data

In [12]:
#Getting data
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

#defining loss function, weights, and bias tensors
loss_func = F.nll_loss
weights = torch.randn(28*28,10) #each image has 28x28 pixels
weights.requires_grad_()
bias = torch.zeros(10,requires_grad = True)
lr = 0.001

#"xb @ weights + bias" is our linear map and log_softmax is the nonlinear part. 
def model(xb):
    return F.log_softmax(xb @ weights + bias,dim=1)

def accuracy(xb, yb):
    max_xb = torch.argmax(xb,dim=1)
    return (max_xb == yb).float().mean()

#checking loss before training
print(loss_func(model(test_data.data.reshape(-1,28*28).float()),test_data.targets))

batch_size = 100;
total_epochs = 2

for epoch in range(total_epochs):
    #breaking up training data into batch_size chunks, convert into a vector instead of a matrix, 
    #and turn the data into floats instead of integers.
    for i in range(training_data.data.shape[0]//batch_size + 1):
        xb=training_data.data[i*batch_size:(i+1)*batch_size].reshape(-1,28*28).float()
        pred = model(xb)
        yb = training_data.targets[i*batch_size:(i+1)*batch_size]
        loss = loss_func(pred,yb)
        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            weights.grad.zero_()
            bias -= bias.grad * lr
            bias.grad.zero_()
        
#checking loss after training
print(loss_func(model(test_data.data.reshape(-1,28*28).float()),test_data.targets), accuracy(model(test_data.data.reshape(-1,28*28).float()),test_data.targets))

tensor(3569.0107, grad_fn=<NllLossBackward0>)
tensor(230.3740, grad_fn=<NllLossBackward0>) tensor(0.7594)


# Refactoring PyTorch Tutorial Code on Fashion.MNist Data

In [22]:
#Getting data
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

batch_size = 100;

#defining dataset and dataloader
train_ds = TensorDataset(training_data.data.reshape(-1,28*28).float(), training_data.targets)
valid_ds = TensorDataset(test_data.data.reshape(-1,28*28).float(), test_data.targets)

def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(28*28,10)
        
    def forward(self, xb):
        return F.log_softmax(self.lin(xb),dim=1)

#defining loss function, weights, and bias tensors
loss_func = F.nll_loss
lr = 0.001

def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(),lr=lr)


def accuracy(xb, yb):
    max_xb = torch.argmax(xb,dim=1)
    return (max_xb == yb).float().mean()

def loss_batch(model, loss_func, xb, yb, opt = None):
    loss = loss_func(model(xb),yb)
    
    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()
        
    return loss.item(), len(xb)

def acc_batch(model, xb, yb):
    acc = accuracy(model(xb),yb)
    return acc, len(xb)
        
def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb,yb in train_dl:
            loss_batch(model,loss_func,xb, yb, opt)
            
        model.eval()
        with torch.no_grad():
            losses, nums = zip(*[loss_batch(model, loss_func, xb, yb) for xb,yb in valid_dl])
            accs, nums = zip(*[acc_batch(model, xb, yb) for xb,yb in valid_dl])
        val_loss = np.sum(np.multiply(losses,nums))/np.sum(nums)
        val_acc = np.sum(np.multiply(accs,nums))/np.sum(nums)
        
        print(epoch, val_loss,val_acc)

        

In [23]:
total_epochs = 10;
train_dl, valid_dl = get_data(train_ds,valid_ds,batch_size)
model, opt = get_model()
fit(total_epochs, model, loss_func, opt, train_dl, valid_dl)

0 104.30940696716308 0.7477999973297119
1 80.01938446044922 0.7752000033855438
2 111.76534820556641 0.7363999998569488
3 113.08875564575196 0.7674999988079071
4 121.05397827148437 0.7722999978065491
5 86.86415077209473 0.7899000000953674
6 136.05244064331055 0.7884000015258789
7 82.35311103820801 0.7968000006675721
8 65.5009937286377 0.8271000015735627
9 113.14762939453125 0.7515000021457672


# Copying Code from Pytorch Tutorial

In [32]:
sum(loss_func(model(xb),yb).data for xb, yb in valid_dl)

tensor(5193.6035)

In [3]:
from pathlib import Path
import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
        content = requests.get(URL + FILENAME).content
        (PATH / FILENAME).open("wb").write(content)

In [4]:
import pickle
import gzip

with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

In [5]:
#Getting data
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape

#defining loss function, weights, and bias tensors
loss_func = F.nll_loss
weights = torch.randn(28*28,10) #each image has 28x28 pixels
weights = torch.randn(784, 10) / math.sqrt(784)
weights.requires_grad_()
bias = torch.zeros(10,requires_grad = True)
lr = 0.5

def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

#"xb @ weights + bias" is our linear map and log_softmax is the nonlinear part. 
def model(xb):
    return F.log_softmax(xb @ weights + bias,dim=0)

#checking loss before training
print(loss_func(model(x_valid.reshape(-1,28*28).float()),y_valid), accuracy(model(x_valid.reshape(-1,28*28).float()), y_valid))

batch_size = 64;
total_epochs = 2

for epoch in range(total_epochs):
    #breaking up training data into batch_size chunks, convert into a vector instead of a matrix, 
    #and turn the data into floats instead of integers.
    for i in range(x_train.shape[0]//batch_size + 1):
        xb=x_train.data[i*batch_size:(i+1)*batch_size].reshape(-1,28*28).float()
        pred = model(xb)
        yb = y_train[i*batch_size:(i+1)*batch_size]
        loss = loss_func(pred,yb)
        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            weights.grad.zero_()
            bias -= bias.grad * lr
            bias.grad.zero_()
        
#checking loss after training
print(loss_func(model(x_valid),y_valid), accuracy(model(x_valid), y_valid))

tensor(9.2214, grad_fn=<NllLossBackward0>) tensor(0.0952)
tensor(7.5153, grad_fn=<NllLossBackward0>) tensor(0.9013)
