In [1]:
import logging
from typing import Tuple

from fastai.vision.all import *

In [2]:


logging.basicConfig(filename='mnist.log', level=logging.INFO)
def custom_logger(text):
	logging.info(text)	
	print(text)



In [3]:
class Dataset:
    def __init__(self, data, labels, split=None, shuffle=True):
        if isinstance(data, list): data = tensor(data)
        if isinstance(labels, list): labels = tensor(labels)    

        if data.shape[0] != labels.shape[0]:
            raise ValueError("The data and labels shapes don't match")

#         labels = labels.reshape(-1, 1) # To maintain a proper shape

        if shuffle is True:
            indexes = torch.randperm(data.shape[0])
            data = data[indexes]
            labels = labels[indexes]

        if split:
            split_int = int(data.shape[0] * split)    
            self.train = Dataset(data[:split_int], labels[:split_int])
            self.valid = Dataset(data[split_int:], labels[split_int:])

        self.data = data
        self.labels = labels

    def __getitem__(self, key):
        return (self.data[key],self.labels[key])

    def __iter__(self):
        return iter((self.data, self.labels))

    def __len__(self):
        return self.data.shape[0] # could any of them
    
def Dataloader(ds, bs=100):
    return [ds[pos:pos + bs] for pos in range(0, len(ds), bs)]

In [7]:
# Forward Pass Funcs
def sigmoid(z):
    return 1 / (1 + torch.exp(-z))

def softmax(x):
    return x.softmax()

class LinearModel():
    def __init__(self, shape_in, shape_out):
        self.model = nn.Linear(shape_in, shape_out)
        self.weight = self.model.weight
        self.bias = self.model.bias
        
    def __call__(self, x:torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        output = self.model(x)
        return output
    
    
torch.random.manual_seed(0)
model = LinearModel(2, 2)

y = tensor([1, 0, 1])
x = tensor([[2.,3.], [3.,4.], [1.,4.5]])

o = model(x)
cross_entr_loss = nn.CrossEntropyLoss()

print(o)

print(F.nll_loss(o.log_softmax(-1), y, reduction='none'))
print(cross_entr_loss(o,y))

tensor([[ 0.8550, -2.5355],
        [ 1.2291, -3.6379],
        [ 1.4293, -2.7341]], grad_fn=<AddmmBackward>)
tensor([3.4237, 0.0077, 4.1789], grad_fn=<NllLossBackward>)
tensor(2.5367, grad_fn=<NllLossBackward>)


In [9]:
print(accuracy(tensor([0,0,0]), o))

1.0


In [8]:
# Loss & Metric
def cross_entropy_loss(y, ypred) -> torch.Tensor:
    return F.cross_entropy(ypred, y)

def accuracy(y, ypred) -> float:
    return (ypred.argmax(dim=1)==y).float().mean().item()


In [10]:
# Training Procedures
class Optimizer():
    def __init__(self, model, lr):
        self.parameters = model.weight, model.bias
        self.lr = lr
        
    def step(self):
        for param in self.parameters:
            param.data -= self.lr*param.grad.data
            param.grad.zero_()
    
    
def validate_model(model, valid_dl):
    with torch.no_grad():
        val_loss = tensor(0.)
        val_accs = []
        for x, y in valid_dl:
            yprobs = model(x)
            val_loss += cross_entropy_loss(y, yprobs) # Batch Loss
            val_accs.append(accuracy(y, yprobs)) # Batch Acc

    return val_loss.item(), tensor(val_accs).mean().item() #Overall Loss, Acc

In [11]:
torch.stack([tensor([0,1]), tensor([1,2])])

tensor([[0, 1],
        [1, 2]])

In [19]:
(path/'training').ls()

(#10) [Path('training/2'),Path('training/7'),Path('training/9'),Path('training/8'),Path('training/5'),Path('training/3'),Path('training/4'),Path('training/6'),Path('training/0'),Path('training/1')]

In [16]:
# Download the Data
path = untar_data(URLs.MNIST)
Path.BASE_PATH = path
custom_logger('Downloaded Data')


# Load the data
classes = [str(i) for i in range(1,10)]
for cls in classes:
    digit_paths = (path/'train'/cls).ls().sorted()
    custom_logger('Loaded Data')
    digit_tensors = torch.stack([tensor(np.array(Image.open(i))) for i in digit_paths])
    custom_logger("Converted into Tensors...")
    
    labels_factory.append(tensor([digit_tensors.shape[0]*int(cls)]))
    digit_factory.append(digit_tensors)
# data = torch.stack(digit_factory)

Downloaded Data


FileNotFoundError: [Errno 2] No such file or directory: '/home/isa/.fastai/data/mnist_png/train/1'

In [246]:


# Building Images and Labels
# data = torch.stack(three_tensors + seven_tensors).float()/255
# data = data.reshape(data.shape[0], -1)
print(f"X has size {data.shape}")

# 3 is 1 and 7 is 0
labels = tensor([0]*len(seven_tensors) + [1]*len(three_tensors))
print(f"Y has size {labels.shape}")

# Looking at a sample image
sample_image = data[0]
sample_image_size = data[0].shape
custom_logger(f"An image has a size of : {sample_image_size}")

# Building Dataset
ds =  Dataset(data, labels, split=0.8)    

# Building Dataloaders
train_dl = Dataloader(ds.train)
valid_dl = Dataloader(ds.valid)
custom_logger("Built Dataset & Dataloaders")


# Model Building, Training & Evaluation
torch.random.manual_seed(42)

ln1 = LinearModel(784, 2)
LR = 0.03
EPOCHS = 10

optim = Optimizer(ln1, LR)

for _ in range(EPOCHS):
    batch_loss = 0
    batch_labels = []
    for x_train, y_train in train_dl:
        yprobs = ln1(x_train) #forward-pass
        
        loss = cross_entropy_loss(y_train, yprobs) # calculate loss

        loss.backward() # generate gradients
        
        optim.step() # back-propagate

        # Batch Loss
        batch_loss += loss.detach().item()

    # Avg Valid Accuracy
    valid_loss, valid_acc = validate_model(ln1, valid_dl)
    print(f"\n| Epoch {_} |\n"
          f"Train Loss : {batch_loss:.2f}, Valid Accuracy : {valid_acc:.2%}\n")
    


Downloaded Data
Loaded Data
Number of Images : Threes -> 6131, Sevens -> 6265
Converted into Tensors...
X has size torch.Size([12396, 784])
Y has size torch.Size([12396])
An image has a size of : torch.Size([784])
Built Dataset & Dataloaders

| Epoch 0 |
Train Loss : 21.03, Valid Accuracy : 96.02%


| Epoch 1 |
Train Loss : 13.53, Valid Accuracy : 96.47%


| Epoch 2 |
Train Loss : 12.46, Valid Accuracy : 96.88%


| Epoch 3 |
Train Loss : 11.87, Valid Accuracy : 96.92%


| Epoch 4 |
Train Loss : 11.47, Valid Accuracy : 97.16%


| Epoch 5 |
Train Loss : 11.16, Valid Accuracy : 97.21%


| Epoch 6 |
Train Loss : 10.93, Valid Accuracy : 97.17%


| Epoch 7 |
Train Loss : 10.73, Valid Accuracy : 97.17%


| Epoch 8 |
Train Loss : 10.57, Valid Accuracy : 97.25%


| Epoch 9 |
Train Loss : 10.43, Valid Accuracy : 97.29%



In [93]:


x, y = train_dl[0][0][2], train_dl[0][1][2] # Fix broken code
model(x)


(tensor([1.]), tensor([0.5372], grad_fn=<MulBackward0>))