In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch
import torchvision
%matplotlib inline
matplotlib.rcParams['figure.facecolor']='white'

In [None]:
# Load MNIST from data/ (download=True fetches it when needed); ToTensor turns each image into a tensor.
dataset=torchvision.datasets.MNIST(root="data/",download=True,transform=torchvision.transforms.ToTensor())

In [None]:
len(dataset)

In [None]:
type(dataset)

In [None]:
image,label=dataset[193]
plt.imshow(image[0],cmap='gray')
plt.title(label)
plt.show()

In [None]:
# Hold out `val_size` samples of `dataset` for validation; the remainder is used for training.
val_size=10000
train_size=len(dataset)-val_size
# Seed the split explicitly so a fresh kernel reproduces the same train/validation partition.
train_ds,val_ds=torch.utils.data.random_split(
    dataset,
    (train_size,val_size),
    generator=torch.Generator().manual_seed(42),
)

In [None]:
len(train_ds)

In [None]:
len(val_ds)

In [None]:
# Split selected with train=False. No download flag is passed here, so this relies on the
# files already being present under data/ (the earlier `dataset` cell passes download=True).
test_dataset=torchvision.datasets.MNIST(root="data/",train=False,transform=torchvision.transforms.ToTensor())

In [None]:
batch_size=64
# Only the training loader is shuffled. evaluate() averages per-batch means with equal
# weight, so when the last batch is smaller than `batch_size` a shuffled order changes the
# reported validation/test metrics from run to run; a fixed order keeps them repeatable.
train_dataloader=torch.utils.data.DataLoader(train_ds,batch_size,shuffle=True,pin_memory=True,num_workers=4)
val_dataloader=torch.utils.data.DataLoader(val_ds,batch_size,shuffle=False,pin_memory=True,num_workers=4)
test_dataloader=torch.utils.data.DataLoader(test_dataset,batch_size,shuffle=False,pin_memory=True,num_workers=4)

In [None]:
# Fetch exactly one batch from the training loader and inspect its first sample.
# next(iter(...)) states the "just one batch" intent directly instead of a loop that
# always breaks on its first pass.
batch=next(iter(train_dataloader))
images,labels=batch
print(images.shape)
plt.imshow(images[0,0],cmap="gray")
plt.show()
print(labels[0].item())

In [None]:
# Tile `images` into a grid with 8 images per row and move channels last for imshow.
images_=torchvision.utils.make_grid(images,nrow=8)
images_=images_.permute(1,2,0)
# The figure must be created BEFORE drawing: calling plt.figure() after imshow opens a new,
# empty figure, so the size and axis("off") would not apply to the grid.
plt.figure(figsize=(8,8))
plt.imshow(images_,cmap='gray')
plt.axis("off")
plt.show()

In [None]:
def accuracy(pred,labels):
    """Return the fraction of rows of `pred` whose arg-max column equals the matching label.

    Args:
        pred: 2-D tensor of scores; the predicted class is the index of the largest
            value along dim 1.
        labels: tensor of class indices compared element-wise with the predictions.

    Returns:
        A 0-dim tensor built from the Python float `correct / len(labels)`.
    """
    predicted=pred.max(dim=1).indices
    n_correct=(predicted==labels).sum().item()
    return torch.tensor(n_correct/len(labels))

In [None]:
class MnistModel(torch.nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features per flattened sample.
        hidden_size: width of the hidden layer.
        output_size: number of output scores (classes).
    """
    def __init__(self,input_size,hidden_size,output_size):
        super().__init__()
        self.layer1=torch.nn.Linear(input_size,hidden_size)
        self.layer2=torch.nn.Linear(hidden_size,output_size)
    def forward(self,X):
        """Flatten `X` to (-1, input_size) and return the raw output scores."""
        # Flatten to the width layer1 was built with rather than a hard-coded 784, so the
        # model works for any `input_size` passed to the constructor.
        X=X.reshape(-1,self.layer1.in_features)
        out=self.layer1(X)
        out=torch.nn.functional.relu(out)
        out=self.layer2(out)
        return out
    def training_step(self,batch):
        """Return the cross-entropy loss for one `(images, labels)` batch."""
        images,labels=batch
        out=self(images)
        loss=torch.nn.functional.cross_entropy(out,labels)
        return loss
    def validation_step(self,batch):
        """Return `{"val_acc", "val_loss"}` tensors for one `(images, labels)` batch."""
        images,labels=batch
        out=self(images)
        loss=torch.nn.functional.cross_entropy(out,labels)
        acc=accuracy(out,labels)
        # The loss is only reported, never back-propagated; detach it so that collecting one
        # result per batch does not keep every batch's autograd graph alive.
        return {"val_acc":acc,"val_loss":loss.detach()}
    def validation_step_epoch(self,outputs):
        """Average per-batch results into a dict of Python floats.

        Every batch is weighted equally, regardless of how many samples it held.
        """
        loss_=[X["val_loss"] for X in outputs]
        loss__=torch.stack(loss_).mean()
        acc_=[X["val_acc"] for X in outputs]
        acc__=torch.stack(acc_).mean()
        return {"val_loss":loss__.item(),"val_acc":acc__.item()}
    def epoch_end(self,epoch,result):
        """Print the validation accuracy and loss stored in `result` for `epoch`."""
        print("Epoch [{}] Accuracy: {:.4f} Loss: {:.4f}".format(epoch,result["val_acc"],result["val_loss"]))

In [None]:
def evaluate(model,val_dataloader):
    """Run `model.validation_step` over every batch and aggregate the results.

    Evaluation runs in eval mode with gradient tracking disabled, so no autograd graph is
    built or retained for the validation batches. The model's previous train/eval mode is
    restored before returning.

    Args:
        model: a `torch.nn.Module` exposing `validation_step(batch)` and
            `validation_step_epoch(outputs)`.
        val_dataloader: iterable of batches passed one by one to `validation_step`.

    Returns:
        Whatever `model.validation_step_epoch` returns for the collected per-batch outputs.
    """
    was_training=model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs=[model.validation_step(batch) for batch in val_dataloader]
    finally:
        # Restore the caller's mode even if a validation step raises.
        model.train(was_training)
    return model.validation_step_epoch(outputs)

In [None]:
def fit(model,epochs,lr,train_dataloader,val_dataloader,opt=torch.optim.SGD):
    """Train `model` for `epochs` passes and return the per-epoch validation results.

    Args:
        model: object exposing `parameters()`, `training_step(batch)` and
            `epoch_end(epoch, result)`.
        epochs: number of passes over `train_dataloader`.
        lr: learning rate, passed as the second positional argument to `opt`.
        train_dataloader: iterable of training batches.
        val_dataloader: iterable of batches handed to `evaluate` after each epoch.
        opt: optimizer factory called as `opt(model.parameters(), lr)`.

    Returns:
        A list with one `evaluate` result per epoch, in order.
    """
    optimizer=opt(model.parameters(),lr)
    history=[]
    for epoch in range(epochs):
        # One optimisation step per batch; gradients are cleared right after each step.
        for batch in train_dataloader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        epoch_result=evaluate(model,val_dataloader)
        model.epoch_end(epoch,epoch_result)
        history.append(epoch_result)
    return history

In [None]:
input_size=784
hidden_size=64
output_size=10

In [None]:
model1=MnistModel(input_size,hidden_size,output_size)

In [None]:
history=[evaluate(model1,val_dataloader)]
history

In [None]:
history+=fit(model1,5,0.5,train_dataloader,val_dataloader)

In [None]:
evaluate(model1,test_dataloader)

In [None]:
torch.cuda.is_available()

In [None]:
def get_default_device():
    """Return the CUDA device when `torch.cuda.is_available()` is true, else the CPU device."""
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
device=get_default_device()

In [None]:
device

In [None]:
def to_device(data,device):
    """Move `data` to `device`, recursing into lists and tuples.

    Lists and tuples (at any nesting depth) come back as plain lists of moved elements;
    anything else is moved with `data.to(device, non_blocking=True)`.
    """
    if not isinstance(data,(list,tuple)):
        return data.to(device,non_blocking=True)
    moved=[]
    for item in data:
        moved.append(to_device(item,device))
    return moved

In [None]:
class dataloader_device:
    """Wrap an iterable of batches so each batch goes through `to_device` when iterated.

    `len()` is forwarded to the wrapped object.
    """
    def __init__(self,data,device):
        self.data,self.device=data,device
    def __len__(self):
        return len(self.data)
    def __iter__(self):
        # Lazily move one batch at a time as the consumer asks for it.
        yield from (to_device(batch,self.device) for batch in self.data)

In [None]:
train_loader=dataloader_device(train_dataloader,device)
val_loader=dataloader_device(val_dataloader,device)
test_loader=dataloader_device(test_dataloader,device)

In [None]:
model2=MnistModel(input_size,hidden_size,output_size)
model2=to_device(model2,device)

In [None]:
history_new=[evaluate(model2,val_loader)]
history_new

In [None]:
history_new+=fit(model2,5,0.5,train_loader,val_loader)

In [None]:
history_new+=fit(model2,5,0.05,train_loader,val_loader)

In [None]:
evaluate(model2,test_loader)

In [None]:
# Validation loss for each entry recorded in `history_new`, drawn on an explicit Axes.
loss=[entry["val_loss"] for entry in history_new]
fig,ax=plt.subplots()
ax.plot(loss,"-*")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
plt.show()

In [None]:
# Validation accuracy for each entry recorded in `history_new`.
# Kept in its own name so the `loss` list from the loss plot is not overwritten with
# accuracies.
val_accuracies=[X["val_acc"] for X in history_new]
plt.plot(val_accuracies,"-*")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.show()

In [None]:
evaluate(model2,test_loader)

In [None]:
model2

In [None]:
# Total number of elements across all of model2's parameters (weights and biases of both layers).
sum(param.numel() for param in model2.parameters())

In [None]:
def predict(model2,image,device):
    """Return the predicted class index for a single image.

    Args:
        model2: callable model that maps a (N, 784) tensor to per-class scores.
        image: tensor holding one image; it is flattened to shape (-1, 784).
        device: device the image is moved to before the forward pass.

    Returns:
        The index of the highest score as a Python int. `.item()` is used, so the input is
        expected to flatten to exactly one row.
    """
    image=to_device(image,device)
    image=image.reshape(-1,784)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        pred=model2(image)
    _,ans=torch.max(pred,dim=1)
    return ans.item()

In [None]:
image,label=test_dataset[8989]
plt.imshow(image[0],cmap="gray")
plt.show()
print("Output: ",label," Predicted: ",predict(model2,image,device))

In [None]:
# Save only the learned parameters. Pickling the whole module ties the file to this
# notebook's class definition; a state_dict can be restored with
# MnistModel(input_size,hidden_size,output_size).load_state_dict(torch.load("best.pth")).
torch.save(model2.state_dict(),"best.pth")