In [1]:
import torch
from torch import nn
import torchvision
import matplotlib.pyplot as plt

In [2]:
DATA_ROOT_DIR="FashionMNIST"

In [3]:
train_data=torchvision.datasets.FashionMNIST(
root=DATA_ROOT_DIR,
train=True,
download=True,
transform=torchvision.transforms.ToTensor())
test_data=torchvision.datasets.FashionMNIST(
root=DATA_ROOT_DIR,
train=False,
download=True,
transform=torchvision.transforms.ToTensor())
"""
ToTensor()->basically scalling down from 0 to 1
"""

'\nToTensor()->basically scalling down from 0 to 1\n'

In [4]:
BATCH_SIZE=64
"""
train_data_loader-> its a methodand its actually create an python iterator. Its load our data and its take care of all our parallel processing like how you load the data and how you shuffling.

"""
train_data_loader=torch.utils.data.DataLoader(train_data, batch_size=BATCH_SIZE)
test_data_loader=torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE)

In [5]:
for X, y in test_data_loader:
    print(X.shape)
    #For all of the images  we have respective classe label. we can check for that torch.unique(y)
    print(y.shape, y.dtype, torch.unique(y))
    break

torch.Size([64, 1, 28, 28])
torch.Size([64]) torch.int64 tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


In [6]:
device="cuda" if torch.cuda.is_available() else 'cpu'
device

'cpu'

# Lets create a NN

In [7]:
# nn.Module inherites the basic nn module
class NeuralNetwork(nn.Module):
    # NeuralNetwork is a child class for nn Module
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        
        self.flatten=nn.Flatten()
        
        self.linear_relu_stack=nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,10),
            nn.ReLU()
        )
    def forward(self, X):#Forward propagation  X->input or train data
        X=self.flatten(X)
        logits=self.linear_relu_stack(X)
        return logits

In [8]:
model=NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [9]:
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(), lr=1e-3)

In [10]:
def train(dataloader, model, loss_fn, optimizer):
    size=len(dataloader.dataset)
    for batch, (X,y) in enumerate(dataloader):
        X,y=X.to(device), y.to(device)
        #calculate error 
        #First of all we need to forward proagate the value like whatever the data we have to pass. We will pass from the model
        #Right now model is not trained yet
        # First data point pass through the model
        pred=model(X)
        #Calculate the loss and loss we need prediction and the actual label that will give us actual value
        loss=loss_fn(pred, y)
        # BackPropagation
        # we need zero gradient because when we calculate gradient in pytorch, it accumulate the previous gradients, we do not want in every steps
        #For example we have created gradient first steps and we do not want to use same gradient in the next steps. So we have to initialize it from zero so that we can get the exact gradient for that particular steps thats why we need to make zero gradend
        
        optimizer.zero_grad()
        loss.backward()#
        optimizer.step()# with the help of optimizer updates the weight
        
        if batch %100==0:
            loss, current=loss.item(), batch*len(X)# len(X)->current data points
            print(f"loss: {loss} [{current}/{size}]")

In [11]:
def test(dataloader, model):
    size=len(dataloader.dataset)
    model.eval()
    test_loss, correct=0,0
    with torch.no_grad():
        for X,y in dataloader:
            X,y=X.to(device), y.to(device)
            pred=model(X)
            test_loss=test_loss+ loss_fn(pred,y).item()
            correct=correct + (pred.argmax(1)==y).type(torch.float).sum().item()
    test_loss=test_loss/size
    correct=correct/size
    print(f"Test_error: {100*correct}, average_loss:{test_loss}")

In [12]:
EPOCHS=3
for epoch in range(EPOCHS):
    print(f"EPOCH: {epoch+1}\n-----------")
    train(train_data_loader, model, loss_fn, optimizer)
    test(test_data_loader, model)
print("Done")
    

EPOCH: 1
-----------
loss: 2.3030874729156494 [0/60000]
loss: 2.285720109939575 [6400/60000]
loss: 2.2723944187164307 [12800/60000]
loss: 2.2647290229797363 [19200/60000]
loss: 2.2470555305480957 [25600/60000]
loss: 2.2496793270111084 [32000/60000]
loss: 2.227506637573242 [38400/60000]
loss: 2.2214245796203613 [44800/60000]
loss: 2.217020034790039 [51200/60000]
loss: 2.2040748596191406 [57600/60000]
Test_error: 46.79, average_loss:0.034445042872428895
EPOCH: 2
-----------
loss: 2.206491470336914 [0/60000]
loss: 2.17490291595459 [6400/60000]
loss: 2.1507813930511475 [12800/60000]
loss: 2.158328056335449 [19200/60000]
loss: 2.095644235610962 [25600/60000]
loss: 2.1326990127563477 [32000/60000]
loss: 2.084961175918579 [38400/60000]
loss: 2.0738303661346436 [44800/60000]
loss: 2.078347682952881 [51200/60000]
loss: 2.0603840351104736 [57600/60000]
Test_error: 49.18, average_loss:0.03196538891792297
EPOCH: 3
-----------
loss: 2.0645084381103516 [0/60000]
loss: 1.9991745948791504 [6400/60000]

In [13]:
torch.save(model,"model.pth")

In [15]:
classes=["T_shirt/top",
        "Trouser",
        "Pullover",
        "Dress",
         "Coat",
        "Sandal",
        "Shirt",
        "Sneaker",
        "Bag",
        "Ankel boot"]
model.eval()
X,y=test_data[0][0], test_data[0][1]
with torch.no_grad():
    X=X.to(device)
    #y=y.to(device)
    pred=model(X)
    predicted,actual=classes[pred[0].argmax(0)], classes[y]
    print(predicted, actual)

Ankel boot Ankel boot
