In [1]:
'''This shows the fundamental structure of a PyTorch model: 
there is an __init__() method that defines the layers and other components of a model,
 and a forward() method where the computation gets done.
 Note that we can print the model, or any of its submodules, to learn about its structure.'''

import torch 

class demo(torch.nn.Module):
    """Minimal two-layer network illustrating the structure of a PyTorch model.

    ``__init__`` registers the layers as submodules; ``forward`` chains them as
    Linear(100 -> 200) -> ReLU -> Linear(200 -> 10).
    """

    def __init__(self):
        # Zero-argument super() does not look up the global name ``demo``.
        # This notebook later rebinds that name to an instance, which would make
        # ``super(demo, self)`` fail on any further instantiation.
        super().__init__()

        self.linear1 = torch.nn.Linear(100, 200)
        self.activation = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(200, 10)
        # Registered so it shows up when the model is printed; forward() does not
        # apply it. The explicit dim avoids the deprecated implicit-dimension
        # choice; dim=-1 normalises over the last (class-score) dimension.
        self.softmax = torch.nn.Softmax(dim=-1)

    def forward(self, x):
        """Compute the un-normalised class scores for ``x``.

        Args:
            x: tensor whose last dimension has size 100.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 10. Softmax is not applied here.
        """
        hidden = self.activation(self.linear1(x))
        return self.linear2(hidden)
    
# Build the network.
# NOTE(review): this rebinds the name `demo` from the class to an instance, so
# the class is no longer reachable under that name after this line.
demo = demo()

# Show the full module tree, then one individual submodule.
print("themodel:", demo, sep="\n")
print(demo.linear2)

# Print every learnable tensor the model exposes through parameters().
print("models parameters")
for param in demo.parameters():
    print(param)

# Training needs an optimizer and a loss function.
optimizer = torch.optim.SGD(params=demo.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()


themodel:
demo(
  (linear1): Linear(in_features=100, out_features=200, bias=True)
  (activation): ReLU()
  (linear2): Linear(in_features=200, out_features=10, bias=True)
  (softmax): Softmax(dim=None)
)
Linear(in_features=200, out_features=10, bias=True)
models parameters
Parameter containing:
tensor([[-0.0201,  0.0739,  0.0990,  ..., -0.0211, -0.0580,  0.0933],
        [ 0.0789,  0.0578,  0.0324,  ..., -0.0754,  0.0539, -0.0089],
        [-0.0404,  0.0532,  0.0965,  ...,  0.0786,  0.0105, -0.0747],
        ...,
        [ 0.0752, -0.0989, -0.0302,  ..., -0.0231,  0.0799, -0.0597],
        [-0.0449, -0.0532, -0.0422,  ...,  0.0811,  0.0705,  0.0476],
        [-0.0027,  0.0256, -0.0097,  ...,  0.0447, -0.0973, -0.0157]],
       requires_grad=True)
Parameter containing:
tensor([ 0.0902,  0.0946, -0.0971, -0.0408, -0.0149, -0.0837,  0.0629, -0.0108,
         0.0325,  0.0806,  0.0027, -0.0493, -0.0018,  0.0026, -0.0935,  0.0492,
         0.0301,  0.0335,  0.0074,  0.0376,  0.0958,  0.0742, 

In [2]:
'''Working with data: PyTorch has two primitives for working with data, torch.utils.data.DataLoader and torch.utils.data.Dataset.
 Dataset stores the samples and their corresponding labels,
 and DataLoader wraps an iterable around the Dataset.'''
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# FashionMNIST training split: stored under ./data, downloaded if missing, with
# the ToTensor transform applied to each image.
training_data = datasets.FashionMNIST("data", train=True, transform=ToTensor(), download=True)

# FashionMNIST test split, configured the same way apart from train=False.
test_data = datasets.FashionMNIST("data", train=False, transform=ToTensor(), download=True)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:44<00:00, 594996.43it/s] 


Extracting data\FashionMNIST\raw\train-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 455297.31it/s]


Extracting data\FashionMNIST\raw\train-labels-idx1-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:06<00:00, 696158.23it/s] 


Extracting data\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to data\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 2544758.63it/s]


Extracting data\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to data\FashionMNIST\raw



In [3]:
"""We pass the Dataset as an argument to DataLoader. This wraps an iterable over our dataset, and supports automatic batching, sampling, shuffling and multiprocess data loading. Here we define a batch size of 64, i.e. each element in the dataloader iterable will return a batch of 64 features and labels."""
# https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
# Number of samples each loader yields per batch.
batch_size = 64

# Wrap the two dataset splits in DataLoaders.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

# Look at the first test batch only, to confirm feature shape and label dtype.
for X, y in test_dataloader:
    print(
        f"shape of X [N,C,H,W]: {X.shape}",
        f"shape of y :{y.shape}{y.dtype}",
        sep="\n",
    )
    break

shape of X [N,C,H,W]: torch.Size([64, 1, 28, 28])
shape of y :torch.Size([64])torch.int64


In [38]:
# Model definition: a fully connected classifier for 28x28 inputs.
class NeuralNetworkz(torch.nn.Module):
    """Multilayer perceptron: flatten, then 784 -> 512 -> 512 -> 10 with ReLU in between."""

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Flatten keeps the first (batch) dimension and collapses the rest, so a
        # [N, 1, 28, 28] batch becomes [N, 784], matching the first Linear layer.
        self.flatten = nn.Flatten()
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 raw class scores (logits) for each sample in ``x``."""
        return self.linear_relu_stack(self.flatten(x))


model = NeuralNetworkz()
print(model)
        

NeuralNetworkz(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [39]:
# Plain SGD over all of `model`'s parameters, paired with a cross-entropy loss
# computed on the logits the model returns.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()

In [47]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass over ``dataloader``, updating ``model`` in place.

    For each batch the function runs the forward pass, computes the loss,
    back-propagates, applies one optimizer step and clears the gradients.
    A progress line is printed for every 100th batch, starting with the first.

    Args:
        dataloader: iterable of ``(X, y)`` batches that exposes a ``dataset``
            attribute supporting ``len()``.
        model: module to optimise; it is switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer that steps the model's parameters.

    Returns:
        None.
    """
    size = len(dataloader.dataset)
    # Running sample count. Unlike (batch + 1) * len(X), this stays correct when
    # the logged batch is a short final batch.
    seen = 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Forward pass and prediction error.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backward pass: fills the .grad attribute of every parameter.
        loss.backward()
        # Update the parameters from those gradients (rule depends on the optimizer).
        optimizer.step()
        # PyTorch accumulates gradients across backward passes, so clear them
        # before the next batch.
        optimizer.zero_grad()

        seen += len(X)
        # Log only every 100th batch to keep the output short.
        if batch % 100 == 0:
            # .item() yields a plain Python float for formatting.
            print(f"loss:{loss.item():>7f} [{seen:>5d}/{size:>5d}]")

In [48]:
def test(dataLoader, model,loss_fn):
    size = len(dataLoader.dataset)
    num_batches = len(dataLoader)
    model.eval()
    test_loss, correct = 0,0
    with torch.no_grad():
        for X,y in dataLoader:
            pred = model(X)
            test_loss += loss_fn(pred,y).item()
            correct += (pred.argmax(1)==y).type(torch.float).sum().item()
        test_loss /= num_batches 
        correct /= size 
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [49]:
# Number of full passes over the training data.
epochs = 5

for t in range(epochs):
    # Epoch banner uses 1-based numbering.
    print("Epoch {}\n-------------------------------".format(t + 1))
    train(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=optimizer)
    # Per-epoch evaluation is currently disabled:
    # test(test_dataloader, model, loss_fn)

print("Done!")


Epoch 1
-------------------------------
loss:2.316178 [   64/60000]
loss:2.298798 [ 6464/60000]
loss:2.281172 [12864/60000]
loss:2.274479 [19264/60000]
loss:2.254983 [25664/60000]
loss:2.224504 [32064/60000]
loss:2.235098 [38464/60000]
loss:2.201839 [44864/60000]
loss:2.203588 [51264/60000]
loss:2.164916 [57664/60000]
Epoch 2
-------------------------------
loss:2.177467 [   64/60000]
loss:2.160147 [ 6464/60000]
loss:2.103653 [12864/60000]
loss:2.120132 [19264/60000]
loss:2.059521 [25664/60000]
loss:1.998471 [32064/60000]
loss:2.030839 [38464/60000]
loss:1.950675 [44864/60000]
loss:1.966025 [51264/60000]
loss:1.873186 [57664/60000]
Epoch 3
-------------------------------
loss:1.922543 [   64/60000]
loss:1.881085 [ 6464/60000]
loss:1.767598 [12864/60000]
loss:1.811052 [19264/60000]
loss:1.691028 [25664/60000]
loss:1.640318 [32064/60000]
loss:1.673149 [38464/60000]
loss:1.574811 [44864/60000]
loss:1.606375 [51264/60000]
loss:1.491665 [57664/60000]
Epoch 4
-------------------------------


In [50]:
# Persist only the model's parameters (its state dict) to model.pth.
torch.save(obj=model.state_dict(), f="model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [51]:
# Re-create the model structure, then load the saved state dictionary into it.
model = NeuralNetworkz()
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered model.pth cannot execute arbitrary code while being loaded.
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>

In [55]:
# Human-readable label for each of the 10 class indices, in index order.
classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

# Classify the first test sample with the model in evaluation mode.
model.eval()
x, y = test_data[0]
with torch.no_grad():
    pred = model(x)

# pred has one row of scores; the index of the largest score is the predicted class.
predicted = classes[pred[0].argmax(0)]
actual = classes[y]
print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
