<a href="https://colab.research.google.com/github/arpitpatelsitapur/my-py-torch-journey/blob/main/Fashion_MNIST_pytorch_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import pandas as pd
import torch
import torch.nn as nn
from torchinfo import summary
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from keras import datasets
# Fixed seed so weight initialisation and batch shuffling repeat between runs.
torch.manual_seed(7)

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load dataset
(X_train, y_train), (X_test, y_test) = datasets.fashion_mnist.load_data()


def _rows_as_frame(array):
    """Flatten every sample of `array` into one row and wrap the result in a DataFrame."""
    n_rows = array.shape[0]
    return pd.DataFrame(array.reshape(n_rows, -1))


def _frame_to_tensor(frame, dtype):
    """Copy the values of `frame` into a new tensor of the requested dtype."""
    return torch.tensor(frame.values, dtype=dtype)


# Make every array 2-dimensional (one row per sample) and keep it as a DataFrame.
# The label arrays go through the same reshape, which is why later cells call
# squeeze(1) on label batches.
X_train, X_test = _rows_as_frame(X_train), _rows_as_frame(X_test)
y_train, y_test = _rows_as_frame(y_train), _rows_as_frame(y_test)

# convert into tensors: float32 pixels for the network, int64 labels for the loss
X_train_t = _frame_to_tensor(X_train, torch.float32)
y_train_t = _frame_to_tensor(y_train, torch.long)
X_test_t = _frame_to_tensor(X_test, torch.float32)
y_test_t = _frame_to_tensor(y_test, torch.long)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
# dataset and dataloader
class custom_dataset(Dataset):
  """In-memory dataset pairing image tensors with their labels.

  Args:
    X: tensor holding the pixel data. It is reshaped to ``(-1, *image_shape)``,
      so its element count must be a multiple of the per-image size.
    y: labels, indexable along the first axis, one entry per image.
    image_shape: shape of a single sample; defaults to ``(1, 28, 28)``.

  Raises:
    ValueError: if ``y`` does not hold exactly one entry per image.
  """
  def __init__(self,X,y,image_shape=(1,28,28)):
    self.X=X.reshape(-1,*image_shape)
    self.y=y
    # Count samples on the reshaped tensor: the leading dimension of the raw
    # input only equals the image count when X already has one row per image.
    self.n_samples=self.X.shape[0]
    # Fail here rather than with an IndexError (or silently ignored labels)
    # somewhere in the middle of an epoch.
    if len(self.y)!=self.n_samples:
      raise ValueError(
        f"X holds {self.n_samples} images but y holds {len(self.y)} labels"
      )

  def __len__(self):
    """Return the number of images in the dataset."""
    return self.n_samples

  def __getitem__(self,index):
    """Return the ``(image, label)`` pair stored at ``index``."""
    return self.X[index],self.y[index]

train_dataset=custom_dataset(X_train_t,y_train_t)
test_dataset=custom_dataset(X_test_t,y_test_t)

# Single source of truth for the mini-batch size shared by both loaders.
BATCH_SIZE=128
# Pinned (page-locked) host memory only speeds up host-to-GPU copies; asking for it
# on a CPU-only runtime just makes DataLoader emit a warning, so enable it only
# when CUDA is available.
PIN_MEMORY=torch.cuda.is_available()

# Shuffle the training data; keep the test data in its original order.
train_loader=DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,shuffle=True,pin_memory=PIN_MEMORY)
test_loader=DataLoader(dataset=test_dataset,batch_size=BATCH_SIZE,shuffle=False,pin_memory=PIN_MEMORY)



![CNN Architecture](https://drive.google.com/uc?export=view&id=1L2xrxQOX4tM_mYMLne4Uv9Z3io1x0Np0)

Output shape after each stage, per sample, as (channels, height, width):

- input: (1,28,28)
- conv1: (32,28,28)
- maxPool1: (32,14,14)
- conv2: (64,14,14)
- maxPool2: (64,7,7)

In [11]:
# Define model
class myCNN(nn.Module):
    """Two-stage convolutional classifier producing 10 logits per 1x28x28 image.

    ``features`` applies Conv -> ReLU -> BatchNorm -> MaxPool twice (1 -> 32 -> 64
    channels, each pool halving height and width). ``classifier`` flattens the
    64x7x7 result and runs it through 128- and 64-unit hidden layers with dropout.
    """

    def __init__(self):
        super().__init__()

        def conv_stage(n_in, n_out):
            """Layers of one stage: 3x3 same-padded conv, ReLU, batch norm, 2x2 max-pool."""
            return [
                nn.Conv2d(n_in, n_out, kernel_size=3, padding="same"),
                nn.ReLU(),
                nn.BatchNorm2d(n_out),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]

        # Unpacked into one flat Sequential so the sub-module indices (0..7) stay put.
        self.features = nn.Sequential(*conv_stage(1, 32), *conv_stage(32, 64))

        flat_features = 64 * 7 * 7  # channels x height x width after the second pool
        drop_p = 0.35
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        return self.classifier(self.features(x))



In [12]:
# Hyperparameters
num_epochs = 100
lr = 0.001
# The next three values are kept for reference; nothing below in this cell passes
# them to myCNN(), whose layer sizes are fixed inside the class.
input_size = X_train_t.shape[1]
hidden_size = 128
output_size = 10  # 10 classes for Fashion MNIST

# build the network and place it on the selected device (GPU when available)
model = myCNN().to(device)
# criterion and optimizer
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)
# per-layer parameter counts
print(summary(model))

# Training: every epoch makes one full pass over all mini-batches in train_loader.
# Dropout and BatchNorm behave differently in train and eval mode, so select train
# mode explicitly instead of relying on whatever mode the model is currently in.
model.train()
for epoch in range(num_epochs):
    total_loss=0
    for batch_X, batch_y in train_loader:

        # move batches to the selected device
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        # forward pass
        outputs = model(batch_X)
        # loss calculation; labels are stored with a trailing axis of size 1,
        # which squeeze(1) drops before they are handed to the loss
        batch_loss = loss(outputs, batch_y.squeeze(1))
        # backward pass (clear gradients left over from the previous step first)
        optimizer.zero_grad()
        batch_loss.backward()
        # updating weights
        optimizer.step()
        total_loss+=batch_loss.item()

    # mean of the per-batch losses over this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Avg Loss: {total_loss/len(train_loader):.4f}")
    print("="*65)

# Set the model to evaluation mode BEFORE measuring anything. In train mode Dropout
# randomly zeroes activations and BatchNorm normalises with (and keeps updating its
# running statistics from) the current batch, so an accuracy measured in that mode
# is not comparable with the test accuracy.
model.eval()

# Accuracy on the training data first (to check whether the model is overfitted),
# then on the held-out test data. Both passes run the same loop.
with torch.no_grad():
    for split_name, loader in (("training", train_loader), ("test", test_loader)):
        n_correct = 0
        n_samples = 0
        for batch_X, batch_y in loader:
            # move batches to the selected device
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            outputs = model(batch_X)
            # index of the largest logit along the class axis = predicted class
            predicted = outputs.argmax(dim=1)
            n_samples += batch_y.size(0)
            n_correct += (predicted == batch_y.squeeze(1)).sum().item()

        acc = 100.0 * n_correct / n_samples
        print(f'Accuracy of the network on the {split_name} images: {acc} %')

Layer (type:depth-idx)                   Param #
myCNN                                    --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       320
│    └─ReLU: 2-2                         --
│    └─BatchNorm2d: 2-3                  64
│    └─MaxPool2d: 2-4                    --
│    └─Conv2d: 2-5                       18,496
│    └─ReLU: 2-6                         --
│    └─BatchNorm2d: 2-7                  128
│    └─MaxPool2d: 2-8                    --
├─Sequential: 1-2                        --
│    └─Flatten: 2-9                      --
│    └─Linear: 2-10                      401,536
│    └─ReLU: 2-11                        --
│    └─Dropout: 2-12                     --
│    └─Linear: 2-13                      8,256
│    └─ReLU: 2-14                        --
│    └─Dropout: 2-15                     --
│    └─Linear: 2-16                      650
Total params: 429,450
Trainable params: 429,450
Non-trainable params: 0
Epoch [1/100], Avg Loss: 0.4