<a href="https://colab.research.google.com/github/harsha361/pytorch_linear_nonlinear_cnn/blob/main/Untitled17.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from helper_functions import accuracy_fn
from timeit import default_timer as timer
from tqdm.auto import tqdm
import requests
from pathlib import Path
import pandas as pd
import mlxtend
from torchmetrics import ConfusionMatrix
from mlxtend.plotting import plot_confusion_matrix

# Print the installed PyTorch and torchvision versions
print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

# Arguments common to both FashionMNIST splits: store files under ./data,
# download them if needed, and convert each image with ToTensor().
shared_dataset_args = {"root": "data", "download": True, "transform": ToTensor()}

# Training split (labels are left untransformed)
train_data = datasets.FashionMNIST(train=True, target_transform=None, **shared_dataset_args)

# Test split
test_data = datasets.FashionMNIST(train=False, **shared_dataset_args)

# Summarize what was loaded: split sizes and the class names
print(f"Number of training samples: {len(train_data)}")
print(f"Number of test samples: {len(test_data)}")
print(f"Classes: {train_data.classes}")

# Show a 4x4 grid of training images, each titled with its class name.
# The subplot position (1..16) is also used as the dataset index, so the
# samples displayed are train_data[1] through train_data[16].
plt.figure(figsize=(9, 9))
for position in range(1, 17):
    image, target = train_data[position]
    plt.subplot(4, 4, position)
    plt.title(train_data.classes[target])
    # squeeze() removes size-1 dimensions before the image is drawn
    plt.imshow(image.squeeze(), cmap="gray")
    plt.axis(False)
plt.show()

# Number of samples per mini-batch, shared by both loaders
BATCH_SIZE = 32

# Training batches are reshuffled on every pass over the data
train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Test batches keep the dataset order
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# Define the baseline model (Model 0)
class FashionMNISTModelV0(nn.Module):
    """Baseline classifier: flatten the input, then two stacked linear layers.

    No activation function is placed between the layers.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Width of the hidden linear layer.
        output_shape: Number of output units.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Layers are created in forward order and wrapped in one Sequential.
        layers = [
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.Linear(hidden_units, output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        outputs = self.layer_stack(x)
        return outputs

# Initialize the baseline model.
# input_shape must equal the number of features nn.Flatten() yields per
# sample; the output layer gets one unit per dataset class.
model_0 = FashionMNISTModelV0(input_shape=784, hidden_units=10, output_shape=len(train_data.classes))

# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)

# Training loop: one optimizer step per mini-batch for a fixed number of epochs
epochs = 3
for epoch in tqdm(range(epochs)):
    model_0.train()
    for batch, (X, y) in enumerate(train_dataloader):
        optimizer.zero_grad()  # clear gradients left from the previous step
        y_pred = model_0(X)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()

# Evaluation on test dataset
model_0.eval()
test_loss, test_acc = 0.0, 0.0
with torch.inference_mode():
    for X, y in test_dataloader:
        y_pred = model_0(X)
        # Accumulate the loss as a plain Python float. Keeping the running
        # total as a tensor created under inference_mode() would make the
        # in-place division below (outside the context) raise
        # "Inplace update to inference tensor outside InferenceMode".
        test_loss += loss_fn(y_pred, y).item()
        test_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

# Average the per-batch totals over the number of test batches
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)

print(f"Test loss: {test_loss:.5f}, Test accuracy: {test_acc:.2f}%")

# Define Model 1 with non-linear activation functions
class FashionMNISTModelV1(nn.Module):
    """Classifier with ReLU non-linearities after each linear layer.

    The input is flattened, passed through a hidden linear layer and ReLU,
    then through the output linear layer and a second ReLU. Because of that
    final ReLU, every value returned by ``forward`` is non-negative.

    Args:
        input_shape: Number of features per sample after flattening.
        hidden_units: Width of the hidden linear layer.
        output_shape: Number of output units.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Layers are created in forward order and wrapped in one Sequential.
        layers = [
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_shape),
            nn.ReLU(),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        """Run ``x`` through the layer stack and return the result."""
        outputs = self.layer_stack(x)
        return outputs

# Initialize Model 1 on the GPU when one is available, otherwise on the CPU.
# The same `device` is used for every batch below so the model and its inputs
# always live on the same device.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_1 = FashionMNISTModelV1(input_shape=784, hidden_units=10, output_shape=len(train_data.classes)).to(device)

# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model_1.parameters(), lr=0.1)

# Training loop: one optimizer step per mini-batch for `epochs` epochs
for epoch in tqdm(range(epochs)):
    model_1.train()
    for batch, (X, y) in enumerate(train_dataloader):
        # Move the batch to the model's device; without this a CUDA model
        # receives CPU tensors and the forward pass fails.
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()  # clear gradients left from the previous step
        y_pred = model_1(X)
        loss = loss_fn(y_pred, y)
        loss.backward()
        optimizer.step()

# Evaluation on test dataset
model_1.eval()
test_loss, test_acc = 0.0, 0.0
with torch.inference_mode():
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        y_pred = model_1(X)
        # Accumulate the loss as a plain Python float. Keeping the running
        # total as a tensor created under inference_mode() would make the
        # in-place division below (outside the context) raise
        # "Inplace update to inference tensor outside InferenceMode".
        test_loss += loss_fn(y_pred, y).item()
        test_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

# Average the per-batch totals over the number of test batches
test_loss /= len(test_dataloader)
test_acc /= len(test_dataloader)

print(f"Test loss: {test_loss:.5f}, Test accuracy: {test_acc:.2f}%")


In [None]:
# Mount Google Drive at /content/drive using the Colab `drive` helper.
# This cell only works inside a Google Colab runtime, where the
# `google.colab` package is available.
from google.colab import drive
drive.mount('/content/drive')