In [1]:
import torch
from torch import nn
import torchvision
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
import numpy as np
import time
from tensorflow import keras
import psutil
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Image preprocessing pipeline: tensor conversion followed by a single-channel
# normalisation with mean 0.5 and std 0.5 (i.e. normalise to [-1, 1]).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [3]:
# Fetch Fashion-MNIST through Keras; load_data() hands back a (train, test)
# pair, each of which is an (images, labels) pair.
train_split, test_split = keras.datasets.fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
29515/29515 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26421880/26421880 ━━━━━━━━━━━━━━━━━━━━ 2s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
5148/5148 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4422102/4422102 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step


In [4]:
# Flatten the images
# Settings a reader may want to tune for the data pipeline.
BATCH_SIZE = 64
VAL_FRACTION = 0.2  # share of the training set held out for validation
SPLIT_SEED = 42     # fixes the train/validation partition across runs

# Flatten every 28x28 image into a 784-element vector
x_train = x_train.reshape(-1, 28 * 28)
x_test = x_test.reshape(-1, 28 * 28)

# NOTE(review): no pixel scaling/normalisation is applied in this cell -- the
# arrays go into the network with whatever range load_data() returned
# (presumably 0-255; confirm). The `transform` pipeline defined earlier in the
# notebook is not used here. Changing the input scale would also require
# re-tuning the learning rate, so the values are deliberately left untouched.

# Convert to tensors (float inputs, integer class labels) on the target device
x_train_tensor = torch.from_numpy(x_train).float().to(device)
y_train_tensor = torch.from_numpy(y_train).long().to(device)
x_test_tensor = torch.from_numpy(x_test).float().to(device)
y_test_tensor = torch.from_numpy(y_test).long().to(device)

# Full labelled training set, before the validation hold-out is carved off
full_train_dataset = TensorDataset(x_train_tensor, y_train_tensor)

# Size of the validation hold-out and of the remaining training portion
val_size = int(VAL_FRACTION * len(full_train_dataset))
train_size = len(full_train_dataset) - val_size

# Split with an explicitly seeded generator so the same samples land in the
# validation set on every run (an unseeded split changes between runs).
train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create dataloaders; only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

  x_train_tensor = torch.from_numpy(x_train).float().to(device)


In [5]:
class MLP_FashionMnist(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The default sizes (784 -> 256 -> 128 -> 10) reproduce the original
    hard-coded architecture; pass different values to reuse the same
    network shape on other flattened inputs or label sets.

    Args:
        input_dim: Number of features per sample (size of the last input axis).
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_dim=784, hidden_dim1=256, hidden_dim2=128, num_classes=10):
        super().__init__()
        # Attribute names and creation order are kept so existing state_dicts
        # and seeded initialisations remain compatible.
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, num_classes)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()

    def forward(self, x):
        """Return raw class scores (no softmax applied).

        Args:
            x: Tensor whose last dimension equals ``input_dim``.

        Returns:
            Tensor with ``num_classes`` scores along the last dimension.
        """
        out = self.relu1(self.fc1(x))
        out = self.relu2(self.fc2(out))
        # Final layer is left linear: the scores are meant to be fed to a loss
        # that works on logits.
        out = self.fc3(out)
        return out

In [25]:
# Build the network, then place its parameters on the selected device.
model = MLP_FashionMnist()
model = model.to(device)

In [26]:
#loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [8]:
class EarlyStopping:
    """Signal when a monitored loss has stopped improving.

    Call the instance once per evaluation with the latest loss. It returns
    ``True`` once ``patience`` calls in a row have failed to beat the best
    loss seen so far by more than ``delta``; otherwise it returns ``False``.
    """

    def __init__(self, patience=5, delta=0):
        self.counter = 0          # consecutive calls without improvement
        self.best_loss = None     # lowest loss accepted so far
        self.delta = delta        # minimum decrease that counts as progress
        self.patience = patience  # tolerated number of stale calls

    def __call__(self, val_loss):
        # The very first observation only establishes the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return False

        # A sufficiently large drop becomes the new baseline and clears the
        # stale-call count.
        threshold = self.best_loss - self.delta
        if val_loss < threshold:
            self.best_loss = val_loss
            self.counter = 0
            return False

        # No meaningful progress: count it and report whether patience ran out.
        self.counter += 1
        return self.counter >= self.patience

In [27]:
# Stop after 5 consecutive validation checks that fail to lower the best
# validation loss by more than 0.001.
early_stopping = EarlyStopping(delta=0.001, patience=5)

In [10]:
def calculate_validation_loss(model, data_loader, criterion):
    """Compute the sample-weighted average loss of ``model`` over ``data_loader``.

    The model is switched to evaluation mode and is left in that mode on
    return. Batches are flattened to ``(batch, -1)`` and moved to the device
    that holds the model's parameters.

    Args:
        model: ``nn.Module`` to evaluate.
        data_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Callable ``criterion(outputs, target)`` returning a scalar
            tensor. The weighting below assumes it is the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).

    Returns:
        float: Total loss divided by the number of samples seen.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    # Follow the model's own device rather than a notebook-level variable, so
    # the inputs always end up where the weights are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    total_samples = 0

    with torch.no_grad():  # No gradients are needed for evaluation
        for data, target in data_loader:
            data = data.reshape(data.size(0), -1)  # Flatten the images
            if target_device is not None:
                data = data.to(target_device)
                target = target.to(target_device)

            outputs = model(data)
            loss = criterion(outputs, target)

            # Undo the per-batch mean so that a smaller final batch is
            # weighted correctly in the overall average.
            batch_size = data.size(0)
            total_loss += loss.item() * batch_size
            total_samples += batch_size

    if total_samples == 0:
        raise ValueError("data_loader yielded no samples; cannot compute an average loss")

    return total_loss / total_samples

In [11]:
def train(model, train_loader, criterion, optimizer, num_epochs):
    """Train ``model`` for up to ``num_epochs`` epochs with early stopping.

    After every epoch the mean training loss is printed, the validation loss
    is computed, and training stops as soon as the early-stopping tracker
    reports convergence. The total wall-clock time is printed at the end.

    Besides its arguments, this function reads the notebook-level names
    ``device``, ``val_loader``, ``early_stopping`` and
    ``calculate_validation_loss``; they must be defined before calling it.

    Args:
        model: ``nn.Module`` to optimise (updated in place).
        train_loader: Sized iterable yielding ``(data, target)`` batches.
        criterion: Loss callable ``criterion(outputs, target)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Maximum number of passes over ``train_loader``.

    Returns:
        None.
    """
    start_time = time.perf_counter()
    for epoch in range(num_epochs):
        # The validation pass at the end of each epoch puts the model in eval
        # mode, so training mode has to be re-enabled every epoch -- setting
        # it once before the loop only covers the first epoch.
        model.train()
        running_loss = 0.0
        for data, target in train_loader:
            data = data.view(data.size(0), -1).to(device)  # Flatten the images
            target = target.to(device)

            optimizer.zero_grad()              # Clear gradients from the previous step
            outputs = model(data)              # Forward pass
            loss = criterion(outputs, target)  # Batch loss
            loss.backward()                    # Backward pass
            optimizer.step()                   # Update weights
            running_loss += loss.item()

        # Report before the early-stopping check so the loss of the final
        # epoch is not silently dropped; max() guards an empty loader.
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / max(len(train_loader), 1):.4f}')

        val_loss = calculate_validation_loss(model, val_loader, criterion)
        if early_stopping(val_loss):
            print("Early stopping triggered.")
            break
    elapsed = time.perf_counter() - start_time
    print(f"total training time : {elapsed}")

In [28]:
# Train for at most 100 epochs; early stopping may end the run sooner.
train(model, train_loader, criterion, optimizer, num_epochs=100)

Epoch [1/100], Loss: 0.7823
Epoch [2/100], Loss: 0.4443
Epoch [3/100], Loss: 0.3933
Epoch [4/100], Loss: 0.3652
Epoch [5/100], Loss: 0.3445
Epoch [6/100], Loss: 0.3261
Epoch [7/100], Loss: 0.3129
Epoch [8/100], Loss: 0.3020
Epoch [9/100], Loss: 0.2916
Epoch [10/100], Loss: 0.2820
Epoch [11/100], Loss: 0.2727
Epoch [12/100], Loss: 0.2639
Epoch [13/100], Loss: 0.2569
Epoch [14/100], Loss: 0.2504
Epoch [15/100], Loss: 0.2425
Epoch [16/100], Loss: 0.2376
Epoch [17/100], Loss: 0.2314
Epoch [18/100], Loss: 0.2282
Epoch [19/100], Loss: 0.2212
Epoch [20/100], Loss: 0.2168
Epoch [21/100], Loss: 0.2108
Epoch [22/100], Loss: 0.2055
Epoch [23/100], Loss: 0.1995
Early stopping triggered.
total training time : 38.14391303062439


In [13]:
def calculate_accuracy(model, data_loader):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    The model is switched to evaluation mode and is left in that mode on
    return. Batches are flattened to ``(batch, -1)`` and moved to the device
    that holds the model's parameters. The predicted class of a sample is the
    index of its largest output score.

    Args:
        model: ``nn.Module`` producing one score per class for each sample.
        data_loader: Iterable yielding ``(data, target)`` batches, where
            ``target`` holds integer class indices.

    Returns:
        float: ``100 * correct / total``.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    # Follow the model's own device rather than a notebook-level variable, so
    # the inputs always end up where the weights are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No gradients are needed for evaluation
        for data, target in data_loader:
            data = data.reshape(data.size(0), -1)  # Flatten the images
            if target_device is not None:
                data = data.to(target_device)
                target = target.to(target_device)

            outputs = model(data)
            # Under no_grad the outputs carry no graph, so argmax can be
            # taken on them directly.
            predicted = outputs.argmax(dim=1)

            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        raise ValueError("data_loader yielded no samples; cannot compute accuracy")

    return 100 * correct / total

In [29]:
# Accuracy (in percent) on the held-out test set; the bare name on the last
# line makes the notebook display the value.
test_accuracy = calculate_accuracy(model, test_loader)
test_accuracy

88.13