In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision.transforms as transforms
import torchvision.datasets as datasets

from sklearn import decomposition
from sklearn import manifold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import copy
import random
import time
import math

In [2]:

print(torch.__version__)

2.0.1+cu117


In [3]:
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [4]:
ROOT = '.data'

train_data = datasets.MNIST(root=ROOT,
                            train=True,
                            download=True)

mean = train_data.data.float().mean() / 255
std = train_data.data.float().std() / 255

print(f'Calculated mean: {mean}')
print(f'Calculated std: {std}')


Calculated mean: 0.13066048920154572
Calculated std: 0.30810779333114624


In [5]:
train_transforms = transforms.Compose([
                            transforms.RandomRotation(5, fill=(0,)),
                            transforms.RandomCrop(28, padding=2),
                            transforms.ToTensor(),
                            transforms.Normalize(mean=[mean], std=[std])
                                      ])

test_transforms = transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(mean=[mean], std=[std])
                                     ])

In [6]:
train_data = datasets.MNIST(root=ROOT,
                            train=True,
                            download=True,
                            transform=train_transforms)

test_data = datasets.MNIST(root=ROOT,
                           train=False,
                           download=True,
                           transform=test_transforms)

In [7]:
VALID_RATIO = 0.7

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples

train_data, valid_data = data.random_split(train_data,
                                           [n_train_examples, n_valid_examples])

In [8]:
valid_data = copy.deepcopy(valid_data)
valid_data.dataset.transform = test_transforms

In [9]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 42000
Number of validation examples: 18000
Number of testing examples: 10000


In [10]:
BATCH_SIZE = 128

train_iterator = data.DataLoader(train_data,
                                 shuffle=True,
                                 batch_size=BATCH_SIZE)

valid_iterator = data.DataLoader(valid_data,
                                 batch_size=BATCH_SIZE)

test_iterator = data.DataLoader(test_data,
                                batch_size=BATCH_SIZE)

In [11]:
class LeNet(nn.Module):
    def __init__(self, output_dim):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=6,
                               kernel_size=5)

        self.conv2 = nn.Conv2d(in_channels=6,
                               out_channels=16,
                               kernel_size=5)

        self.fc_1 = nn.Linear(16 * 4 * 4, 120)
        self.fc_2 = nn.Linear(120, 84)
        self.fc_3 = nn.Linear(84, output_dim)

    def forward(self, x):

        # x = [batch size, 1, 28, 28]

        x = self.conv1(x)

        # x = [batch size, 6, 24, 24]

        x = F.max_pool2d(x, kernel_size=2)

        # x = [batch size, 6, 12, 12]

        x = F.relu(x)

        x = self.conv2(x)

        # x = [batch size, 16, 8, 8]

        x = F.max_pool2d(x, kernel_size=2)

        # x = [batch size, 16, 4, 4]

        x = F.relu(x)

        x = x.view(x.shape[0], -1)

        # x = [batch size, 16*4*4 = 256]

        h = x

        x = self.fc_1(x)

        # x = [batch size, 120]

        x = F.relu(x)

        x = self.fc_2(x)

        # x = batch size, 84]

        x = F.relu(x)

        x = self.fc_3(x)

        # x = [batch size, output dim]

        return x, h

In [12]:
model = LeNet(10)

In [13]:
device = torch.device('cpu')

In [14]:
optimizer = optim.Adam(model.parameters())

In [15]:
# Create a DataFrame with random values'
No_of_classes=10
No_of_clusters=1
random_values = np.random.rand(No_of_classes, No_of_clusters)
cluster_matrix = (random_values)

In [16]:
def weighted_cross_entropy_loss(y_true, y_pred, class_weights):
    epsilon = 1e-10  # Small constant to avoid division by zero
    num_samples = len(y_true)
    
    loss = 0.0
    for i in range(num_samples):
        # for c in range(len(class_weights)):
            # if y_true[i, c] == 1:
        loss -= class_weights[i] * (y_true[i] * math.log(1 + y_pred[i][y_true[i]] + epsilon))
    
    mean_loss = loss / num_samples
    return mean_loss

In [17]:
def get_weights(y_pred, y, margin_parameter, decay_rate, learning_rate):
    random_val1 = np.zeros((No_of_classes, No_of_clusters),dtype=np.float64)
    cluster_matrix_delta = (random_val1)
    random_val2 = np.zeros((No_of_classes, No_of_clusters),dtype=np.float64)
    cluster_matrix_count = (random_val2)
    observation_weights=[]
    for yi in range(len(y)):
        label=int(y[yi])
        clusters = cluster_matrix[label]
        
        dis_from_obs_label=100000
        centre=-1
        for i in clusters:
            if dis_from_obs_label> math.sqrt((y_pred[yi][label]-i)**2):
                centre=i;
                dis_from_obs_label=min(dis_from_obs_label, math.sqrt((y_pred[yi][label]-i)**2))

        dis_from_other=100000
        
        for i in range(No_of_classes):
            for j in range(No_of_clusters):
                if(i==label):
                    continue
                new_cluster=cluster_matrix[i]
                for k in new_cluster:
                    dis_from_other=min(dis_from_other, math.sqrt((y_pred[yi][label]-k)**2))

         #violation factor
        violation=max(0, dis_from_obs_label-dis_from_other+margin_parameter)
        w=np.exp(-1*decay_rate*violation)
        observation_weights.append(w)
        cluster_matrix_delta[label][int(centre)] += (2*abs(y_pred[yi][label]-centre)*w)
        cluster_matrix_count[label][int(centre)]+=1
                                           
    # loss = weighted_cross_entropy_loss(y, y_pred, observation_weights)
    #update the cluster centres
    for i in range(No_of_classes):
        for j in range(No_of_clusters):
            cluster_matrix[i][j]+=learning_rate*cluster_matrix_delta[i][j]/cluster_matrix_count[i][j]
            
    
    return observation_weights

In [18]:
def train(model, iterator, optimizer, device):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for (x, y) in tqdm(iterator, desc="Training", leave=False):

        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()

        y_pred, _ = model(x)
        
        observation_weights = torch.tensor(get_weights(y_pred, y, 0, 0.00005, 0.00005))
        
            
        criterion = nn.CrossEntropyLoss(reduction = 'none')
        
        LOSS = criterion(y_pred, y)
        
        #acc = calculate_accuracy(y_pred, y)
        loss = 0.0
        for i in range(len(observation_weights)): 
            loss += LOSS[i]*observation_weights[i] 
        
        loss.backward()
        
        optimizer.step()

        epoch_loss += loss.item()
        print(epoch_loss)
        # epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [19]:
import numpy
train(model, train_iterator, optimizer, device)


Training:   0%|          | 0/329 [00:00<?, ?it/s]

294.6721667539279
591.7860682380658
886.5545662395016
1179.4887620110662
1470.3459118419034
1759.4378284572194
2047.8769436305881
2334.2870865653667
2618.5078282301524
2900.750347374951
3180.1181063320473
3459.916076364007
3734.784059391152
4005.645822574956
4268.895880458757
4530.272686596447
4780.926472122885
5041.9757244895145
5294.794576102505
5535.853564618088
5774.1767044844655
5999.668757575248
6230.534935621837
6460.394214252415
6671.572523692081
6875.7921656594135
7064.773843347595
7250.291554949677
7428.338305456247
7605.040179363159
7808.38952972012
7976.624072852612
8135.958517256413
8287.346828558035
8460.210460486875
8624.837356080472
8788.329601876443
8930.957545218618
9101.296296538167
9266.69447333938
9430.718086538216
9565.07482036988
9698.736167148982
9831.240586004446
9959.763880256925
10112.068023910757
10256.031410917441
10397.173386261418
10552.219144862122
10701.509965834122
10825.617899728515
10962.518304253814
11093.594105744283
11206.508075549249
11336.912453

IndexError: index 1 is out of bounds for axis 0 with size 1