<a href="https://colab.research.google.com/github/darthwaydr007/gan/blob/master/Imbalanced_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import pandas as pd
import random
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU available.' % torch.cuda.device_count())
    print('GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU')
    device = torch.device("cpu")


There are 1 GPU available.
GPU: Tesla T4


In [None]:
mnist_train = pd.read_csv('/content/sample_data/mnist_train_small.csv')
test_mnist = pd.read_csv('/content/sample_data/mnist_test.csv')
mnist_train.rename(columns={'6':'label'}, 
                 inplace=True)

In [None]:
train_labels = mnist_train[['label']]
classes = train_labels['label'].unique()

In [None]:
imbalanced_mnist = pd.DataFrame()
for i in classes:
  temp = mnist_train[mnist_train['label'] == i]
  if i%2 == 0:
    temp = temp[:200]
  else:
    temp = temp[:1500]
  imbalanced_mnist = pd.concat([imbalanced_mnist , temp])
imbalanced_mnist = imbalanced_mnist.sample(frac=1).reset_index(drop=True) 

In [None]:
balanced_mnist = pd.DataFrame()
for i in classes:
  temp = mnist_train[mnist_train['label'] == i]
  temp = temp[:1500]
  balanced_mnist = pd.concat([balanced_mnist , temp])
balanced_mnist = balanced_mnist.sample(frac=1).reset_index(drop=True) 

In [None]:
def split_dataset(imbalanced_mnist):
  imbalanced_mnist = imbalanced_mnist.sample(frac=1).reset_index(drop=True)
  labels = imbalanced_mnist.iloc[:,0]
  imbalanced_mnist.drop(imbalanced_mnist.columns[0], axis=1, inplace=True)
  return imbalanced_mnist.values , labels.values

In [None]:
class MNISTDataset(Dataset):
  def __init__(self , dataset):
    self.data = dataset
    self.dataset ,  self.labels = split_dataset(self.data)
    self.dataset = self.dataset
    self.labels = self.labels 
    self.length = len(self.labels)

  def __len__(self):
    return self.length

  def __getitem__(self , index):
    return self.dataset[index] ,self.labels[index]

In [None]:
BATCH_SIZE = 48
imbalanced = MNISTDataset(imbalanced_mnist)
balances = MNISTDataset(balanced_mnist)
test  = MNISTDataset(test_mnist)
train_loader_imbalanced = torch.utils.data.DataLoader(imbalanced , batch_size = BATCH_SIZE, shuffle = True , drop_last=True)
train_loader_balanced = torch.utils.data.DataLoader(balances , batch_size = BATCH_SIZE, shuffle = True , drop_last=True)
test_loader = torch.utils.data.DataLoader(test , batch_size = BATCH_SIZE, shuffle = True , drop_last=True)

In [None]:
class mnistclassifier(nn.Module):
  def __init__(self):
    super().__init__()
    self.model = nn.Sequential(
            nn.Linear(784, 1024),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.2),
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.2),
            nn.Linear(128, 10),
            nn.Softmax()
        )
  
  def forward(self, x):
        x = self.model(x.view(x.size(0),784))
        return x.cuda()
        #return F.log_softmax(x).cuda()


In [None]:
lr = 0.0002
EPOCHS = 10
criterion = nn.CrossEntropyLoss()
classifier1 = mnistclassifier().to(device)
optimizer1 = torch.optim.Adam(classifier1.parameters(), lr=1e-4, weight_decay=1e-5)
classifier2 = mnistclassifier().to(device)
optimizer2 = torch.optim.Adam(classifier2.parameters(), lr=1e-4, weight_decay=1e-5)

In [None]:
for i in range(0,EPOCHS):
  classifier1.train()
  total_loss = 0
  print('======== Epoch {:} / {:} ========'.format(i + 1, EPOCHS))
  for batch , (img1 , label) in enumerate(train_loader_imbalanced):
    
    img1 = img1.cuda()
    label = label.cuda()
    
    #print(label[0])
    
    optimizer1.zero_grad()
    output = classifier1(img1.float())

    #print(output[0])

    #loss = F.nll_loss(output , label)
    loss = criterion(output , label)
    #print(loss)
    loss.backward()
    optimizer1.step()
    
    total_loss = total_loss + loss.mean().item()
    if batch % 100 == 0 and not batch == 0:
          print('AvgLoss : {:} , Batch Loss : {:}'.format(total_loss/batch , loss.mean().item()))



  input = module(input)


AvgLoss : 1.8300137722492218 , Batch Loss : 1.7580076456069946
AvgLoss : 1.658024297952652 , Batch Loss : 1.6093164682388306
AvgLoss : 1.6450414729118348 , Batch Loss : 1.6094976663589478
AvgLoss : 1.6414422500133514 , Batch Loss : 1.6358424425125122
AvgLoss : 1.6078879964351653 , Batch Loss : 1.6152281761169434
AvgLoss : 1.5830516624450683 , Batch Loss : 1.5435107946395874
AvgLoss : 1.5590054833889007 , Batch Loss : 1.5992933511734009
AvgLoss : 1.5497333753108977 , Batch Loss : 1.51008939743042
AvgLoss : 1.547432643175125 , Batch Loss : 1.5249286890029907
AvgLoss : 1.543432344198227 , Batch Loss : 1.5687576532363892


In [None]:
correct = 0
total = 0
odd_correct = 0
odd_total = 0
even_correct = 0
even_total = 0
classifier1.eval()
with torch.no_grad():
  for batch_idx, (img1 , label) in enumerate(test_loader):

    img1 = img1.cuda()
    #img1 = transform(img1.float().cpu().detach().numpy())
    #img1 = img1.view(BATCH_SIZE,784).cuda()
    #img1.requires_grad = True
    label = label.cuda()
    
    output = classifier1(img1.float())

    for idx ,i in enumerate(output):
      #print(torch.argmax(i))
      if idx%2 == 0:
        if torch.argmax(i) == label[idx]:
          even_correct += 1
        even_total +=1
      else:
        if torch.argmax(i) == label[idx]:
          odd_correct += 1
        odd_total +=1
      if torch.argmax(i) == label[idx]:
        correct += 1
      total += 1
print("accuracy : " , round(correct/total , 3))
print("even accuracy : " , round(even_correct/even_total , 3))
print("odd accuracy : " , round(odd_correct/odd_total , 3))

  input = module(input)


accuracy :  0.836
even accuracy :  0.834
odd accuracy :  0.837


In [None]:
for i in range(0,EPOCHS):
  classifier2.train()
  total_loss = 0
  print('======== Epoch {:} / {:} ========'.format(i + 1, EPOCHS))
  for batch , (img1 , label) in enumerate(train_loader_balanced):
    
    img1 = img1.cuda()
    label = label.cuda()
    
    #print(label[0])
    
    optimizer2.zero_grad()
    output = classifier2(img1.float())

    #print(output[0])

    #loss = F.nll_loss(output , label)
    loss = criterion(output , label)
    #print(loss)
    loss.backward()
    optimizer2.step()
    
    total_loss = total_loss + loss.mean().item()
    if batch % 100 == 0 and not batch == 0:
          print('AvgLoss : {:} , Batch Loss : {:}'.format(total_loss/batch , loss.mean().item()))



  input = module(input)


AvgLoss : 1.9629097282886505 , Batch Loss : 1.685844898223877
AvgLoss : 1.8126718610525132 , Batch Loss : 1.6506977081298828
AvgLoss : 1.7450712168216704 , Batch Loss : 1.6697584390640259
AvgLoss : 1.6003436291217803 , Batch Loss : 1.6080440282821655
AvgLoss : 1.5870069307088852 , Batch Loss : 1.5826722383499146
AvgLoss : 1.5780654199918112 , Batch Loss : 1.5251227617263794
AvgLoss : 1.5636235570907593 , Batch Loss : 1.5825849771499634
AvgLoss : 1.555616604089737 , Batch Loss : 1.5465222597122192
AvgLoss : 1.5520868241786956 , Batch Loss : 1.4644603729248047
AvgLoss : 1.551228187084198 , Batch Loss : 1.6008529663085938
AvgLoss : 1.5393558859825134 , Batch Loss : 1.543160080909729
AvgLoss : 1.5387337768077851 , Batch Loss : 1.5041661262512207
AvgLoss : 1.54174156665802 , Batch Loss : 1.5249437093734741
AvgLoss : 1.5323598498106004 , Batch Loss : 1.486217975616455
AvgLoss : 1.5316112192471822 , Batch Loss : 1.536866307258606
AvgLoss : 1.5356787979602813 , Batch Loss : 1.5248562097549438


In [None]:
correct = 0
total = 0
odd_correct = 0
odd_total = 0
even_correct = 0
even_total = 0
classifier2.eval()
with torch.no_grad():
  for batch_idx, (img1 , label) in enumerate(test_loader):

    img1 = img1.cuda()
    #img1 = transform(img1.float().cpu().detach().numpy())
    #img1 = img1.view(BATCH_SIZE,784).cuda()
    #img1.requires_grad = True
    label = label.cuda()
    
    output = classifier2(img1.float())

    for idx ,i in enumerate(output):
      #print(torch.argmax(i))
      if idx%2 == 0:
        if torch.argmax(i) == label[idx]:
          even_correct += 1
        even_total +=1
      else:
        if torch.argmax(i) == label[idx]:
          odd_correct += 1
        odd_total +=1
      if torch.argmax(i) == label[idx]:
        correct += 1
      total += 1
print("accuracy : " , round(correct/total , 3))
print("even accuracy : " , round(even_correct/even_total , 3))
print("odd accuracy : " , round(odd_correct/odd_total , 3))

  input = module(input)


accuracy :  0.948
even accuracy :  0.948
odd accuracy :  0.948
