<a href="https://colab.research.google.com/github/georgeo30/AVL-VS-BST/blob/master/PreTrainedClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
#Mounting from google drive
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [33]:
#Dataset class that will be used to get an image and its label at a time using the gpu to save memory
#imports for this class
from torch.utils import data
from skimage import io
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

class Dataset(data.Dataset):
    #constructor
    def __init__(self, list_IDs, labels, image_transform = None ):
        self.labels = labels
        self.list_IDs = list_IDs
        self.image_transform = image_transform

    #denotes the total number of samples    
    def __len__(self):
        return len(self.list_IDs)

    #generates one sample of the data while performing the transforms required
    def __getitem__(self, index):
        ID = self.list_IDs[index]
        X, y = self.__data_generation(ID)

        return X, y
    #method used to perform the transforms on the image
    def __data_generation(self, ID): 
    
        # Read images using scikit-image io library
        #check if img is from superimposed or natural images
        belong_to_folder=ID[:4]
        if (belong_to_folder=="clss"):
          X = io.imread('gdrive/My Drive/VASTECH-SSSD/SuperImposedClassification/'+ID)
        else:
          X = io.imread('gdrive/My Drive/VASTECH-SSSD/NaturalImgs/'+ID)
        
        # Store class label
        returned_labels = int(self.labels[ID])
       
        X = np.repeat(X[:, :, np.newaxis], 3, axis=2)
        X = Image.fromarray(X, mode = 'RGB') 
        

        if self.image_transform is not None:
            # Transform works on PIL images and should return PyTorch tensor
            X = self.image_transform(X)
        return X, returned_labels

In [34]:
#imports for training
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models


In [35]:
#Transforms
transform = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor(),transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])

# validation_transforms = transforms.Compose([transforms.Resize(256),
#                                             transforms.CenterCrop(224),
#                                             transforms.ToTensor(),
#                                             transforms.Normalize([0.485, 0.456, 0.406], 
#                                                                  [0.229, 0.224, 0.225])])

# testing_transforms = transforms.Compose([transforms.Resize(256),
#                                          transforms.CenterCrop(224),
#                                          transforms.ToTensor(),
#                                          transforms.Normalize([0.485, 0.456, 0.406], 
#                                                               [0.229, 0.224, 0.225])])

In [36]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
print(use_cuda)

True


In [37]:
#using a sampler to get the training,test and validation folder
import os
import random
partition={"train":[],"valid":[],"test":[]}
classes=('Scanned', 'Natural')
allImgs=[]
labels={}
i=0
ntrlImgs=os.listdir('gdrive/My Drive/VASTECH-SSSD/NaturalImgs')
clssImgs=os.listdir('gdrive/My Drive/VASTECH-SSSD/SuperImposedClassification')
for i in range(len(ntrlImgs)-1):
  allImgs.append(ntrlImgs[i])
  labels[ntrlImgs[i]]=1
  allImgs.append(clssImgs[i])
  labels[clssImgs[i]]=0

random.shuffle(allImgs)
partition["train"]=allImgs[:4150]
partition["valid"]=allImgs[4150:4669]
partition["test"]=allImgs[4699:5188]
print(partition)
print(labels)
  

{'train': ['clss1487.png', 'clss2570.png', 'clss2207.png', 'ntrl792.jpg', 'clss1800.png', 'clss709.png', 'clss157.png', 'ntrl1289.jpg', 'ntrl2242.jpg', 'ntrl404.jpg', 'ntrl977.jpg', 'ntrl321.jpg', 'clss1399.png', 'ntrl12.jpg', 'ntrl1595.jpg', 'ntrl403.jpg', 'clss1091.png', 'clss1315.png', 'ntrl2346.jpg', 'clss672.png', 'clss578.png', 'clss1295.png', 'clss786.png', 'ntrl1997.jpg', 'clss1887.png', 'clss1026.png', 'clss1967.png', 'ntrl1740.jpg', 'ntrl2233.jpg', 'ntrl2460.jpg', 'ntrl1345.jpg', 'clss1640.png', 'ntrl1474.jpg', 'clss2310.png', 'clss95.png', 'ntrl988.jpg', 'clss2492.png', 'clss1261.png', 'clss505.png', 'ntrl1715.jpg', 'ntrl936.jpg', 'clss937.png', 'clss1307.png', 'ntrl1785.jpg', 'clss1603.png', 'ntrl272.jpg', 'clss2077.png', 'ntrl308.jpg', 'clss1954.png', 'ntrl2135.jpg', 'ntrl1751.jpg', 'clss1133.png', 'ntrl583.jpg', 'ntrl2468.jpg', 'ntrl517.jpg', 'clss1280.png', 'clss421.png', 'clss1045.png', 'clss1596.png', 'ntrl1936.jpg', 'clss1046.png', 'clss1124.png', 'clss1194.png', 'cls

In [38]:
# Parameters
params = {'batch_size': 32,
          'shuffle': True,
          'num_workers': 6}
max_epochs = 15

In [58]:
# Generators
#training
training_set = Dataset(partition['train'], labels,transform)
training_generator = torch.utils.data.DataLoader(training_set, **params)
#validation
validation_set = Dataset(partition['valid'], labels,transform)
validation_generator = torch.utils.data.DataLoader(validation_set, **params)
#testing
testing_set = Dataset(partition['test'], labels,transform)
testing_generator = torch.utils.data.DataLoader(testing_set, **params)

In [73]:
#building and training the network
model = models.vgg16(pretrained=True)
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [74]:
# Freeze pretrained model parameters to avoid backpropogating through them
for parameter in model.parameters():
    parameter.requires_grad = False

model.classifier[6] = nn.Linear(4096,2)
model
if torch.cuda.is_available():
    model.cuda()

In [61]:
# Function for the validation pass
def validation(model, validation_generator, criterion):
    
    val_loss = 0
    accuracy = 0
    
    for local_batch, local_labels in validation_generator:
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)

        output = model.forward(local_batch)
        val_loss += criterion(output, local_labels).item()

        probabilities = torch.exp(output)
        
        equality = (local_labels.data == probabilities.max(dim=1)[1])
        accuracy += equality.type(torch.FloatTensor).mean()
    
    return val_loss, accuracy

In [75]:
#loss function
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [18]:
!pip install workspace_utils

[31mERROR: Could not find a version that satisfies the requirement workspace_utils (from versions: none)[0m
[31mERROR: No matching distribution found for workspace_utils[0m


In [76]:
#Training

def train_classifier():
  epochs=15
  steps=0
  print_every=32

  for epoch in range(epochs):
    model.train()
    valid_loss=0.0
    running_loss=0.0
    training_total=0
    training_correct=0
    valid_total=0
    valid_correct=0
    i=0
    j=0

    for local_batch, local_labels in training_generator:
      steps += 1
      # Transfer to GPU
      local_batch, local_labels = local_batch.to(device), local_labels.to(device)
      optimizer.zero_grad()
      outputs=model.forward(local_batch)
      loss=criterion(outputs,local_labels)
      loss.backward()
      optimizer.step()

      running_loss +=loss.item()
      _, training_predicted = torch.max(outputs.data, 1)

      training_total += local_labels.size(0)
      training_correct += (training_predicted == local_labels).sum().item()
      if i % 32 == 31:
        print('[%d, %3d] loss: %.3f' %(epoch + 1, i + 1, running_loss / 32))
        running_loss = 0.0
      i+=1
    #Validation
    print("Validating...")
    model.eval()
    with torch.set_grad_enabled(False):
      for local_batch, local_labels in validation_generator:
        # Transfer to GPU
        local_batch, local_labels = local_batch.to(device), local_labels.to(device)
        outputs = model.forward(local_batch)
        loss = criterion(outputs, local_labels)
        # print statistics
        valid_loss += loss.item()
        _, valid_predicted = torch.max(outputs.data, 1)

        valid_total += local_labels.size(0)
        valid_correct += (valid_predicted == local_labels).sum().item()
        #print(valid_loss)
        if j %  10== 9:
          print('[%d, %3d] loss: %.3f' %(epoch + 1, j + 1, valid_loss / 10))

          valid_loss = 0.0
        j+=1
    print('Accuracy For this training: '+str((100 * training_correct / training_total))+"%")
    print('Accuracy For this validation: '+str((100 * valid_correct / valid_total))+"%")

train_classifier()


[1,  32] loss: 0.120
[1,  64] loss: 0.004
[1,  96] loss: 0.002
[1, 128] loss: 0.002
Validating...
[1,  10] loss: 0.002
Accuracy For this training: 98.96385542168674%
Accuracy For this validation: 100.0%
[2,  32] loss: 0.002
[2,  64] loss: 0.002
[2,  96] loss: 0.002
[2, 128] loss: 0.002
Validating...
[2,  10] loss: 0.001
Accuracy For this training: 100.0%
Accuracy For this validation: 100.0%
[3,  32] loss: 0.002
[3,  64] loss: 0.001
[3,  96] loss: 0.001
[3, 128] loss: 0.001
Validating...
[3,  10] loss: 0.001
Accuracy For this training: 100.0%
Accuracy For this validation: 100.0%
[4,  32] loss: 0.001
[4,  64] loss: 0.001
[4,  96] loss: 0.001
[4, 128] loss: 0.001
Validating...
[4,  10] loss: 0.001
Accuracy For this training: 100.0%
Accuracy For this validation: 100.0%
[5,  32] loss: 0.001
[5,  64] loss: 0.001
[5,  96] loss: 0.001
[5, 128] loss: 0.001
Validating...
[5,  10] loss: 0.001
Accuracy For this training: 100.0%
Accuracy For this validation: 100.0%
[6,  32] loss: 0.001
[6,  64] los

In [78]:
PATH = 'gdrive/My Drive/VASTECH-SSSD/pretrained_classifier.pth'
torch.save(model.state_dict(), PATH)