In [None]:
import torch
import torch.nn as nn
from google.colab import drive

# Expose the Drive-hosted dataset to this runtime under /content/drive.
drive.mount('/content/drive')

# Run on the GPU when the runtime has one attached, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(device)

Mounted at /content/drive
cuda


In [None]:
"Let us first divide the test and train data into proper format"

import os
import shutil
import random

base_dir = "animals"  # not used by this cell; kept in case other cells read it

# Root of the reorganised dataset (will contain train/ and test/).
animal_dir = "//content//drive//My Drive//DeepLearningCollab//Animal"
# Folder holding one sub-folder of raw images per animal class.
source_dir = "//content//drive//My Drive//DeepLearningCollab//animals"

TRAIN_FRACTION = 0.8  # share of each class that goes to train/; the remainder goes to test/
SPLIT_SEED = 42       # fixed seed so a fresh dataset is always split the same way

# Destination directories.
train_dir = os.path.join(animal_dir, 'train')
test_dir = os.path.join(animal_dir, 'test')

os.makedirs(animal_dir, exist_ok=True)
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)


def split_class_images(class_name, source_root, train_root, test_root, train_fraction, rng):
    """Move one class's images from ``source_root`` into the train and test trees.

    Args:
        class_name: Name of the class sub-folder inside ``source_root``.
        source_root: Directory containing one sub-folder per class.
        train_root: Root of the training tree; images go to ``train_root/class_name``.
        test_root: Root of the test tree; images go to ``test_root/class_name``.
        train_fraction: Fraction of the class's images moved to the training tree
            (rounded down); every remaining image is moved to the test tree.
        rng: ``random.Random`` instance used for shuffling.

    Returns:
        Tuple ``(num_train, num_test)`` with the number of files moved to each tree.
    """
    class_source = os.path.join(source_root, class_name)

    # Sort before shuffling: os.listdir order is arbitrary, so without this the
    # seeded shuffle would still not give a reproducible split.
    images = sorted(
        name for name in os.listdir(class_source)
        if os.path.isfile(os.path.join(class_source, name))
    )
    rng.shuffle(images)

    num_train = int(len(images) * train_fraction)
    assignments = (
        (train_root, images[:num_train]),
        (test_root, images[num_train:]),
    )

    for destination_root, names in assignments:
        destination = os.path.join(destination_root, class_name)
        os.makedirs(destination, exist_ok=True)
        for name in names:
            shutil.move(os.path.join(class_source, name), os.path.join(destination, name))

    return num_train, len(images) - num_train


# Only sub-directories are classes; stray files in source_dir would otherwise
# make os.listdir() fail inside the helper.
animal_directories_list = sorted(
    entry for entry in os.listdir(source_dir)
    if os.path.isdir(os.path.join(source_dir, entry))
)

# Private RNG so the split does not disturb (or depend on) the global random state.
split_rng = random.Random(SPLIT_SEED)

# The files are *moved*, so re-running this cell finds empty class folders in
# source_dir and moves nothing further.
for animal in animal_directories_list:
    split_class_images(animal, source_dir, train_dir, test_dir, TRAIN_FRACTION, split_rng)

In [None]:
from pathlib import Path


# From here on the dataset root is handled as a Path object.
animal_dir = Path(animal_dir)

# Annotation types used by find_classes below.
from typing import Tuple , Dict , List

# The class names are read from these split folders and turned into a
# name -> index dict by find_classes.
train_path = animal_dir / "train"
test_path = animal_dir / "test"

def find_classes(directory : str) -> Tuple[List[str] , Dict[str,int]] :
    """Discover the class folders located directly inside ``directory``.

    Every immediate sub-directory counts as one class; plain files are ignored.

    Args:
        directory: Folder whose sub-directories are the classes.

    Returns:
        A pair ``(classes, classes_to_idx)``: the class names in sorted order,
        and a dict mapping each name to its position in that sorted list.

    Raises:
        FileNotFoundError: If ``directory`` contains no sub-directory.
    """
    classes = [entry.name for entry in os.scandir(directory) if entry.is_dir()]
    classes.sort()

    # An empty result means the folder layout is not what the dataset expects.
    if len(classes) == 0:
        raise FileNotFoundError(f"Could not find any classes in {directory}")

    classes_to_idx = dict(zip(classes, range(len(classes))))

    return classes , classes_to_idx

In [None]:
"Now we will enter into the part where we will be transforming the image and creating Datasets and DataLoaders"
from torch.utils.data import DataLoader , TensorDataset , Dataset
from torchvision import transforms
import random
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt

"Lets write the transform for training Data and Testing Data"

# Values shared by both pipelines: square input size and the usual ImageNet
# per-channel mean / standard deviation used for normalisation.
_INPUT_SIZE = 224
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random crop + horizontal flip as augmentation.
_train_steps = [
    transforms.Resize((_INPUT_SIZE, _INPUT_SIZE)),
    transforms.RandomResizedCrop(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(p = 0.5),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
transforms_train = transforms.Compose(_train_steps)

# Evaluation pipeline: same size and normalisation, no random augmentation.
_test_steps = [
    transforms.Resize((_INPUT_SIZE, _INPUT_SIZE)),
    transforms.CenterCrop((_INPUT_SIZE, _INPUT_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
transforms_test = transforms.Compose(_test_steps)

"""
Creating the custom dataset for one-vs-rest classification:

1) The custom dataset is told which class is the target class; every other class is treated as "rest".
2) Images of the target class get the label 1 and images of all other classes get the label 0.

"""
class CustomDataset(Dataset):
    """One-vs-rest image dataset built from a folder-per-class directory.

    All images of the target class are kept and labelled 1. From every other
    class only a leading fraction of the (sorted) file list is kept, labelled 0.

    Attributes:
        img: List of image file paths.
        label: List of 0/1 labels, aligned with ``img``.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self , target_class_index , target_directory : str , transform = None ,
                 rest_fraction : float = 0.1):
        """Collect file paths and binary labels.

        Args:
            target_class_index: Index into the sorted class list returned by
                ``find_classes``; that class becomes the positive class.
            target_directory: Directory containing one sub-folder per class.
            transform: Optional callable applied to each image in ``__getitem__``.
            rest_fraction: Fraction of each non-target class to include
                (default 0.1, the value that used to be hard-coded).

        Raises:
            IndexError: If ``target_class_index`` is outside the class list.
            FileNotFoundError: Propagated from ``find_classes`` when
                ``target_directory`` has no class folders.
        """
        super().__init__()
        self.label = []
        self.img = []
        self.transform = transform

        classes , _ = find_classes(target_directory)
        target_class_name = classes[target_class_index]

        for class_name in classes:
            is_target = class_name == target_class_name
            limit = 1 if is_target else rest_fraction
            class_label = 1 if is_target else 0

            directory = os.path.join(target_directory , class_name)

            # List the folder once (it used to be re-listed on every iteration)
            # and sort it so the selected subset does not depend on the
            # arbitrary order os.listdir returns.
            names = sorted(os.listdir(directory))
            cutoff = limit * len(names)

            for position , name in enumerate(names):
                if position >= cutoff:
                    break
                self.img.append(os.path.join(directory , name))
                self.label.append(class_label)

    def __len__(self):
        """Return the number of selected images."""
        return len(self.img)

    def load_image(self, index: int) -> Image.Image:
        """Load the image at ``index`` as a 3-channel RGB PIL image.

        Converting to RGB keeps grayscale, palette and RGBA files compatible
        with a 3-channel normalisation step; the file handle is closed before
        returning because ``convert`` produces an independent in-memory image.
        """
        with Image.open(self.img[index]) as img:
            return img.convert("RGB")

    def __getitem__(self, idx: int):
        """Return ``(image, label)`` for ``idx``.

        ``image`` is the output of ``transform`` when one was given, otherwise
        the RGB PIL image; ``label`` is a scalar ``torch.long`` tensor (0 or 1).
        """
        image = self.load_image(idx)

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(self.label[idx], dtype=torch.long)



In [None]:
#### Train model (one target class, 3-fold cross-validation)

from sklearn.model_selection import KFold, StratifiedKFold
from torchvision import models

train_loss=[]
train_accuary=[]
test_loss=[]
test_accuary=[]

classes , _ = find_classes(train_path)

# This cell previously depended on a `train_dataset` left in the kernel by a
# later cell, so it failed on a fresh kernel. Build it here instead.
# NOTE(review): the target class used for the original run is not recorded — confirm.
TARGET_CLASS_INDEX = 0
train_dataset = CustomDataset(TARGET_CLASS_INDEX , train_path , transform = transforms_train)

# Extract indices and their labels
indices = list(range(len(train_dataset)))
fold_labels = [train_dataset.label[idx] for idx in indices]

# Stratified K-Fold keeps the positive/negative ratio in every fold; plain
# KFold ignores the labels passed to split().
skf = StratifiedKFold(n_splits=3, shuffle=True)

# Iterate over folds
for fold, (train_index, val_index) in enumerate(skf.split(indices, fold_labels)):
    print("FOLD : " , fold)
    train_sampler = torch.utils.data.SubsetRandomSampler(train_index)
    val_sampler = torch.utils.data.SubsetRandomSampler(val_index)

    # Create data loaders for training and validation.
    # NOTE: both loaders read from train_dataset, so validation images also go
    # through the training transform (including its random augmentation).
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=24, sampler=train_sampler, num_workers=2)
    val_loader = torch.utils.data.DataLoader(train_dataset, batch_size=24, sampler=val_sampler, num_workers=2)

    # Fresh pretrained network for every fold.
    model = models.resnet50(pretrained=True)

    # ResNet-50 ships with a 1000-way output layer; replace it with a 2-way
    # layer for the target-vs-rest task.
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

    num_epochs = 30
    for epoch in range(num_epochs):
        print("Epoch {} running".format(epoch)) #(printing message)
        model.train()
        running_loss = 0.0
        running_corrects = 0
        total_train = 0

        # Iterate over this fold's training loader (the previous code read an
        # unrelated `train_dataloader` name that this notebook never defines).
        for inputs , labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward inputs and get outputs
            optimizer.zero_grad()
            outputs = model(inputs)

            _ , preds = torch.max(outputs , 1)

            loss = criterion(outputs , labels)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean, so weight it by the batch size
            # to obtain a true per-sample average at the end of the epoch.
            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            total_train += batch_size

            running_corrects += torch.sum(preds == labels).item()

        epoch_loss = running_loss / total_train
        epoch_acc = running_corrects / total_train *100

        # Append result
        train_loss.append(epoch_loss)
        train_accuary.append(epoch_acc)

        # Print progress
        print('[Train #{}] Loss: {:.4f} Acc: {:.4f}%'.format(epoch+1, epoch_loss, epoch_acc))

        # Validation part
        model.eval()
        with torch.no_grad():
            running_loss = 0.0
            running_corrects = 0
            total_val = 0

            for inputs , labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                _ , preds = torch.max(outputs , 1)
                loss = criterion(outputs , labels)

                batch_size = labels.size(0)
                running_loss += loss.item() * batch_size
                total_val += batch_size
                running_corrects += torch.sum(preds == labels).item()

            epoch_loss = running_loss / total_val
            epoch_acc = running_corrects / total_val *100

            # Append result
            test_loss.append(epoch_loss)
            test_accuary.append(epoch_acc)

            # Print progress
            print('[Test #{}] Loss: {:.4f} Acc: {:.4f}% '.format(epoch+1, epoch_loss, epoch_acc))










FOLD :  0




Epoch 0 running
[Train #1] Loss: 0.0359 Acc: 95.3347%
tensor([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
tensor([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

KeyboardInterrupt: 

In [None]:
from sklearn.metrics import confusion_matrix, roc_curve, auc, accuracy_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold, train_test_split
from torchvision import models

from sklearn.model_selection import StratifiedKFold

NUM_FOLDS = 3         # cross-validation folds per target class
NUM_EPOCHS = 5        # training epochs per fold
BATCH_SIZE = 24
LEARNING_RATE = 5e-5


def _build_binary_resnet50():
    """Return a pretrained ResNet-50 with a 2-output head, moved to ``device``."""
    network = models.resnet50(pretrained=True)
    # Swap the original 1000-way output layer for a target-vs-rest layer.
    network.fc = nn.Linear(network.fc.in_features, 2)
    return network.to(device)


def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one full pass over ``loader``.

    Trains when ``optimizer`` is given; otherwise evaluates with gradients
    disabled and the model in eval mode.

    Returns:
        Tuple ``(mean_loss, accuracy_percent, true_labels, predicted_labels)``.
        ``mean_loss`` is averaged per sample; the two label lists hold plain
        ints in the order the batches were seen.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    true_labels = []
    predicted_labels = []

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean; weight by batch size so the
            # epoch figure is a true per-sample average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            seen += batch_size
            correct += torch.sum(preds == labels).item()

            true_labels.extend(labels.cpu().tolist())
            predicted_labels.extend(preds.cpu().tolist())

    return loss_sum / seen, correct / seen * 100, true_labels, predicted_labels


class_list , _ = find_classes(train_path)

# One experiment per class: that class is the positive label, all others "rest".
for class_index , class_name in enumerate(class_list):
    print(class_name)

    # Load custom training and testing datasets for this target class
    train_dataset = CustomDataset(class_index , train_path , transform = transforms_train)
    test_dataset = CustomDataset(class_index , test_path , transform = transforms_test)

    # Order does not matter for evaluation, so no shuffling.
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

    # Per-epoch history, concatenated over all folds of this class
    train_loss=[]
    train_accuracy=[]
    test_loss=[]
    test_accuracy=[]

    # Extract indices and their labels
    indices = list(range(len(train_dataset)))
    fold_labels = [train_dataset.label[idx] for idx in indices]

    # Stratified K-Fold keeps the positive/negative ratio in every fold; plain
    # KFold ignores the labels passed to split().
    skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True)

    # Iterate over folds
    for fold, (train_index, val_index) in enumerate(skf.split(indices, fold_labels)):
        print("FOLD : " , fold)
        train_sampler = torch.utils.data.SubsetRandomSampler(train_index)
        val_sampler = torch.utils.data.SubsetRandomSampler(val_index)

        # Create data loaders for training and validation.
        # NOTE: both read from train_dataset, so validation images also go
        # through the training transform.
        train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4)
        val_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=val_sampler, num_workers=4)

        # Fresh network, loss and optimizer for every fold
        model = _build_binary_resnet50()
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

        for epoch in range(NUM_EPOCHS):
            print("Epoch {} running".format(epoch)) #(printing message)

            epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, optimizer)[:2]
            train_loss.append(epoch_loss)
            train_accuracy.append(epoch_acc)
            print('[Train #{}] Loss: {:.4f} Acc: {:.4f}%'.format(epoch+1, epoch_loss, epoch_acc))

            epoch_loss, epoch_acc = _run_epoch(model, val_loader, criterion)[:2]
            test_loss.append(epoch_loss)
            test_accuracy.append(epoch_acc)
            print('[Test #{}] Loss: {:.4f} Acc: {:.4f}% '.format(epoch+1, epoch_loss, epoch_acc))

    # Held-out evaluation. Only the model trained in the last fold is
    # evaluated here, as before.
    _held_out = _run_epoch(model, test_loader, criterion)
    epoch_acc = _held_out[1]
    true_labels = _held_out[2]
    predicted_labels = _held_out[3]

    print('[Held-out test] Acc: {:.4f}%'.format(epoch_acc))
    print('Confusion Matrix')
    # Fix the label order so the matrix is always 2x2 (rest, target).
    print(confusion_matrix(true_labels, predicted_labels, labels=[0, 1]))

    # The x axis runs over the epochs of all folds, one after another.
    plt.figure(figsize=(10, 7))
    plt.plot(train_accuracy, color='green', label='train accuracy')
    plt.plot(test_accuracy, color='blue', label='validation accuracy')
    plt.title('{} vs rest'.format(class_name))
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()













Output hidden; open in https://colab.research.google.com to view.