## Training a CNN

### Table of Contents:
* [Downloading Data](#Downloading-dataset)
* [Using ResNet152](#Downloading-ResNet152-model-of-CNN-family)
* [Creating Custom Dataset Class](#Creating-Custom-Dataset-class-to-further-use-during-training)
* [Label Smoothing](#LabelS)
* [Training the Model](#training)
* [Why CLIP](#CLIP)

### Downloading dataset

In [None]:
!wget https://people.cs.pitt.edu/~kovashka/ads/annotations_images.zip

In [None]:
!wget https://storage.googleapis.com/ads-dataset/resnet_negative.zip

In [None]:
# -y pre-answers apt's confirmation prompt, which a notebook cell cannot answer interactively.
!sudo apt install -y parallel

In [None]:
!parallel wget https://storage.googleapis.com/ads-dataset/subfolder-{}.zip ::: {1..9}

In [None]:
!unzip /content/resnet_negative.zip -d /content/images/NotAdv

### Some data processing to organise it for later use

In [None]:
import os
import shutil
from concurrent.futures import ThreadPoolExecutor

def copy_images(set_path):
    """Move every entry of one negative-sample set into ``images/NotAdv``.

    Args:
        set_path: Name of a sub-directory of
            ``images/NotAdv/resnet_training_negatives`` (for example ``"set0"``).

    Both directories are resolved relative to the current working directory.

    Raises:
        FileNotFoundError: If the set directory does not exist.
    """
    source_directory = os.path.join("images", "NotAdv", "resnet_training_negatives", set_path)
    destination_directory = os.path.join("images", "NotAdv")

    for file_name in os.listdir(source_directory):
        source_file = os.path.join(source_directory, file_name)
        destination_file = os.path.join(destination_directory, file_name)

        # One move replaces the former copy-then-delete pair, so each file is
        # no longer written twice; on the same filesystem it is a plain rename.
        shutil.move(source_file, destination_file)
        print(f"Copied {file_name} from {set_path} to NotAdv folder.")
        print(f"Deleted {file_name} from {set_path}.")

sets = ["set" + str(i) for i in range(5)]

with ThreadPoolExecutor() as executor:
    # Keep the futures: results of executor.map() that are never consumed
    # hide any exception raised inside a worker.
    futures = {executor.submit(copy_images, set_name): set_name for set_name in sets}

# All workers have finished once the pool's `with` block exits; report failures
# instead of dropping them, without aborting the remaining sets.
for future, set_name in futures.items():
    error = future.exception()
    if error is not None:
        print(f"Failed to move images from {set_name}: {error!r}")


In [None]:
!rm -R /content/images/NotAdv/resnet_training_negatives

In [None]:
import zipfile
import os
from concurrent.futures import ThreadPoolExecutor

def unzip_zip_file(zip_path, output_dir):
    """Extract all members of the archive ``zip_path`` into ``output_dir``.

    A confirmation line is printed after the archive has been closed.
    """
    archive = zipfile.ZipFile(zip_path, mode='r')
    try:
        archive.extractall(path=output_dir)
    finally:
        # Always release the archive handle, even if extraction fails.
        archive.close()
    print(f"Extracted {zip_path}")

def unzip_all_zip_files(directory, num_files, output_dir):
    """Extract ``subfolder-1.zip`` .. ``subfolder-<num_files>.zip`` in parallel.

    Args:
        directory: Folder that holds the ``subfolder-<i>.zip`` archives.
        num_files: Highest archive index to look for (indices start at 1).
        output_dir: Folder every archive is extracted into.

    Archives that are missing are reported and skipped. An archive whose
    extraction raises is reported as well, and the other archives still run.
    """
    pending = {}
    with ThreadPoolExecutor() as executor:
        for i in range(1, num_files + 1):
            zip_file = f"subfolder-{i}.zip"
            zip_path = os.path.join(directory, zip_file)
            if os.path.exists(zip_path):
                # Remember the future so a worker failure can be surfaced below.
                pending[executor.submit(unzip_zip_file, zip_path, output_dir)] = zip_file
            else:
                print(f"Zip file {zip_file} not found.")

    # The pool has been shut down here, so every future is already finished.
    for future, zip_file in pending.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to extract {zip_file}: {error!r}")

directory = '/content'
num_files = 9
# Anchor the extraction target to `directory` so it is the same absolute
# /content/images folder the following cells read from, whatever the current
# working directory is.
output_dir = os.path.join(directory, 'images')

unzip_all_zip_files(directory, num_files, output_dir)


In [None]:
import os
import shutil

source_directory = "/content/images"
destination_directory = "/content/images/Adv"

# Create the target folder if needed. (A stray `y` token that followed this
# step raised NameError before any file was copied; it has been removed.)
os.makedirs(destination_directory, exist_ok=True)

# Gather the files of the numbered folders 1..10 into the single Adv folder.
for folder_name in range(1, 11):
    source_folder = os.path.join(source_directory, str(folder_name))

    if os.path.exists(source_folder):

        for file_name in os.listdir(source_folder):
            source_file = os.path.join(source_folder, file_name)
            shutil.copy(source_file, destination_directory)
            print(f"Copied {file_name} from folder {folder_name} to Adv folder.")
    else:
        print(f"Folder {folder_name} does not exist.")


In [None]:
import os
import shutil

source_directory = "/content/images"

# Remove the numbered folders 1..10 under the images directory.
for folder_name in range(1, 11):
    folder_path = os.path.join(source_directory, str(folder_name))

    if not os.path.exists(folder_path):
        print(f"Folder {folder_name} does not exist.")
        continue

    shutil.rmtree(folder_path)
    print(f"Deleted folder {folder_name}.")


In [None]:
!pip install timm

In [None]:
import timm
# Query timm for the model names matching the '*resnet*' wildcard.
timm.list_models('*resnet*')

### Downloading ResNet152 model of CNN family

In [None]:
import timm
import torch

# NOTE(review): with pretrained=False the network presumably starts from
# random weights rather than downloaded ones — confirm this is intended.
model = timm.create_model('resnet152', pretrained=False, num_classes=2)
# Use the GPU when one is available; a hard-coded 'cuda' fails on CPU-only machines.
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

### Creating Custom Dataset class to further use during training

In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os
Image.MAX_IMAGE_PIXELS = 99979202110

class CustomDataset(Dataset):
    """Image dataset laid out as one sub-directory of ``root_dir`` per class.

    Class names are the sub-directory names in sorted order, and a class's
    label is its position in that order. Every regular file inside a class
    directory is treated as one sample.

    Args:
        root_dir: Directory that contains the class sub-directories.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Only directories are classes: a stray file in root_dir (a leftover
        # archive, for instance) would otherwise become a class and make
        # load_images() fail when it tries to list it.
        self.classes = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        self.images = self.load_images()

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``, with the image as RGB."""
        img_path, label = self.images[idx]
        # convert() returns a new image, so the source file can be closed as
        # soon as the conversion is done.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

    def load_images(self):
        """Return the ``(path, class_index)`` pairs of all samples.

        Files are listed in sorted order so the sample order is the same on
        every run; sub-directories inside a class directory are skipped.
        """
        images = []
        for class_dir in self.classes:
            class_idx = self.class_to_idx[class_dir]
            class_path = os.path.join(self.root_dir, class_dir)
            for img_name in sorted(os.listdir(class_path)):
                img_path = os.path.join(class_path, img_name)
                if os.path.isfile(img_path):
                    images.append((img_path, class_idx))
        return images


# Module-level dataset location. It used to be indented into the class body,
# which left the name undefined for the code that builds the dataset.
root_dir = '/content/images'
# Preprocessing pipeline handed to the dataset: a 224x224 resize followed by
# the ToTensor step, applied in that order.
preprocessing_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)



### Splitting dataset into test and train

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from PIL import Image
import os
import torch.nn as nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
import random

dataset = CustomDataset(root_dir, transform=transform)

# Shuffle all sample positions once, then cut two disjoint slices out of the
# permutation: the first for training, the next for testing.
indices = [*range(len(dataset))]
random.shuffle(indices)
train_size, test_size = 25000, 8000
split_end = train_size + test_size
train_indices = indices[:train_size]
test_indices = indices[train_size:split_end]

# Both loaders read from the same dataset; the samplers restrict each one to
# its own slice of indices.
train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
train_loader = DataLoader(dataset, batch_size=64, sampler=train_sampler)
test_sampler = torch.utils.data.SubsetRandomSampler(test_indices)
test_loader = DataLoader(dataset, batch_size=64, sampler=test_sampler)

print("Training images =", len(train_indices))
print("Testing images =", len(test_indices))

Training images = 25000
Testing images = 8000


In [None]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

<a class="anchor" id="LabelS"></a>
### Label smoothing (also available natively in PyTorch via `nn.CrossEntropyLoss(label_smoothing=...)`)

In [None]:
class SmoothCrossEntropyLoss(nn.Module):
    """Cross-entropy computed against label-smoothed targets.

    The target class gets weight ``1 - smoothing``; the remaining ``smoothing``
    mass is divided evenly among the other ``C - 1`` classes, where ``C`` is
    the size of the last dimension of the logits.
    """

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, input, target):
        """Return the mean smoothed loss for logits ``input`` and class indices ``target``."""
        num_classes = input.size(-1)
        off_value = self.smoothing / (num_classes - 1.)
        on_value = 1. - self.smoothing

        target = target.to(input.device)

        # Start with the off-class weight everywhere, then overwrite the
        # target position of every row with the on-class weight.
        soft_targets = input.new_full(input.size(), off_value)
        soft_targets.scatter_(-1, target.unsqueeze(-1), on_value)

        log_prob = nn.functional.log_softmax(input, dim=-1)
        per_sample = (-soft_targets * log_prob).sum(dim=-1)
        return per_sample.mean()


criterion = SmoothCrossEntropyLoss(smoothing=0.1)

<a class="anchor" id="training"></a>
### Defining configs like loss criterion and optimizer to train model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.profiler import profile, record_function, ProfilerActivity

# Loss Criterion
# NOTE(review): this rebinds `criterion`, replacing the SmoothCrossEntropyLoss
# instance created in the label-smoothing cell — confirm plain cross-entropy
# is what should be trained with.
criterion = nn.CrossEntropyLoss()
# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 1

for epoch in range(num_epochs):

    # Training mode is re-enabled every epoch because validation below
    # switches the model to evaluation mode.
    model.train()

    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        # criterion returns a batch mean; weight it by the batch size so the
        # epoch average is per sample.
        running_loss += loss.item() * inputs.size(0)

        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

        if (i+1) % 2 == 0:
            percent_epoch_done = (i+1) / len(train_loader) * 100
            print(f"Epoch {epoch+1}/{num_epochs}, Batch {i+1}/{len(train_loader)}, {percent_epoch_done:.2f}% done")

    # Divide by the number of samples actually seen. train_loader draws a
    # subset through its sampler, so len(train_loader.dataset) is the size of
    # the whole dataset and would understate the loss.
    epoch_loss = running_loss / total_train
    train_accuracy = 100 * correct_train / total_train
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")

    # Evaluate in eval mode: layers such as batch normalisation and dropout
    # behave differently while the module is in training mode.
    model.eval()
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()
    val_accuracy = 100 * correct_val / total_val
    print(f"Validation Accuracy: {val_accuracy:.2f}%")

Epoch 1/1, Batch 2/391, 0.51% done
Epoch 1/1, Batch 4/391, 1.02% done
Epoch 1/1, Batch 6/391, 1.53% done
Epoch 1/1, Batch 8/391, 2.05% done
Epoch 1/1, Batch 10/391, 2.56% done
Epoch 1/1, Batch 12/391, 3.07% done
Epoch 1/1, Batch 14/391, 3.58% done
Epoch 1/1, Batch 16/391, 4.09% done
Epoch 1/1, Batch 18/391, 4.60% done
Epoch 1/1, Batch 20/391, 5.12% done
Epoch 1/1, Batch 22/391, 5.63% done
Epoch 1/1, Batch 24/391, 6.14% done
Epoch 1/1, Batch 26/391, 6.65% done
Epoch 1/1, Batch 28/391, 7.16% done
Epoch 1/1, Batch 30/391, 7.67% done
Epoch 1/1, Batch 32/391, 8.18% done
Epoch 1/1, Batch 34/391, 8.70% done
Epoch 1/1, Batch 36/391, 9.21% done
Epoch 1/1, Batch 38/391, 9.72% done
Epoch 1/1, Batch 40/391, 10.23% done
Epoch 1/1, Batch 42/391, 10.74% done
Epoch 1/1, Batch 44/391, 11.25% done
Epoch 1/1, Batch 46/391, 11.76% done
Epoch 1/1, Batch 48/391, 12.28% done
Epoch 1/1, Batch 50/391, 12.79% done
Epoch 1/1, Batch 52/391, 13.30% done
Epoch 1/1, Batch 54/391, 13.81% done
Epoch 1/1, Batch 56/391,

<a class="anchor" id="CLIP"></a>


### ResNet achieved an accuracy of 79.28%. However, such models still lack a deeper understanding of an image, such as what a symbol in the image represents. For example, an image of a dove represents peace.

### So we need a solution that combines the visual representation with the sentiment and other complex meanings conveyed by the image.

## This is where the CLIP model comes in: it maps images and text into the same embedding space

In [None]:
# Write the model's state_dict to the file 'Adv_Model.pt'.
torch.save(model.state_dict(), 'Adv_Model.pt')

In [None]:
import gc
# Run a garbage-collection pass first, then ask PyTorch to empty its CUDA cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
!pip install deepspeed

In [None]:
# NOTE(review): `prof` is not created in the cells shown here (torch.profiler
# is imported in the training cell but never used) — confirm where the
# profiling run that defines it lives.
# The closing parenthesis of print() was missing, which made this cell a syntax error.
print(prof.key_averages().table(sort_by="self_cuda_memory_usage"))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                  cudaStreamIsCapturing         0.02%     340.000us         0.02%     340.000us       1.545us       0.000us         0.00%       0.000us       0.000us           220  
                                             cudaMalloc        11.72%     162.314ms        11.72%     162.314ms       1.168ms       0.000us         0.00%       0.000us       0.000us           139  
         