## Import dependencies

In [1]:
# Show the GPU (if any) attached to this runtime, with driver and CUDA versions.
!nvidia-smi

Sun Jan 21 10:15:19 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   66C    P8              11W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
# Mount Google Drive so that the dataset and checkpoint paths under
# /content/drive/MyDrive used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import copy
import os
import pickle
import random
import shutil
import time
from pathlib import Path

import cv2
import imageio as iio
import imgaug
import imgaug.augmenters as ia
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from PIL import Image, ImageOps
from torch.optim import lr_scheduler
from torch.utils.data import random_split
from torchvision import datasets, models, transforms
from tqdm import tqdm

# Data Augmentation

In [12]:
data_dir = '/content/drive/MyDrive/Xu ly anh /Group_Thanh_Ha/dataset'

# One sub-folder per class. Stray files and hidden folders (e.g. .DS_Store,
# .ipynb_checkpoints) are ignored so they are not mistaken for classes by the
# cells that create and iterate over per-class folders.
class_names = sorted(
    name for name in os.listdir(data_dir)
    if not name.startswith('.') and os.path.isdir(os.path.join(data_dir, name))
)

# '/<class_name>' suffixes that later cells append to the source/target roots.
# NOTE(review): `iter` shadows the Python builtin of the same name for the rest
# of the notebook; the name is kept because later cells reference it.
iter = ['/' + class_name for class_name in class_names]

In [13]:
def sometimes(aug):
    """Wrap `aug` in `ia.Sometimes` so it is applied with probability 0.5."""
    return ia.Sometimes(0.5, aug)

# Augmentation pipeline that `sequence()` applies to each image.
# Top level: an optional affine warp plus a random subset of pixel-level
# augmenters; the two top-level steps run in random order.
seq = ia.Sequential(
    [
        #
        # Geometric augmenters.
        #
        # Disabled: horizontal / vertical flips.
        #ia.Fliplr(0.5), # horizontally flip 50% of all images
        #ia.Flipud(0.2), # vertically flip 20% of all images

        # Disabled: crop some of the images by 0-10% of their height/width
        #sometimes(ia.Crop(percent=(0, 0.1))),

        # Apply an affine transformation to some of the images (wrapped in
        # `sometimes`, i.e. probability 0.5):
        # - translate by -20% to +20% of height/width (per axis)
        # - shear by -16 to +16 degrees
        # - order: use nearest neighbour or bilinear interpolation (fast)
        # - mode: use any available mode to fill newly created pixels
        #         see API or scikit-image for which modes are available
        # - cval: if the mode is constant, then use a random brightness
        #         for the newly created pixels (e.g. sometimes black,
        #         sometimes white)
        # Scaling (80-120%) and rotation (-45 to +45 degrees) are currently
        # disabled; their arguments are left commented out below.
        sometimes(ia.Affine(
            #scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            #rotate=(-45, 45),
            shear=(-16, 16),
            order=[0, 1],
            cval=(0, 255),
            mode=imgaug.ALL
        )),

        #
        # Execute 0 to 5 of the following (less important) augmenters per
        # image. Don't execute all of them, as that would often be way too
        # strong.
        #
        ia.SomeOf((0, 5),
            [
                # Convert some images into their superpixel representation,
                # sample between 20 and 200 superpixels per image, but do
                # not replace all superpixels with their average, only
                # some of them (p_replace).
                sometimes(
                    ia.Superpixels(
                        p_replace=(0, 1.0),
                        n_segments=(20, 200)
                    )
                ),

                # Blur each image with varying strength using one of:
                # gaussian blur (sigma between 0 and 3.0),
                # average/uniform blur (kernel size between 2x2 and 7x7),
                # median blur (kernel size between 3x3 and 11x11).
                ia.OneOf([
                    ia.GaussianBlur((0, 3.0)),
                    ia.AverageBlur(k=(2, 7)),
                    ia.MedianBlur(k=(3, 11)),
                ]),

                # Sharpen each image, overlay the result with the original
                # image using an alpha between 0 (no sharpening) and 1
                # (full sharpening effect).
                ia.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),

                # Same as sharpen, but for an embossing effect.
                ia.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),

                # Search in some images either for all edges or for
                # directed edges. These edges are then marked in a black
                # and white image and overlaid with the original image
                # using an alpha of 0 to 0.7.
                sometimes(ia.OneOf([
                    ia.EdgeDetect(alpha=(0, 0.7)),
                    ia.DirectedEdgeDetect(
                        alpha=(0, 0.7), direction=(0.0, 1.0)
                    ),
                ])),

                # Add gaussian noise to some images.
                # In 50% of these cases, the noise is randomly sampled per
                # channel and pixel.
                # In the other 50% of all cases it is sampled once per
                # pixel (i.e. brightness change).
                ia.AdditiveGaussianNoise(
                    loc=0, scale=(0.0, 0.05*255), per_channel=0.5
                ),

                # Either drop randomly 1 to 10% of all pixels (i.e. set
                # them to black) or drop 3 to 15% of the pixels on a mask
                # with 2-5% of the original size, leading to large dropped
                # rectangles.
                ia.OneOf([
                    ia.Dropout((0.01, 0.1), per_channel=0.5),
                    ia.CoarseDropout(
                        (0.03, 0.15), size_percent=(0.02, 0.05),
                        per_channel=0.2
                    ),
                ]),

                # Disabled: invert each image's channel with 5% probability.
                # This sets each pixel value v to 255-v.
                #ia.Invert(0.05, per_channel=True), # invert color channels

                # Disabled: add a value of -10 to 10 to each pixel.
                #ia.Add((-10, 10), per_channel=0.5),

                # Change brightness of images (50-150% of original value).
                ia.Multiply((0.5, 1.5), per_channel=0.5),

                # Improve or worsen the contrast of images.
                ia.LinearContrast((0.5, 2.0), per_channel=0.5),

                # Disabled: convert each image to grayscale and then overlay
                # the result with the original with random alpha. I.e. remove
                # colors with varying strengths.
                #ia.Grayscale(alpha=(0.0, 1.0)),

                # In some images move pixels locally around (with random
                # strengths).
                sometimes(
                    ia.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)
                ),

                # Disabled: in some images distort local areas with varying
                # strength.
                # sometimes(ia.PiecewiseAffine(scale=(0.01, 0.05)))
            ],
            # run the selected augmenters of this SomeOf in random order
            random_order=True
        )
    ],
    # run the top-level steps (affine warp, SomeOf group) in random order
    random_order=True)

def sequence(source, target, num):
    """Write `num` augmented variants of every source image into `target`.

    Iterates over the notebook-level list `iter` (entries of the form
    '/<class_name>'). For each class, every file under `source/<class_name>`
    is read, passed through the notebook-level `seq` pipeline `num` times and
    saved as '<index>_v<k>.JPG' under `target/<class_name>`, where `<index>`
    is the 1-based position of the file in the directory listing and `<k>`
    runs from 0 to num - 1.

    A class is skipped when its target folder already contains augmented
    files ('*_v*.JPG'), so re-running the function does not regenerate
    classes that were already processed.

    Args:
        source: Root folder holding one sub-folder of images per class.
        target: Root folder receiving the augmented images (same layout).
        num: Number of augmented variants to generate per source image.
    """
    for i in iter:
        class_target = Path(target + "/" + i)
        class_target.mkdir(parents=True, exist_ok=True)

        # Skip only classes that already hold augmented output. Testing for
        # *any* file would also skip folders that merely contain the copied
        # originals ('<index>.JPG'), and then nothing would ever be augmented.
        if any(class_target.glob('*_v*.JPG')):
            continue

        count = 0
        for file in Path(source + "/" + i).iterdir():
            img = iio.imread(file)
            count += 1
            for k in tqdm(range(num)):
                imgnew = seq.augment_image(img)
                # Drop the alpha channel before writing a JPEG.
                # NOTE(review): assumes images load as 3- or 4-channel arrays;
                # confirm behaviour for grayscale inputs.
                imgnew = cv2.cvtColor(imgnew, cv2.COLOR_RGBA2RGB)
                iio.imwrite(target + '/' + i + '/' + str(count) + "_v" + str(k) + '.JPG', imgnew)
        print(f'Class {i} done!')

In [None]:
source = '/content/drive/MyDrive/Xu ly anh /Group_Thanh_Ha/dataset'
target = '/content/datadir'

# Number of augmented variants generated per original image.
num = 100

os.makedirs(target, exist_ok=True)

# exist_ok=True already tolerates folders that exist; any other failure
# (permissions, full disk, ...) should surface here instead of being silently
# swallowed and resurfacing later as a confusing write error.
for class_name in class_names:
    os.makedirs(os.path.join(target, class_name), exist_ok=True)

# sequence() inspects each class's target folder to decide whether the class
# still needs augmenting, so run it before the originals are copied in.
sequence(source, target, num)

# Copy the original images next to their augmented variants as '<index>.JPG'.
for i in iter:
    count = 0
    for file in Path(source + "/" + i).iterdir():
        img = iio.imread(file)
        count += 1
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        iio.imwrite(target + '/' + i + '/' + str(count) + '.JPG', img)
    # Total number of files now present for this class.
    print(len(os.listdir(Path(target + '/' + i))))

## Splitting data into training set and testing set

In [None]:
source = '/content/datadir'
target = '/content/datadir_splitted'

os.makedirs(target, exist_ok=True)

train_dir = f'{target}/train'
valid_dir = f'{target}/valid'

# Create output folders if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(valid_dir, exist_ok=True)

# Sorted so the processing order does not depend on the filesystem.
class_folders = sorted(os.listdir(source))

# Iterate through each class folder in the input folder
for class_folder in tqdm(class_folders):
    class_path = os.path.join(source, class_folder)

    # Skip non-directory entries
    if not os.path.isdir(class_path):
        continue

    # os.listdir() returns entries in arbitrary order, so sort before the
    # seeded shuffle; otherwise the fixed seed does not make the split
    # reproducible. A private Random instance leaves the global RNG untouched.
    files = sorted(os.listdir(class_path))
    random.Random(0).shuffle(files)

    # 80% of the files go to training, the rest to validation
    split_index = int(len(files) * 0.8)
    splits = (
        (train_dir, files[:split_index]),
        (valid_dir, files[split_index:]),
    )

    # NOTE(review): files are split individually, so augmented variants of the
    # same original image ('<index>_v<k>.JPG') can land in both train and
    # valid. That makes validation accuracy optimistic; consider splitting by
    # original image instead.

    # Copy (not move) the files into the corresponding output folders
    for split_dir, split_files in splits:
        if not split_files:
            continue  # do not create empty class folders
        dest_dir = os.path.join(split_dir, class_folder)
        os.makedirs(dest_dir, exist_ok=True)
        for file in split_files:
            shutil.copy(os.path.join(class_path, file), os.path.join(dest_dir, file))

# Download augmented data

In [4]:
!pip install --upgrade --no-cache-dir gdown

Collecting gdown
  Downloading gdown-5.0.0-py3-none-any.whl (16 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 4.6.6
    Uninstalling gdown-4.6.6:
      Successfully uninstalled gdown-4.6.6
Successfully installed gdown-5.0.0


In [5]:
!gdown --id 1Y9V6qlz5qNSNSHsNWW1NAzm97uN_QrTN

Downloading...
From (original): https://drive.google.com/uc?id=1Y9V6qlz5qNSNSHsNWW1NAzm97uN_QrTN
From (redirected): https://drive.google.com/uc?id=1Y9V6qlz5qNSNSHsNWW1NAzm97uN_QrTN&confirm=t&uuid=bba3a864-cfa6-4c34-833a-e6ef8c62c904
To: /content/data_sequence_trial_splitted_2_200.zip
100% 1.69G/1.69G [00:14<00:00, 118MB/s] 


In [6]:
!unzip data_sequence_trial_splitted_2_200

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/1_v71.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/3_v101.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/1_v144.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/4_v118.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/4_v3.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/5_v106.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/1_v174.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/3_v120.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/4_v76.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/2_v53.JPG  
  inflating: data_sequence_trial_2_200/train/chained_for_life_hotel/5_v138.JPG  
  inflating: data_sequence_trial_2_200/train/chai

# Transfer learning

## Preprocessing data

In [None]:
def _build_transform():
    """Resize to 224x224, convert to a tensor and normalise each channel."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


# Both splits use the same preprocessing; each gets its own Compose instance.
data_transforms = {
    'train': _build_transform(),
    'valid': _build_transform(),
}

data_dir = '/content/data_sequence_trial_2_200'

# One ImageFolder dataset per split, rooted at <data_dir>/<split>.
image_datasets = {}
for split in ['train', 'valid']:
    split_root = os.path.join(data_dir, split)
    image_datasets[split] = datasets.ImageFolder(split_root, data_transforms[split])

# Shuffled mini-batch loaders over the datasets above.
dataloaders = {}
for split in ['train', 'valid']:
    dataloaders[split] = torch.utils.data.DataLoader(
        image_datasets[split], batch_size=64, shuffle=True, num_workers=4
    )

# Number of samples per split, used to average loss/accuracy per epoch.
dataset_sizes = {}
for split in ['train', 'valid']:
    dataset_sizes[split] = len(image_datasets[split])

class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

print(dataset_sizes)

cuda:0
{'train': 26203, 'valid': 6560}




## Training model

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10):
    """Train `model` and return it loaded with its best validation weights.

    Every epoch runs a 'train' phase followed by a 'valid' phase. After each
    validation phase the weights are snapshotted if the validation accuracy
    improved, and the best snapshot is restored before returning.

    Reads the notebook-level globals `dataloaders` (phase -> iterable of
    (inputs, labels) batches), `dataset_sizes` (phase -> number of samples)
    and `device`.

    Args:
        model: Network to train; modified in place.
        criterion: Loss called as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after the train phase.
        num_epochs: Number of epochs to run.

    Returns:
        The same `model` object, holding the weights of the epoch with the
        highest validation accuracy (the initial weights if none improved
        on 0.0).
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0  # plain int, so it also works for an empty loader

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is a batch mean, so weight it by the
                # batch size before averaging over the whole dataset below
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (four decimals), matching the per-epoch lines; the previous
    # '{:4f}' only set a minimum field width.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

## VGG16

In [None]:
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load.
model_conv = torchvision.models.vgg16(
    weights=torchvision.models.VGG16_Weights.IMAGENET1K_V1
)

# Freeze every pretrained parameter. Freezing only `features` would leave the
# two large pretrained fully-connected layers with requires_grad=True, so
# their gradients would be computed on every step although the optimizer
# below never updates them.
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default,
# so the replaced head is the only trainable layer.
num_ftrs = model_conv.classifier[6].in_features
model_conv.classifier[6] = nn.Linear(num_ftrs, len(class_names))

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the final layer are optimized.
optimizer_conv = optim.SGD(model_conv.classifier[6].parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler,
                         num_epochs=10)



Epoch 0/9
----------
train Loss: 1.6467 Acc: 0.6373
valid Loss: 0.9204 Acc: 0.8233

Epoch 1/9
----------
train Loss: 0.8820 Acc: 0.8011
valid Loss: 0.6950 Acc: 0.8497

Epoch 2/9
----------


In [None]:
# Save the entire model (the pickled module object, not just its state_dict)
checkpoint_path = '/content/drive/MyDrive/cs406/TRECVID/checkpoints/best_model_aug-vgg16_200_2.pth'
# Make sure the checkpoint folder exists so a finished training run is not
# lost to a FileNotFoundError at save time.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(model_conv, checkpoint_path)

In [None]:
# Load the model.
# The checkpoint is a fully pickled nn.Module, which requires
# weights_only=False (newer PyTorch releases default to weights_only=True and
# refuse to unpickle arbitrary objects). Only load checkpoints you created
# yourself: unpickling can execute arbitrary code.
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only runtime.
loaded_model = torch.load(
    '/content/drive/MyDrive/cs406/TRECVID/checkpoints/best_model_aug-vgg16_200_2.pth',
    map_location='cuda:0' if torch.cuda.is_available() else 'cpu',
    weights_only=False,
)

# If you want to use the loaded model for inference, make sure to set it to evaluation mode
loaded_model.eval()

## MobilenetV2

In [None]:
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load.
model_conv = torchvision.models.mobilenet_v2(
    weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V1
)

# Freeze every pretrained parameter.
# NOTE(review): requires_grad=False does not stop BatchNorm layers from
# updating their running statistics while the model is in train() mode.
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default,
# so the replaced head is the only trainable layer.
num_ftrs = model_conv.classifier[1].in_features
model_conv.classifier[1] = nn.Linear(num_ftrs, len(class_names))

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the final layer are optimized.
optimizer_conv = optim.SGD(model_conv.classifier[1].parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler,
                         num_epochs=10)

In [None]:
# Save the entire model (the pickled module object, not just its state_dict)
checkpoint_path = '/content/drive/MyDrive/TRECVID/checkpoints/best_model_aug-mobilenet_500_1.pth'
# Make sure the checkpoint folder exists so a finished training run is not
# lost to a FileNotFoundError at save time.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(model_conv, checkpoint_path)

# Load the model.
# A fully pickled nn.Module requires weights_only=False (newer PyTorch
# releases default to weights_only=True). Only load checkpoints you created
# yourself: unpickling can execute arbitrary code.
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only runtime.
loaded_model = torch.load(
    checkpoint_path,
    map_location='cuda:0' if torch.cuda.is_available() else 'cpu',
    weights_only=False,
)

# If you want to use the loaded model for inference, make sure to set it to evaluation mode
loaded_model.eval()

## ResNet-18

In [None]:
# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load.
model_conv = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1
)

# Freeze every pretrained parameter. This replaces the earlier loop that
# counted the model's children and froze all of them except the last two
# (avgpool, which has no parameters, and fc) -- the same set of parameters.
# NOTE(review): requires_grad=False does not stop BatchNorm layers from
# updating their running statistics while the model is in train() mode.
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default,
# so the replaced head is the only trainable layer.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, len(class_names))

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the final layer are optimized.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv, exp_lr_scheduler,
                         num_epochs=10)

In [None]:
# Save the entire model (the pickled module object, not just its state_dict)
checkpoint_path = '/content/drive/MyDrive/TRECVID/checkpoints/best_model_aug-resnet_500_2.pth'
# Make sure the checkpoint folder exists so a finished training run is not
# lost to a FileNotFoundError at save time.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save(model_conv, checkpoint_path)

In [None]:
# Load the model.
# The checkpoint is a fully pickled nn.Module, which requires
# weights_only=False (newer PyTorch releases default to weights_only=True and
# refuse to unpickle arbitrary objects). Only load checkpoints you created
# yourself: unpickling can execute arbitrary code.
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only runtime.
loaded_model = torch.load(
    '/content/drive/MyDrive/TRECVID/checkpoints/best_model_aug-resnet_500_2.pth',
    map_location='cuda:0' if torch.cuda.is_available() else 'cpu',
    weights_only=False,
)

# If you want to use the loaded model for inference, make sure to set it to evaluation mode
loaded_model.eval()