In [14]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torchvision.transforms as transforms 
from torchvision import models as models

from torch.utils.data import Dataset 
from torch.utils.data import DataLoader 

import cv2 
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib
import seaborn as sns
matplotlib.style.use('ggplot')

from tqdm import tqdm
import os 

# 1. Xu ly du lieu

In [65]:
# Dataset directory (pascal_2007 under the user's ~/.fastai/data folder).
# Resolved from the current user's home directory instead of a hard-coded
# /home/<user> path so the notebook also runs under other accounts.
path = os.path.expanduser('~/.fastai/data/pascal_2007/')

# Load the training annotations and print a column/dtype summary.
train_data = pd.read_csv(os.path.join(path, 'train.csv'), sep = ",")
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5011 entries, 0 to 5010
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   fname     5011 non-null   object
 1   labels    5011 non-null   object
 2   is_valid  5011 non-null   bool  
dtypes: bool(1), object(2)
memory usage: 83.3+ KB


In [66]:
# Load the test annotations from the same dataset directory
# (comma is already the default separator of read_csv) and summarize them.
test_data = pd.read_csv(os.path.join(path, 'test.csv'))
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4952 entries, 0 to 4951
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   fname   4952 non-null   object
 1   labels  4952 non-null   object
dtypes: object(2)
memory usage: 77.5+ KB


In [67]:
# Preview the first 10 rows of the training annotations.
train_data.head(10)

Unnamed: 0,fname,labels,is_valid
0,000005.jpg,chair,True
1,000007.jpg,car,True
2,000009.jpg,horse person,True
3,000012.jpg,car,False
4,000016.jpg,bicycle,True
5,000017.jpg,person horse,False
6,000019.jpg,cat,True
7,000020.jpg,car,True
8,000021.jpg,dog person,True
9,000023.jpg,bicycle person,False


In [68]:
# Preview the first 5 rows of the test annotations.
test_data.head(5)

Unnamed: 0,fname,labels
0,000001.jpg,dog person
1,000002.jpg,train
2,000003.jpg,sofa chair
3,000004.jpg,car
4,000006.jpg,pottedplant diningtable chair


NX: Anh 000006.jpg classify duoc 3 vat the: pottedplant, diningtable, chair. 

In [69]:
# (rows, columns) of the training annotations.
train_data.shape

(5011, 3)

In [70]:
# (rows, columns) of the test annotations.
test_data.shape

(4952, 2)

In [71]:
# Row counts of the two annotation tables.
# NOTE: despite the `_ratio` suffix these are absolute sizes, not fractions.
train_ratio, test_ratio = len(train_data), len(test_data)

In [72]:
# Show both row counts, one per line.
print(train_ratio, test_ratio, sep = "\n")

5011
4952


In [73]:
# Keep the label column as its own Series (one space-separated string of
# class names per image) and preview the first 10 entries.
train_labels = train_data.loc[:, 'labels']
train_labels.head(10)

0             chair
1               car
2      horse person
3               car
4           bicycle
5      person horse
6               cat
7               car
8        dog person
9    bicycle person
Name: labels, dtype: object

In [74]:
# Same preview via slicing; shows the same 10 entries as the head(10) call in the previous cell.
train_labels[:10]

0             chair
1               car
2      horse person
3               car
4           bicycle
5      person horse
6               cat
7               car
8        dog person
9    bicycle person
Name: labels, dtype: object

In [75]:
# Rows flagged for validation: `is_valid` is a boolean column, so it can be
# used directly as the row mask.
val_data = train_data[train_data["is_valid"]]

In [76]:
# Report how the annotated rows divide between validation and training.
print(f"Validation data size: {len(val_data)}")
print(f"Training data size: {len(train_data) - len(val_data)}")
print(f"Total size: {len(train_data)}")

Validation data size: 2510
Training data size: 2501
Total size: 5011


In [77]:
# Preview the first 10 validation rows (the original row index is preserved).
val_data.head(10)

Unnamed: 0,fname,labels,is_valid
0,000005.jpg,chair,True
1,000007.jpg,car,True
2,000009.jpg,horse person,True
4,000016.jpg,bicycle,True
6,000019.jpg,cat,True
7,000020.jpg,car,True
8,000021.jpg,dog person,True
10,000024.jpg,train,True
12,000030.jpg,bicycle person,True
18,000039.jpg,tvmonitor,True


In [8]:
# We will write a dataset class to prepare the training, validation and test datasets. This is very common when 
# using the PyTorch deep learning framework. 

class ImageDataset(Dataset):
    """Multi-label image dataset built from an annotation table.

    The table must contain an ``Id`` column (image file name without the
    ``.jpg`` extension) and a ``Genre`` column; every remaining column is
    used as one entry of the label vector.

    The rows are split by position:

    * training   -- the first 85% of the rows,
    * test       -- the last 10 rows,
    * validation -- the rows between the training part and the last 10 rows.

    Parameters
    ----------
    csv : pandas.DataFrame
        Annotation table as described above.
    train : bool
        Select the training split (takes precedence over ``test``).
    test : bool
        Select the test split when ``train`` is false; when both flags are
        false the validation split is selected.
    image_dir : str, optional
        Directory that holds the ``<Id>.jpg`` files.  Defaults to the location
        that was previously hard-coded.

    Each item is a dict with a float32 ``'image'`` tensor and a float32
    ``'label'`` tensor.
    """

    def __init__(self, csv, train, test,
                 image_dir = "../input/movie-classifier/Multi_Label_dataset/Images"):
        self.csv = csv
        self.train = train
        self.test = test
        self.image_dir = image_dir

        self.all_image_names = self.csv[:]['Id']                                  # all image file names
        self.all_labels = np.array(self.csv.drop(['Id', 'Genre'], axis = 1))      # all label vectors

        total = len(self.csv)
        self.train_ratio = int(0.85 * total)              # number of rows used for training (first 85%)
        self.valid_ratio = total - self.train_ratio       # number of remaining rows (validation + test hold-out)

        # the last 10 rows are held out for inference
        holdout_start = max(total - 10, 0)

        # Pick the positional [start, stop) window of the requested split.  Names and
        # labels are always cut with the SAME window so they cannot get out of step.
        if self.train:
            start, stop = 0, self.train_ratio
        elif self.test:
            start, stop = holdout_start, total
        else:
            # empty when fewer than 10 rows remain after the training part
            start, stop = self.train_ratio, holdout_start

        self.image_names = list(self.all_image_names)[start:stop]
        self.labels = list(self.all_labels[start:stop])

        if self.train:
            print(f"Number of training images: {len(self.image_names)}")
            # training transforms: resize plus random augmentation
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomRotation(degrees=45),
                transforms.ToTensor(),
            ])
        elif self.test:
            # test transforms: no resize and no augmentation, only the
            # array -> PIL -> tensor conversion
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ToTensor(),
            ])
        else:
            # report the images actually used for validation (hold-out rows excluded)
            print(f"Number of validation images: {len(self.image_names)}")
            # validation transforms: resize only
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((400, 400)),
                transforms.ToTensor(),
            ])

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load, transform and return the sample at ``index``.

        Raises
        ------
        FileNotFoundError
            If the image file cannot be read.
        """
        image_path = os.path.join(self.image_dir, f"{self.image_names[index]}.jpg")

        # cv2.imread does not raise on a missing/unreadable file, it returns None;
        # fail here with the offending path instead of a cryptic cvtColor error.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        # OpenCV loads BGR; the transforms expect RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image)
        targets = self.labels[index]

        return {
            # as_tensor avoids re-copying (and the accompanying warning) when the
            # transform already produced a float32 tensor
            'image': torch.as_tensor(image, dtype = torch.float32),
            'label': torch.tensor(targets, dtype = torch.float32)
        }

In [9]:
# We use a ResNet50 from torchvision, optionally with pre-trained weights.
# The backbone weights can be kept frozen so that only the final classification head is learnable.

def model(pretrained, requires_grad, num_classes = 25):
    """Build a ResNet50 whose final layer is replaced by a fresh classification head.

    Parameters
    ----------
    pretrained : bool
        Forwarded to ``models.resnet50`` to select pre-trained weights.
    requires_grad : bool
        ``False`` freezes every backbone parameter, ``True`` makes them all
        trainable.  Any other value leaves the backbone parameters untouched.
    num_classes : int, optional
        Number of output features of the new head (default 25, the value that
        was previously hard-coded).

    Returns
    -------
    torch.nn.Module
        The network; its ``fc`` layer is a newly created ``nn.Linear`` and is
        therefore always trainable, regardless of ``requires_grad``.
    """
    net = models.resnet50(progress = True, pretrained = pretrained)

    # freeze (False) or unfreeze (True) the backbone
    if requires_grad in (True, False):
        trainable = bool(requires_grad)
        for param in net.parameters():
            param.requires_grad = trainable

    # Replace the head AFTER the loop above so the new layer keeps the default
    # requires_grad=True.  The input width is read from the existing head instead
    # of being hard-coded.
    net.fc = nn.Linear(net.fc.in_features, num_classes)

    return net

In [10]:
# We need to write the training and validation functions to fit our model on the training dataset and validate on 
# the validation dataset. 

# We will write two very simple functions, which are going to be very similar to any other PyTorch classification functions. 


# training function
def train(model, dataloader, optimizer, criterion, train_data, device):
    """Run one training epoch and return the mean per-batch loss.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; its raw outputs are passed through a sigmoid here.
    dataloader : iterable
        Yields dict batches with an ``'image'`` and a ``'label'`` tensor.
    optimizer : torch.optim.Optimizer
        Optimizer that updates the model parameters.
    criterion : callable
        Loss applied to ``(sigmoid(outputs), target)``.
    train_data :
        Kept for backward compatibility; no longer used (the progress-bar
        length now comes from the dataloader itself).
    device : torch.device
        Device the batches are moved to.

    Returns
    -------
    float
        Sum of the batch losses divided by the number of batches, or ``nan``
        when the dataloader yields no batches.
    """
    print('Training')

    model.train()                  # training mode

    # len(dataloader) is the exact batch count, including a final partial batch.
    # Loaders without a length fall back to an open-ended progress bar.
    try:
        num_batches = len(dataloader)
    except TypeError:
        num_batches = None

    counter = 0                    # number of batches processed
    train_running_loss = 0.0       # sum of the batch losses

    for batch in tqdm(dataloader, total = num_batches):
        counter += 1
        images, target = batch['image'].to(device), batch['label'].to(device)

        optimizer.zero_grad()

        # sigmoid squashes every output into (0, 1): one probability per label
        outputs = torch.sigmoid(model(images))
        loss = criterion(outputs, target)
        train_running_loss += loss.item()

        loss.backward()            # compute the gradients
        optimizer.step()           # update the weights

    # no batches -> no defined mean loss (avoid dividing by zero)
    if counter == 0:
        return float('nan')

    return train_running_loss / counter


# validation function
# note that the validation function does not take the optimizer:
# no weights are updated during validation.
def validate(model, dataloader, criterion, val_data, device):
    """Evaluate the model on a dataloader and return the mean per-batch loss.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate; its raw outputs are passed through a sigmoid here.
    dataloader : iterable
        Yields dict batches with an ``'image'`` and a ``'label'`` tensor.
    criterion : callable
        Loss applied to ``(sigmoid(outputs), target)``.
    val_data :
        Kept for backward compatibility; no longer used (the progress-bar
        length now comes from the dataloader itself).
    device : torch.device
        Device the batches are moved to.

    Returns
    -------
    float
        Sum of the batch losses divided by the number of batches, or ``nan``
        when the dataloader yields no batches.
    """
    print('Validating')

    model.eval()                   # evaluation mode

    # len(dataloader) is the exact batch count, including a final partial batch.
    # Loaders without a length fall back to an open-ended progress bar.
    try:
        num_batches = len(dataloader)
    except TypeError:
        num_batches = None

    counter = 0                    # number of batches processed
    val_running_loss = 0.0         # sum of the batch losses

    # gradients are not needed for evaluation
    with torch.no_grad():
        for batch in tqdm(dataloader, total = num_batches):
            counter += 1
            images, target = batch['image'].to(device), batch['label'].to(device)

            # sigmoid squashes every output into (0, 1): one probability per label
            outputs = torch.sigmoid(model(images))
            val_running_loss += criterion(outputs, target).item()

    # no batches -> no defined mean loss (avoid dividing by zero)
    if counter == 0:
        return float('nan')

    return val_running_loss / counter


In [11]:
# This is the final script that starts training and validation.
# It ties together everything defined in the cells above.

# Steps:
# 1. initialize the model and training parameters
# 2. prepare the training and validation data loaders
# 3. run the loop for training and validation
# 4. save the trained model and the loss plot to disk

# locations used by this cell (edit them here rather than inline)
TRAIN_CSV_PATH = '../input/movie-classifier/Multi_Label_dataset/train.csv'
OUTPUT_DIR = '../outputs'

# 1. initialize the computation device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# initialize the model
# `model` is the factory function when this cell first runs and is rebound to the
# network below.  Keep a handle on the factory so that re-running this cell does
# not try to call the network instance with the factory's arguments.
if not isinstance(model, nn.Module):
    build_model = model
# pre-trained ResNet50 weights are used and the intermediate layers are not
# updated, hence "requires_grad = False".
model = build_model(pretrained = True, requires_grad = False).to(device)

# initialize the learning parameters
lr = 0.0001          # lower learning rate than usual: we do not want to change the weights too rapidly
epochs = 20
batch_size = 32
optimizer = optim.Adam(model.parameters(), lr = lr)   # Adam optimizer
criterion = nn.BCELoss()      # Binary Cross-Entropy loss (train/validate apply the sigmoid)

# 2. prepare the training and validation data loaders

# read the training csv file
train_csv = pd.read_csv(TRAIN_CSV_PATH)

# train dataset
# NOTE: this rebinds `train_data`, which earlier cells use for the annotation DataFrame.
train_data = ImageDataset(train_csv, train = True, test = False)

# validation dataset
valid_data = ImageDataset(train_csv, train = False, test = False)

# train data loader
train_loader = DataLoader(train_data, batch_size = batch_size, shuffle = True)

# validation data loader
valid_loader = DataLoader(valid_data, batch_size = batch_size, shuffle = False)


# 3. run the loop for training and validation

train_loss = []     # mean training loss per epoch
valid_loss = []     # mean validation loss per epoch
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")

    train_epoch_loss = train(model, train_loader, optimizer, criterion, train_data, device)
    valid_epoch_loss = validate(model, valid_loader, criterion, valid_data, device)

    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)

    print(f"Train Loss: {train_epoch_loss:.4f}")
    print(f"Val Loss: {valid_epoch_loss:.4f}")

# 4. save the trained model and the loss plot to disk

# torch.save / savefig do not create missing directories
os.makedirs(OUTPUT_DIR, exist_ok = True)

# 'loss' holds the final training loss value rather than the loss module itself:
# a pickled nn.Module inside the checkpoint cannot be read back with
# torch.load(..., weights_only=True).
torch.save({
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss[-1] if train_loss else None,
            }, os.path.join(OUTPUT_DIR, 'model.pth'))

# plot and save the train and validation loss curves
fig, ax = plt.subplots(figsize = (10, 7))
ax.plot(train_loss, color = 'orange', label = 'train loss')
ax.plot(valid_loss, color = 'red', label = 'validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
fig.savefig(os.path.join(OUTPUT_DIR, 'loss.png'))
plt.show()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/haonguyenduy/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:02<00:00, 46.2MB/s]


FileNotFoundError: [Errno 2] No such file or directory: '../input/movie-classifier/Multi_Label_dataset/train.csv'