In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets,models,transforms
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()  

use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA")

Using CUDA


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!unzip -uq "/content/drive/My Drive/Colab Notebooks/DATA_CHAMBER_2021.zip" -d "./"

In [4]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """Custom dataset that includes image file paths. Extends
    torchvision.datasets.ImageFolder
    """

    # override the __getitem__ method. this is the method that dataloader calls
    def __getitem__(self, index):
        # this is what ImageFolder normally returns 
        original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
        # the image file path
        path = self.imgs[index][0]
        # make a new tuple that includes original and the path
        tuple_with_path = (original_tuple + (path,))
        return tuple_with_path

**Hàm chọn loại xử lý hình ảnh đầu vào**

ở đây chúng ta sẽ có 4 loại là raw, scale, preprocess, augmentation

In [25]:
tensor = transforms.ToTensor()
resize = transforms.Resize(256)
crop_224 = transforms.CenterCrop(224)
crop_64 = transforms.CenterCrop(64)
rotation = transforms.RandomRotation(degrees=20)
blur = transforms.GaussianBlur(kernel_size=3)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


def transform(type):
  if(type == "raw"):
    train_transform =  transforms.Compose([crop_224, tensor])
    test_transform = transforms.Compose([crop_224, tensor])
  if(type == "scale"):
    train_transform =  transforms.Compose([resize, crop_64, tensor])
    test_transform = transforms.Compose([crop_64, tensor])
  if(type == "preprocess"):
    train_transform =  transforms.Compose([resize, crop_224, blur, tensor])
    test_transform = transforms.Compose([crop_224, tensor])
  if(type == "augmentation"):
    train_transform =  transforms.Compose([resize, rotation, crop_224, tensor])
    test_transform = transforms.Compose([crop_224, tensor])
  return train_transform, test_transform

In [6]:
train_dataset_path = '../content/DATA_CHAMBER_2021/train'
test_dataset_path = '../content/DATA_CHAMBER_2021/test'

In [7]:
def prepare_dataset(train_transform, test_transform):
  train_datasets = ImageFolderWithPaths(root = train_dataset_path, transform = train_transform)
  test_datasets = ImageFolderWithPaths(root = test_dataset_path, transform = test_transform)
  return train_datasets, test_datasets

def prepare_dataloader(train_datasets, test_datasets):
  train_loader = DataLoader(train_datasets, batch_size = 8, shuffle = True, num_workers = 2)
  test_loader = DataLoader(test_datasets, batch_size = 8, shuffle = False, num_workers = 2)
  return train_loader, test_loader

In [8]:
def set_device():
    if torch.cuda.is_available():
        dev = "cuda:0"
    else:
        dev = "cpu"
    return torch.device(dev)
set_device()

device(type='cuda', index=0)

**HÀM TẠO MÔ HÌNH**
- Ta chọn hai mô hình là vgg16 , vgg19
- Tương ứng với tham số truyền vào thì ta sẽ chọn ra mô hình tương ứng

In [9]:
vgg16 = models.vgg19_bn(pretrained=True)
vgg19 = models.vgg16_bn(pretrained=True)

def create_model(name_model):
  if(name_model == "vgg16"):
    vgg = vgg16
  if(name_model == "vgg19"):
    vgg = vgg19
  for param in vgg.features.parameters():
    param.require_grad = False
  # Newly created modules have require_grad=True by default
  num_features = vgg.classifier[6].in_features
  features = list(vgg.classifier.children())[:-1] # Remove last layer
  features.extend([nn.Linear(num_features, 3)]) # Add our layer with 4 outputs
  vgg.classifier = nn.Sequential(*features) # Replace the model classifier
  return vgg

Downloading: "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth" to /root/.cache/torch/hub/checkpoints/vgg19_bn-c79401a0.pth


  0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

**HÀM TRAIN VÀ TEST MODEL**

In [10]:
def train_model(model, train_loader, train_datasets, criterion, optimizer, num_epochs):
    train_batches = len(train_loader)
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        
        model.train()
        running_loss = 0.0
        running_corrects = 0
        for i,data in enumerate(train_loader):
          inputs, labels,_ = data
          print("\rTraining batch {}/{}".format(i+1, train_batches), end='', flush=True)
          inputs = inputs.to(device)
          labels = labels.to(device)
          
          outputs = model(inputs)
          loss = criterion(outputs, labels)
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

          _, preds = torch.max(outputs, 1)
          running_loss += loss.item() * inputs.size(0)
          running_corrects += torch.sum(preds == labels.data)

          epoch_loss = running_loss / len(train_datasets)
          epoch_acc = running_corrects.double() / len(train_datasets)

        print('{} loss: {:.4f}, acc: {:.4f}'.format("train",
                                                        epoch_loss,
                                                        epoch_acc))
    return model

In [11]:
def test_model(model, test_loader, criterion):
    labels_input=list()
    labels_output=list()
    vid_id = list()
        
    model.eval()

    running_loss = 0.0
    running_corrects = 0

    for images, labels, fname in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        labels_input= labels_input + labels.tolist()
        for f in fname:
          vid_id.append(f.split('/')[-1].split('.')[0].split('_')[0])
        outputs = model(images)
            
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
            
        labels_output= labels_output + preds.tolist()
    return labels_input,labels_output, vid_id

In [12]:
criterion = nn.CrossEntropyLoss()
device = set_device()
optimizer_ft = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# **HÀM MAIN**
- truyền vào mô hình và loại biến đổi dữ liệu mà bạn muốn chọn để huấn luyện và test

In [31]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
import pandas as pd
modelz = ["vgg16"] #Lựa chọn mô hình muốn sử dụng
data = ["raw"] #lựa chọn cách sử lý ảnh đầu vào
def main():
  for namemodel in modelz:
    models = create_model(name_model = namemodel)
    device = set_device()
    model = models.to(device)
    for typ in data:
      print(namemodel + " - " + typ + ":\n")
      train_transform, test_transform = transform(type = typ)
      train_datasets, test_datasets = prepare_dataset(train_transform, test_transform)
      train_loader, test_loader = prepare_dataloader(train_datasets, test_datasets)
      model = train_model(model, train_loader, train_datasets, criterion, optimizer_ft, num_epochs=1)
      y_true,y_pred,vid_id = test_model(model,test_loader, criterion)
      print(classification_report(y_true,y_pred))
      accuracy_score(y_true, y_pred)
      return y_true, y_pred, model


In [32]:
y_true,y_pred, model = main()

vgg16 - raw:

Epoch 1/1
----------
Training batch 840/840train loss: 0.0161, acc: 0.9949
              precision    recall  f1-score   support

           0       0.65      0.85      0.74       409
           1       0.81      0.90      0.85       367
           2       1.00      0.80      0.89       831

    accuracy                           0.84      1607
   macro avg       0.82      0.85      0.83      1607
weighted avg       0.87      0.84      0.84      1607



In [28]:
accuracy_score(y_true, y_pred)

0.8780336029869322

# TEST MÔ HÌNH VỚI VIDEO

In [33]:
def video_report(model=None, testsets=None, device="cuda"):
  if testsets == None:
    testsets = torchvision.datasets.ImageFolder(root=test_dataset_path, transform=transform_test_64)

  video = namedtuple('video', ['id', "label_true", 'label_pred'])
  path = [] # [158, 158, 158, 165, 165, 135, 135 ...]
  for index, image in enumerate(testsets.imgs):
    path.append(image[0].split("/")[-1].split("_")[0])

  vid_list = [] # [158, 165, 135 ...]
  for frame in path:
    if (frame in vid_list) == False:
      vid_list.append(frame)
    else:
      pass

  ytrue = []
  ypred = []
  model.to(device)
  with torch.no_grad():
    model.eval()
    for image, label in testsets:
      image = image.unsqueeze(0).to(device)
      output = model(image)
      _, predicted = torch.max(output, dim=1)
      ytrue.append(label)
      ypred += list(predicted.cpu().numpy())

  outputs_vid = []
  ytrue_video = []
  ypred_video = []
  for vid_id in vid_list:
    vid_true = []
    vid_pred = []
    for index, img in enumerate(path):
      if img == vid_id:
        vid_true.append(ytrue[index])
        vid_pred.append(ypred[index])

    rate_video_true = [0, 0, 0] # vector count for voting
    rate_video_pred = [0, 0, 0]
    for label in range(3):
      rate_video_true[label] = list(vid_true).count(label)
      rate_video_pred[label] = list(vid_pred).count(label)
    
    label_video_true = np.argmax(rate_video_true)
    label_video_pred = np.argmax(rate_video_pred)

    print("id:", vid_id, ", true:", label_video_true, ", pred:",label_video_pred) # label pred
    ytrue_video.append(label_video_true)
    ypred_video.append(label_video_pred)
    outputs_vid.append(video(id=vid_id, label_true=label_video_true, label_pred=label_video_pred))
  print(classification_report(ytrue_video, ypred_video, target_names=get_classes()))
  return outputs_vid

In [36]:
def get_classes():
  classes = ['2C', '3C', '4C']
  return classes

In [37]:
from collections import namedtuple
transform_raw = transforms.Compose([crop_224, tensor])
testsets = torchvision.datasets.ImageFolder(root=test_dataset_path, transform=transform_raw)
video_report(model=model, testsets=testsets,device ="cuda")

id: 158 , true: 0 , pred: 0
id: 165 , true: 0 , pred: 0
id: 168 , true: 0 , pred: 0
id: 169 , true: 0 , pred: 0
id: 171 , true: 0 , pred: 0
id: 176 , true: 0 , pred: 0
id: 177 , true: 0 , pred: 0
id: 178 , true: 0 , pred: 0
id: 181 , true: 0 , pred: 1
id: 183 , true: 0 , pred: 0
id: 191 , true: 0 , pred: 0
id: 192 , true: 0 , pred: 0
id: 157 , true: 1 , pred: 1
id: 159 , true: 1 , pred: 1
id: 161 , true: 1 , pred: 1
id: 162 , true: 1 , pred: 1
id: 166 , true: 1 , pred: 1
id: 174 , true: 1 , pred: 0
id: 175 , true: 1 , pred: 1
id: 179 , true: 1 , pred: 1
id: 185 , true: 1 , pred: 1
id: 186 , true: 1 , pred: 1
id: 189 , true: 1 , pred: 1
id: 190 , true: 1 , pred: 1
id: 194 , true: 1 , pred: 1
id: 160 , true: 2 , pred: 2
id: 163 , true: 2 , pred: 0
id: 164 , true: 2 , pred: 2
id: 167 , true: 2 , pred: 2
id: 170 , true: 2 , pred: 2
id: 172 , true: 2 , pred: 2
id: 173 , true: 2 , pred: 2
id: 180 , true: 2 , pred: 2
id: 182 , true: 2 , pred: 2
id: 184 , true: 2 , pred: 2
id: 187 , true: 2 , 

[video(id='158', label_true=0, label_pred=0),
 video(id='165', label_true=0, label_pred=0),
 video(id='168', label_true=0, label_pred=0),
 video(id='169', label_true=0, label_pred=0),
 video(id='171', label_true=0, label_pred=0),
 video(id='176', label_true=0, label_pred=0),
 video(id='177', label_true=0, label_pred=0),
 video(id='178', label_true=0, label_pred=0),
 video(id='181', label_true=0, label_pred=1),
 video(id='183', label_true=0, label_pred=0),
 video(id='191', label_true=0, label_pred=0),
 video(id='192', label_true=0, label_pred=0),
 video(id='157', label_true=1, label_pred=1),
 video(id='159', label_true=1, label_pred=1),
 video(id='161', label_true=1, label_pred=1),
 video(id='162', label_true=1, label_pred=1),
 video(id='166', label_true=1, label_pred=1),
 video(id='174', label_true=1, label_pred=0),
 video(id='175', label_true=1, label_pred=1),
 video(id='179', label_true=1, label_pred=1),
 video(id='185', label_true=1, label_pred=1),
 video(id='186', label_true=1, lab