In [7]:
'''
/**
* 2/2021
* Pontifícia Universidade Católica de Minas Gerais
* Advisor - Prof. Alexei Machado
* Designed by:
* @author Igor Machado Seixas - 561897
* @version 0.10a
*/
'''

'''
/**
* Libraries:
* Numpy - PIL - sklearn - torch - torchvision - matplotlib
*/
'''

import numpy as np
import os
import random
import itertools
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

from torchvision import transforms
from torchvision.utils import make_grid
import torchvision.models as models

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

from PIL import Image
from PIL import ImageOps

import matplotlib.pyplot as plt

from pathlib import Path

In [8]:
# Data preparation.
# The training and test pipelines are the same two steps: conversion to a
# tensor followed by normalization with a single mean/std value.
def _make_transform():
    """Return a fresh ToTensor + Normalize pipeline."""
    steps = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.25,], std=[0.5,]),
    ]
    return transforms.Compose(steps)


transform_train = _make_transform()
transform_test = _make_transform()

In [9]:
# Configuration.
# The dataset folders are looked up in "Datasets/Colombiam" inside the parent
# of the current working directory.
path = os.getcwd()
# Build the paths with os.path.join rather than hand-written backslashes: the
# previous literals only resolved on Windows and relied on invalid escape
# sequences such as '\D' and '\C'.
datasets_directory = os.path.join(os.path.dirname(path), 'Datasets', 'Colombiam')
root_directory = os.path.join(datasets_directory, 'thyroid-crop-small-background-360-train')
test_root_directory = os.path.join(datasets_directory, 'thyroid-crop-small-background-360-test')
print(root_directory)
train_ratio = 1 # Fraction of each class folder kept for training + validation.
validation_ratio = .25 # Fraction of the kept files that goes to validation.
rotation_angle = 90

random_seed = 15
torch.manual_seed(random_seed) # Reproducibility
batch_size = 32
pin_memory = True

c:\Users\igormseixas\TCC_II\Datasets\Colombiam\thyroid-crop-small-background-360-train


In [10]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')
print('Running on cuda' if use_cuda else 'Running on cpu')

Running on cuda


In [11]:
# Function to create the training, validation and test DataLoaders plus the list of classes.
# Param img_folder - folder whose sub-folders (one per class) hold the training/validation images.
# Param test_folder - folder whose sub-folders (one per class) hold the test images.
# Param train_ratio - fraction of each class folder that is kept (training + validation).
# Param validation_ratio - fraction of the kept files that goes to the validation set.
# Param transform_train - callable applied to every training/validation image.
# Param transform_test - callable applied to every test image.
# Param rotation_angle - currently unused.
# Param batch_size - batch size of the three DataLoaders.
# Param data_augmentation - currently unused.
def load_dataset(img_folder, test_folder, train_ratio, validation_ratio, transform_train, transform_test, rotation_angle=0, batch_size=64, data_augmentation=False):
    """Load the images found under two folders into three DataLoaders.

    Each sub-folder is treated as one class; the label of an image is the
    first character of its folder name converted to ``int``. Sub-folders whose
    name starts with '8' are skipped.

    For every class folder of ``img_folder`` the file names are shuffled with
    ``random.Random(random_seed)``. The first ``train_ratio`` fraction is kept;
    the first ``validation_ratio`` fraction of the kept names goes to the
    validation set and the rest to the training set. Files outside the kept
    fraction are ignored. Every file under ``test_folder`` goes to the test set.

    The function reads the module-level ``random_seed`` and ``pin_memory``.

    Returns:
        (train_dl, val_dl, test_dl, classes) where ``classes`` is a numpy array
        with the names of the class folders found under ``img_folder``.
    """

    def _load_image(image_path, transform):
        # The context manager releases the file handle as soon as the image
        # has been transformed, instead of leaving it to the garbage collector.
        with Image.open(image_path) as opened_image:
            return np.array(transform(opened_image))

    image_data_train = []
    image_data_val = []
    image_data_test = []
    image_class = []

    # Iterate over the class folders holding training and validation images.
    for class_dir in os.listdir(img_folder):
        if class_dir.startswith('8'):
            continue
        print("Train and Validation:", class_dir)
        class_path = os.path.join(img_folder, class_dir)
        file_names = os.listdir(class_path)

        # Shuffle a copy so the images are still visited in listing order below.
        shuffled_names = list(file_names)
        random.Random(random_seed).shuffle(shuffled_names)
        kept_names = shuffled_names[:int(len(shuffled_names) * train_ratio)]
        # The validation files are the first validation_ratio share of the kept ones.
        val_names = set(kept_names[:int(len(kept_names) * validation_ratio)])
        # Sets give constant-time membership tests in the loop below.
        kept_names = set(kept_names)

        for file_name in file_names:
            if file_name not in kept_names:
                continue
            # The training transformation is used for training and validation alike.
            image = _load_image(os.path.join(class_path, file_name), transform_train)
            if file_name in val_names:
                image_data_val.append([image, int(class_dir[0])])
            else:
                image_data_train.append([image, int(class_dir[0])])

        image_class.append(class_dir)

    # Iterate over the class folders holding the test images.
    for class_dir in os.listdir(test_folder):
        if class_dir.startswith('8'):
            continue
        print("Test:", class_dir)
        class_path = os.path.join(test_folder, class_dir)
        for file_name in os.listdir(class_path):
            image = _load_image(os.path.join(class_path, file_name), transform_test)
            image_data_test.append([image, int(class_dir[0])])

    # Wrap the in-memory lists into DataLoaders; only the training set is shuffled.
    train_dl = DataLoader(image_data_train, batch_size=batch_size, shuffle=True, pin_memory=pin_memory)
    val_dl = DataLoader(image_data_val, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)
    test_dl = DataLoader(image_data_test, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)
    classes = np.array(image_class)

    return train_dl, val_dl, test_dl, classes

In [12]:
# Model to be loaded.
# The architecture is created without the ImageNet weights: load_state_dict
# below (strict by default) overwrites every parameter with the checkpoint
# values, so downloading the pretrained weights first would be wasted work.
model = models.mobilenet_v3_large(pretrained=False)
model = model.to(device=device)

# Loss and optimizer.
learning_rate = 1e-3 #It was 0.07
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr= learning_rate) # Adam

# Restore the saved training state.
# map_location lets a checkpoint that was saved on a GPU be loaded on a
# CPU-only machine (and places the tensors on the selected device).
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
# NOTE(review): the classifier head is the stock torchvision one and the
# evaluation cells of this notebook report a single predicted class; confirm
# that this checkpoint matches the dataset and its label mapping.
checkpoint = torch.load('./my_model/my_model_5_classification/my_model_98_MobileNet_pre_large.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

In [13]:
# Show the epoch and the loss stored in the checkpoint.
for checkpoint_key in ('epoch', 'loss'):
    print(checkpoint[checkpoint_key])

67
0.3034930234944335


In [14]:
# Check the accuracy of a model on a data set.
# The function also returns the targets and the predictions so that other
# metrics can be computed from them.
def check_acc(dataloader, model, device):
    """Evaluate ``model`` on ``dataloader`` and print/return its accuracy.

    Param dataloader - iterable yielding ``(data, targets)`` batches of tensors.
    Param model - module called as ``model(data)``; the prediction of a sample
        is the index of the highest score along dimension 1.
    Param device - device the batches are moved to before the forward pass.

    Returns ``(target, predict, result)``: the true labels and the predicted
    labels as flat Python lists, and the accuracy as a percentage (float).
    An empty ``dataloader`` yields ``([], [], 0.0)`` instead of raising
    ``ZeroDivisionError``.

    The model is switched to evaluation mode and is left in that mode.
    """
    num_correct = 0
    num_samples = 0
    predict = []
    target = []

    # Set model to eval
    model.eval()

    with torch.no_grad():
        for data, targets in dataloader:
            data = data.to(device=device)
            targets = targets.to(device=device)

            ## Forward Pass
            scores = model(data)
            _, predictions = scores.max(1)
            # Keep the running count as a plain int rather than a device tensor.
            num_correct += int((predictions == targets).sum().item())
            num_samples += predictions.size(0)

            # extend() appends in place; rebuilding the lists with "+" on every
            # batch copies everything collected so far.
            predict.extend(predictions.cpu().tolist())
            target.extend(targets.cpu().tolist())

    # Guard against an empty data set (no samples means nothing to divide by).
    result = num_correct / num_samples * 100 if num_samples else 0.0
    print(
        f"Got {num_correct} / {num_samples} with accuracy {result:.2f}"
    )

    return target, predict, result

In [15]:
# Build the training, validation and test DataLoaders.
train_dl, val_dl, test_dl, classes = load_dataset(
    root_directory,
    test_root_directory,
    train_ratio,
    validation_ratio,
    transform_train,
    transform_test,
    batch_size=batch_size,
)
# Report how many samples each split holds, and the overall total.
n_train, n_val, n_test = (len(loader.dataset) for loader in (train_dl, val_dl, test_dl))
print("Training size: ", n_train)
print("Validation size: ", n_val)
print("Test size: ", n_test)
print("Total size: ", n_train + n_val + n_test)

Train and Validation: 1
Train and Validation: 2
Train and Validation: 3
Train and Validation: 4-4a
Train and Validation: 5-4b
Train and Validation: 6-4c
Train and Validation: 7-5
Test: 1
Test: 2
Test: 3
Test: 4-4a
Test: 5-4b
Test: 6-4c
Test: 7-5
Training size:  3912
Validation size:  1304
Test size:  134
Total size:  5350


In [16]:
# Accuracy and per-class metrics on the training split.
train_output, train_predict, _ = check_acc(
    dataloader=train_dl,
    model=model,
    device=device,
)
print('\nResults on the training set:')
train_report = classification_report(train_output, train_predict)
print(train_report)

Got 420 / 3912 with accuracy 10.74

Results on the training set:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           2       0.11      0.97      0.19       432
           3       0.00      0.00      0.00       180
           4       0.00      0.00      0.00      1176
           5       0.00      0.00      0.00       936
           6       0.00      0.00      0.00       732
           7       0.00      0.00      0.00       456

    accuracy                           0.11      3912
   macro avg       0.02      0.14      0.03      3912
weighted avg       0.01      0.11      0.02      3912



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
# Accuracy and per-class metrics on the validation split.
val_output, val_predict, _ = check_acc(
    dataloader=val_dl,
    model=model,
    device=device,
)
print('\nResults on the validating set:')
val_report = classification_report(val_output, val_predict)
print(val_report)

Got 142 / 1304 with accuracy 10.89

Results on the validating set:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           2       0.11      0.99      0.20       144
           3       0.00      0.00      0.00        60
           4       0.00      0.00      0.00       392
           5       0.00      0.00      0.00       312
           6       0.00      0.00      0.00       244
           7       0.00      0.00      0.00       152

    accuracy                           0.11      1304
   macro avg       0.02      0.14      0.03      1304
weighted avg       0.01      0.11      0.02      1304



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
# Accuracy and per-class metrics on the test split.
test_output, test_predict, _ = check_acc(test_dl, model, device)
# The header previously said "validating set" although this cell evaluates the test set.
print('\nResults on the test set:')
print(classification_report(test_output, test_predict))

Got 15 / 134 with accuracy 11.19

Results on the validating set:
              precision    recall  f1-score   support

           2       0.11      1.00      0.20        15
           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        38
           5       0.00      0.00      0.00        32
           6       0.00      0.00      0.00        28
           7       0.00      0.00      0.00        16

    accuracy                           0.11       134
   macro avg       0.02      0.17      0.03       134
weighted avg       0.01      0.11      0.02       134



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
# Confusion matrix of the test predictions (sklearn puts the true labels on
# the rows and the predicted labels on the columns).
test_confusion = confusion_matrix(test_output, test_predict)
print(test_confusion)

[[15  0  0  0  0  0]
 [ 5  0  0  0  0  0]
 [38  0  0  0  0  0]
 [32  0  0  0  0  0]
 [28  0  0  0  0  0]
 [16  0  0  0  0  0]]
