In [None]:
import os
import shutil
import random

In [None]:
# Seed Python's `random` module; it drives the random.shuffle() call that
# samples the no-slick images when the dataset folder is assembled below.
random.seed(123)

In [None]:
# Directory that contains the input dataset folders (read from).
ROOT_DIR = '/kaggle/input/'
# Directory in which the two class folders are assembled (written to).
DATASET_DIR = '/kaggle/working/dataset/'

In [None]:
# Names of the dataset folders under ROOT_DIR whose sub-directories are used.
datasets = ['labeleddata2']

In [None]:
# Gather the full path of every entry found directly inside each dataset folder.
dataset_dirs = []
for dataset in datasets:
    dataset_root = os.path.join(ROOT_DIR, dataset)
    dataset_dirs.extend(
        os.path.join(dataset_root, entry) for entry in os.listdir(dataset_root)
    )

In [None]:
# Start from a clean slate: drop class folders left over from a previous run.
# The existence check keeps this cell from raising FileNotFoundError on a
# fresh session, where the folders have not been created yet.
for class_dir in ('0_no_slicks', '1_slicks'):
    stale_dir = os.path.join(DATASET_DIR, class_dir)
    if os.path.isdir(stale_dir):
        shutil.rmtree(stale_dir)

In [None]:
# Assemble a balanced two-class image folder under DATASET_DIR:
#   1_slicks     <- every file of each '<name>_slicks' directory
#   0_no_slicks  <- an equally sized random sample of the matching '<name>' directory
output_path_no_slicks = os.path.join(DATASET_DIR, '0_no_slicks')
output_path_slicks = os.path.join(DATASET_DIR, '1_slicks')

# Loop-invariant, so done once up front; this also guarantees that both class
# folders exist even when no input directory is found.
os.makedirs(output_path_no_slicks, exist_ok=True)
os.makedirs(output_path_slicks, exist_ok=True)

# Entries whose name does not end in 'slicks' hold the negative examples.
# Sorting makes the sequence of random.shuffle() calls independent of the
# order in which the filesystem happened to list the directories.
no_slicks = sorted(d for d in dataset_dirs if not d.endswith('slicks'))
for no_slick in no_slicks:
    slick_dir = no_slick + '_slicks'
    if not os.path.isdir(slick_dir):
        # No positive counterpart, hence nothing to balance against.
        print(f'Skipping {no_slick}: {slick_dir} not found')
        continue

    # NOTE(review): files are copied into one shared folder per class, so
    # equal file names coming from different source directories overwrite
    # each other - confirm names are unique across directories.
    slick_files = sorted(os.listdir(slick_dir))
    for s in slick_files:
        shutil.copy(os.path.join(slick_dir, s), output_path_slicks)

    # os.listdir() returns entries in arbitrary order; sort before shuffling
    # so the seeded shuffle picks the same files on every run.
    no_slick_files = sorted(os.listdir(no_slick))
    random.shuffle(no_slick_files)
    for ns in no_slick_files[:len(slick_files)]:
        shutil.copy(os.path.join(no_slick, ns), output_path_no_slicks)

In [None]:
# Number of positive ('slicks') images in the assembled dataset; the path is
# derived from DATASET_DIR so it cannot drift from the folder written above.
print(len(os.listdir(os.path.join(DATASET_DIR, '1_slicks'))))

In [None]:
# Number of negative ('no slicks') images in the assembled dataset; the path
# is derived from DATASET_DIR so it cannot drift from the folder written above.
print(len(os.listdir(os.path.join(DATASET_DIR, '0_no_slicks'))))

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt 

In [None]:
# Root of the working area; every path below is derived from it.
PROJECT_ROOT = '/kaggle/working/'

# Image folder handed to the data loader as `root_dir`.
DATASET_PATH = os.path.join(PROJECT_ROOT, 'dataset')
CHECKPOINTS_PATH = os.path.join(PROJECT_ROOT, 'checkpoints')
# Per-epoch visualisation images are written below this folder.
PLOTS_PATH = os.path.join(CHECKPOINTS_PATH, 'plots')
# Creates CHECKPOINTS_PATH as well, since it is a parent of PLOTS_PATH.
os.makedirs(PLOTS_PATH, exist_ok=True)

In [None]:
class UnNormalize(object):
    """Invert a per-channel ``(x - mean) / std`` normalization.

    Args:
        mean: Sequence with one mean per channel.
        std: Sequence with one standard deviation per channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Return ``tensor * std + mean`` computed per channel.

        Args:
          tensor (Tensor): A single image of size (C, H, W) or a batch of
            size (N, C, H, W).
        Returns:
          Tensor: A new, un-normalized tensor of the same shape. The input
            tensor is left unmodified.
        """
        # Shape the statistics as (C, 1, 1) so they broadcast over H and W
        # and, for a batch, over the leading N dimension too. Zipping over
        # the first dimension is only correct for a single (C, H, W) image:
        # on a batch it would pair *images* with the channel statistics.
        mean = torch.as_tensor(
            self.mean, dtype=tensor.dtype, device=tensor.device
        ).view(-1, 1, 1)
        std = torch.as_tensor(
            self.std, dtype=tensor.dtype, device=tensor.device
        ).view(-1, 1, 1)
        # Out-of-place on purpose: the caller's tensor must not change.
        return tensor * std + mean

# Per-channel statistics (three channels); the same values are passed to
# transforms.Normalize inside load_dataset_ECOL_labeled.
MEANS = [0.485, 0.456, 0.406]
STDS = [0.229, 0.224, 0.225]
# Shared helper that undoes that normalization before images are plotted.
unorm = UnNormalize(MEANS, STDS)

In [None]:
def load_dataset_ECOL_labeled(
    root_dir, 
    img_size, 
    batch_size, 
    num_workers=0, 
    shuffle=True,
    transform=None,
    ):
    """Build a DataLoader over an ImageFolder-style directory.

    Args:
        root_dir: Directory passed to ``ImageFolder``.
        img_size: Target size passed to ``transforms.Resize`` by the default
            transform.
        batch_size: Batch size of the returned loader.
        num_workers: Number of loader worker processes.
        shuffle: Whether the loader shuffles the samples.
        transform: Optional transform to use instead of the default
            ToTensor -> Resize -> Normalize pipeline.

    Returns:
        DataLoader: Loader over the ``ImageFolder`` dataset at ``root_dir``.
    """
    if transform is None:
        # Default pipeline; the statistics mirror the module-level MEANS/STDS.
        channel_means = [0.485, 0.456, 0.406]
        channel_stds = [0.229, 0.224, 0.225]
        default_steps = [
            transforms.ToTensor(),
            transforms.Resize(img_size),
            # transforms.Grayscale(num_output_channels=3),
            transforms.Normalize(mean=channel_means, std=channel_stds),
        ]
        transform = transforms.Compose(default_steps)

    return DataLoader(
        ImageFolder(root_dir, transform=transform),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )

In [None]:
def show_batch_as_grid(input_batch, save_path=None):
    """Draw a batch of normalized images as one grid figure.

    Args:
        input_batch (Tensor): Batch of normalized images, one image per
            entry along the first dimension.
        save_path (str, optional): File to save the figure to. When None,
            the figure is displayed instead of being saved.
    """
    if input_batch.shape[0] == 0:
        # Nothing to draw; a zero-width figure would raise.
        return

    # Un-normalize one (C, H, W) image at a time on a private copy: dim 0 of
    # the batch is the image index, not the channel, and the caller's tensor
    # must not be modified.
    images = input_batch.detach().cpu()
    images = torch.stack([unorm(image.clone()) for image in images])

    # Keep values inside the [0, 1] range imshow expects for float RGB data.
    grid_img = torchvision.utils.make_grid(images).clamp(0, 1)

    fig = plt.figure(figsize=(6 * input_batch.shape[0], 10))
    plt.imshow(grid_img.permute(1, 2, 0))
    if save_path is not None:
        plt.savefig(save_path)
    else:
        plt.show()
    # Close explicitly: this is called repeatedly from the training loop.
    plt.close(fig)

    
def visualize_false_negatives(inputs, labels, preds, save_path=None):
    """Plot the positive samples of a batch that were predicted negative.

    Args:
        inputs (Tensor): Batch of input images, indexed along dim 0.
        labels (Tensor): Ground-truth labels, one per sample (1 = positive).
        preds (Tensor): Prediction scores, one per sample; a score above 0.5
            counts as a positive prediction (the training loop passes
            sigmoid outputs).
        save_path (str, optional): Forwarded to ``show_batch_as_grid``.
    """
    predicted_positive = preds > 0.5
    # reshape(-1) rather than squeeze(): squeeze() collapses a batch of one
    # into a 0-d mask, and indexing with a 0-d mask adds a dimension instead
    # of selecting rows.
    is_false_negative = ((~predicted_positive) & (labels == 1)).reshape(-1)

    if not is_false_negative.any():
        return

    show_batch_as_grid(inputs[is_false_negative], save_path)


def visualize_true_positives(inputs, labels, preds, save_path=None):
    """Plot the positive samples of a batch that were predicted positive.

    Args:
        inputs (Tensor): Batch of input images, indexed along dim 0.
        labels (Tensor): Ground-truth labels, one per sample (1 = positive).
        preds (Tensor): Prediction scores, one per sample; a score above 0.5
            counts as a positive prediction (the training loop passes
            sigmoid outputs).
        save_path (str, optional): Forwarded to ``show_batch_as_grid``.
    """
    predicted_positive = preds > 0.5
    # reshape(-1) rather than squeeze(): squeeze() collapses a batch of one
    # into a 0-d mask, and indexing with a 0-d mask adds a dimension instead
    # of selecting rows.
    is_true_positive = (predicted_positive & (labels == 1)).reshape(-1)

    if not is_true_positive.any():
        return

    show_batch_as_grid(inputs[is_true_positive], save_path)

In [None]:
# Training hyper-parameters.
# NOTE(review): EPOCHS and MAX_ITERS are not read by any code visible in this
# notebook (the training loop hard-codes range(30)) - confirm the intent.
EPOCHS = 10
LEARNING_RATE = 0.0005
MAX_ITERS = 1000

# Keyword arguments forwarded to load_dataset_ECOL_labeled.
LOADER_PARAMS = {
    'root_dir': DATASET_PATH, 'img_size': (256, 256), 'batch_size': 32
}

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(DEVICE)

# Loss function: binary cross-entropy computed on raw logits, so the model
# output is passed to it without a sigmoid.
criterion = nn.BCEWithLogitsLoss()

# Data loader over the assembled dataset folder
dataloader = load_dataset_ECOL_labeled(**LOADER_PARAMS)

# Load the pretrained model
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` - verify against the installed version.
model = models.vgg16(pretrained=True)

# Replace the final classification layer with a new one that
# outputs a single logit (one class: slick / no slick)
model.classifier[6] = nn.Linear(4096, 1)

# Move the model to the selected device
model = model.to(DEVICE)

# Optimizer over all model parameters (no layers are frozen here)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Per-epoch history; the training cell re-initialises both lists.
losses = []
accuracies = []

In [None]:
# Per-epoch training history, stored as plain Python floats so the lists can
# be plotted or serialised without touching the GPU.
losses = []
accuracies = []

# len(dataloader) also counts the final partial batch; dataset_size //
# batch_size undercounts it (and is 0 for a dataset smaller than one batch),
# which skewed the average loss.
total_batch_num = len(dataloader)
num_samples = len(dataloader.dataset)

# Training loop
# NOTE(review): the epoch count is hard-coded to 30 although the setup cell
# defines EPOCHS = 10; kept at 30 to preserve the existing run - confirm.
for epoch in range(30):
    model.train()
    epoch_plots_dir = os.path.join(PLOTS_PATH, f'epoch_{epoch}')
    os.makedirs(epoch_plots_dir, exist_ok=True)

    running_loss = 0.0
    running_hits = 0           # correctly classified samples
    num_positives = 0          # samples predicted positive
    total_positives = 0        # samples labelled positive
    max_pred = float('-inf')   # largest raw logit seen in this epoch
    for i, data in enumerate(dataloader):
        inputs, labels = data[0].to(DEVICE), data[1].to(DEVICE)
        # (B,) integer class indices -> (B, 1) floats, matching the logits.
        labels = labels.unsqueeze(dim=1).to(torch.float32)

        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss

        # Book-keeping only: no gradients needed, and .item() keeps the
        # counters as Python numbers instead of device tensors.
        with torch.no_grad():
            preds_sigmoid = torch.sigmoid(preds)
            preds_positive = preds_sigmoid > 0.5

            running_hits += (labels == preds_positive).sum().item()
            num_positives += preds_positive.sum().item()
            total_positives += int(labels.sum().item())
            max_pred = max(max_pred, preds.max().item())

        if i % 10 == 0:
            print(f'Epoch {epoch}, Batch {i}/{total_batch_num}: {batch_loss}')

        if i % 30 == 0:
            save_path = os.path.join(epoch_plots_dir, f'false_negative_batch_{i}.png')
            visualize_false_negatives(inputs, labels, preds_sigmoid, save_path)

            save_path = os.path.join(epoch_plots_dir, f'true_positive_batch_{i}.png')
            visualize_true_positives(inputs, labels, preds_sigmoid, save_path)

    losses.append(running_loss / total_batch_num)
    accuracies.append(running_hits / num_samples)

    print(f'\n=== Epoch {epoch} ===')
    print(f'Avg loss = {losses[-1]}')
    print(f'ACC = {accuracies[-1]}')
    print(f'positives  = {100 * num_positives / num_samples}% ; {num_positives} / {total_positives}')
    print(f'max pred = {max_pred}\n')

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sn
import pandas as pd
import numpy as np

In [None]:
# Confusion-matrix counts, plus the predicted probability behind every
# outcome (used for the histograms that follow).
TP, TN, FP, FN = 0, 0, 0, 0
TPp, TNp, FPp, FNp = [], [], [], []

# Evaluation has to run in eval mode so that Dropout is switched off (the
# original note on this line said the same); in train mode the predictions
# are stochastic.
model.eval()

# NOTE(review): this evaluates on the same dataloader that was used for
# training - presumably no held-out split exists yet; confirm before
# reporting these numbers.
num_batches = len(dataloader)

with torch.no_grad():  # inference only: do not build autograd graphs
    for batch_idx, data in enumerate(dataloader):

        if batch_idx % 10 == 0:
            print(f'{batch_idx} / {num_batches}')

        inputs, labels = data[0].to(DEVICE), data[1].to(DEVICE)
        labels = labels.unsqueeze(dim=1).to(torch.float32)

        preds_eval = model(inputs)
        # Use this batch's logits; the previous code applied the sigmoid to
        # `preds`, the stale output of the last training batch.
        preds_eval_sigmoid = torch.sigmoid(preds_eval)

        # One device-to-host transfer per batch instead of one per sample.
        probs = preds_eval_sigmoid.reshape(-1).cpu().tolist()
        targets = labels.reshape(-1).cpu().tolist()

        for prob, target in zip(probs, targets):
            predicted_positive = prob > 0.5
            if not predicted_positive and target == 0:
                TN += 1
                TNp.append(prob)
            elif predicted_positive and target == 1:
                TP += 1
                TPp.append(prob)
            elif not predicted_positive and target == 1:
                FN += 1
                FNp.append(prob)
            else:
                FP += 1
                FPp.append(prob)

In [None]:
# Overall accuracy: correct predictions divided by all evaluated samples.
(TP + TN)/(TP + TN + FP + FN)

In [None]:
# Predicted probabilities of the true positives (label 1, predicted 1).
# Title and axis labels let the figure stand on its own next to the three
# sibling histograms.
fig, ax = plt.subplots()
ax.hist(TPp)
ax.set_xlim(0, 1)
ax.set(title='True positives', xlabel='Predicted probability', ylabel='Count')
plt.show()

In [None]:
# Predicted probabilities of the true negatives (label 0, predicted 0).
# Title and axis labels let the figure stand on its own next to the three
# sibling histograms.
fig, ax = plt.subplots()
ax.hist(TNp)
ax.set_xlim(0, 1)
ax.set(title='True negatives', xlabel='Predicted probability', ylabel='Count')
plt.show()

In [None]:
# Predicted probabilities of the false positives (label 0, predicted 1).
# Title and axis labels let the figure stand on its own next to the three
# sibling histograms.
fig, ax = plt.subplots()
ax.hist(FPp)
ax.set_xlim(0, 1)
ax.set(title='False positives', xlabel='Predicted probability', ylabel='Count')
plt.show()

In [None]:
# Predicted probabilities of the false negatives (label 1, predicted 0).
# Title and axis labels let the figure stand on its own next to the three
# sibling histograms.
fig, ax = plt.subplots()
ax.hist(FNp)
ax.set_xlim(0, 1)
ax.set(title='False negatives', xlabel='Predicted probability', ylabel='Count')
plt.show()