# Import libs

In [None]:
import os
import json
import random
import pickle
import numpy as np
import pandas as pd
from itertools import product
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from datetime import datetime

import utils
from VisionModels import CustomCNN, CustomEfficientNetB3
from VisionDatasets import ContactDataset

%load_ext autoreload
%autoreload 2

# Pre-settings

In [None]:
# Both settings files are looked up relative to the current working directory.
working_dir = os.getcwd()
parameters_json = os.path.join(working_dir, 'settings/parameters.json')
paths_json = os.path.join(working_dir, 'settings/paths.json')

In [None]:
# Load the hyper-parameters and the data/location settings.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
with open(parameters_json, 'r', encoding='utf-8') as json_file:
    params = json.load(json_file)

with open(paths_json, 'r', encoding='utf-8') as json_file:
    paths = json.load(json_file)

In [None]:
# Make runs repeatable: one seed drives every random number generator in use.
random_seed = params["random_seed"]

# Environment switches: Python hash seed and the cuBLAS workspace setting
# (see the PyTorch reproducibility notes for CUBLAS_WORKSPACE_CONFIG).
os.environ.update({
    'PYTHONHASHSEED': str(random_seed),
    'CUBLAS_WORKSPACE_CONFIG': ":4096:8",
})

# Seed Python, NumPy and PyTorch (CPU, current GPU, all GPUs) in that order.
for seed_fn in (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all):
    seed_fn(random_seed)

# cuDNN: request deterministic kernels and disable the auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [None]:
# Image-set indices assigned to each split (matched against the
# `imageset_<n>` part of the image paths).
train_set = [
    1, 3, 5, 7, 8, 10, 12, 14,
    15, 17, 19, 21, 41, 98, 107, 143,
]
val_set = [2, 4, 9, 11, 16, 18, 42, 116]
test_set = [
    37, 68, 71, 23, 22, 24, 25, 26, 27, 28, 29,
    46, 47, 38, 95, 104, 101, 92, 110, 45, 125, 134,
]

# Image-set index -> recording name; the name is used to build the keypoint
# (.h5) file name when predicting on the test sets.
mapping_set = {
    # training sets
    1: 'C_M1_T1_4',
    3: 'C_M1_T1_7',
    5: 'C_M1_T1_3',
    7: 'C_M1_T1_6',
    8: 'R2_M1_T1_6',
    10: 'R2_M1_T1_7',
    12: 'R2_M1_T1_3',
    14: 'R2_M1_T1_5',
    15: 'L2_M1_T1_3',
    17: 'L2_M1_T1_4',
    19: 'L2_M1_T1_5',
    21: 'L2_M1_T1_7',
    41: 'Z2_M1_T1_1',
    98: 'M3_Z2_NF',
    107: 'M5_Z2_NF',
    143: 'M12_Z2_NF',
    # validation sets
    2: 'C_M1_T1_1',
    4: 'C_M1_T1_5',
    9: 'R2_M1_T1_2',
    11: 'R2_M1_T1_4',
    16: 'L2_M1_T1_6',
    18: 'L2_M1_T1_1',
    42: 'Z2_M1_T1_2',
    116: 'M7_Z2_NF',
    # test sets
    37: 'C_M1_T1_8',
    68: 'M1_NF',
    71: 'M2_NF',
    23: 'R1_M1_T1_1',
    22: 'R1_M1_T1_2',
    24: 'R3_M1_T1_1',
    25: 'R3_M1_T1_2',
    26: 'L1_M1_T1_1',
    27: 'L1_M1_T1_2',
    28: 'L3_M1_T1_1',
    29: 'L3_M1_T1_2',
    46: 'Z1_M1_T1_1',
    47: 'Z3_M1_T1_1',
    38: 'R2_M1_T1_8',
    95: 'M3_R2_NF',
    104: 'M5_R2_NF',
    101: 'M5_L2_NF',
    92: 'M3_L2_NF',
    110: 'M7_L2_NF',
    45: 'Z2_M1_T1_5',
    125: 'M8_Z2_NF',
    134: 'M9_Z2_NF',
}

In [None]:
# Experiment configuration: which data set to use and which architectures /
# label sources to combine.
data_set = 'silicone'
model_set = ['CustomCNN', 'EfficientNet']
label_set = ['GT', 'MTurk']

# Every (model, label) pair, with the label source varying fastest.
model_combinations = [
    (model_choice, label_choice)
    for model_choice in model_set
    for label_choice in label_set
]

# Import Data

In [None]:
# Read the label table of the selected data set; the first CSV row is the header.
labels_csv = paths[data_set]['labels']
data = pd.read_csv(labels_csv, header=0)

In [None]:
# Split the rows by the image-set index embedded in each path
# ("imageset_<n>"): rows whose index is in train_set / val_set are kept.
image_set_index = data['HPC_Path'].str.extract(
    r'imageset_(\d+)', expand=False).astype(int)

train_data = data[image_set_index.isin(train_set)]
val_data = data[image_set_index.isin(val_set)]

In [None]:
# Build, for every label source in `label_set`, the training and validation
# DataLoaders plus a class-weighted cross-entropy loss. Results are stored in
# dictionaries keyed by the label name.
train_dataloader = {}
val_dataloader = {}
loss_fn = {}

# Column of the label table that holds the images, and the label column to
# use for each label source.
image_col = 'HPC_Path'
label_columns = {'GT': 'GT', 'MTurk': 'MTurk_Label'}

# Use at most 16 loader workers. os.cpu_count() can return None when the
# count cannot be determined; fall back to a single worker in that case.
loader_workers = min(16, os.cpu_count() or 1)

for label_name in label_set:

    # Fail loudly instead of silently reusing the previous label column.
    if label_name not in label_columns:
        raise ValueError(f'Unknown label source: {label_name!r}')
    label_col = label_columns[label_name]

    train_dataset = ContactDataset(
        images=train_data[image_col].tolist(),
        labels=train_data[label_col].to_numpy(),
        coords=list(zip(
            train_data['x'].astype(int),
            train_data['y'].astype(int))),
        jitter=True)

    # The validation coordinates must come from the validation rows so that
    # every image is paired with its own (x, y) position.
    val_dataset = ContactDataset(
        images=val_data[image_col].tolist(),
        labels=val_data[label_col].to_numpy(),
        coords=list(zip(
            val_data['x'].astype(int),
            val_data['y'].astype(int))),
        jitter=False)

    # Wrap the datasets in DataLoaders.
    train_dataloader[label_name] = DataLoader(
        dataset=train_dataset,
        batch_size=params['batch_size'],
        num_workers=loader_workers,
        pin_memory=True,
        shuffle=True)

    # NOTE(review): shuffling is kept for validation to leave the random
    # number stream of existing runs unchanged; it is not needed for metrics.
    val_dataloader[label_name] = DataLoader(
        dataset=val_dataset,
        batch_size=params['batch_size'],
        num_workers=loader_workers,
        pin_memory=True,
        shuffle=True)

    # Weights come from the training set (presumably per-class weights to
    # counter class imbalance - confirm in ContactDataset.getWeights).
    weights = train_dataset.getWeights().to(device)
    loss_fn[label_name] = nn.CrossEntropyLoss(weight=weights)


# Training

In [None]:
# Train one fresh model for every (architecture, label source) combination,
# then store its metrics and weights.
model_factories = {
    'CustomCNN': CustomCNN,
    'EfficientNet': CustomEfficientNetB3,
}

for model_name, label_name in model_combinations:
    # Select the model. An unknown name raises instead of silently reusing
    # the model left over from the previous iteration.
    if model_name not in model_factories:
        raise ValueError(f'Unknown model name: {model_name!r}')
    model = model_factories[model_name]()

    # Set up the optimizer with the per-model hyper-parameters.
    optimizer = optim.Adam(
        model.parameters(),
        lr=params[model_name]['learning_rate'],
        weight_decay=params[model_name]['weight_decay'])

    # Train with the loss and loaders that belong to this label source.
    utils.train(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn[label_name],
        dataloader=train_dataloader[label_name],
        val_dataloader=val_dataloader[label_name],
        device=device,
        use_tqdm=True,
        epochs=params['epochs'])

    utils.save_metrics(model, model_name, label_name)
    utils.save_state_dict(model, model_name, label_name)

# Predict

In [None]:
# Prediction store; the prediction loop fills it with entries keyed by
# (model_name, label_name, set_name).
results = dict()

In [None]:
# Run every trained model on every test image set and collect the
# predictions together with the labels derived from the label files.

# A frame is labelled 1 when the Euclidean norm of its three label columns
# exceeds this threshold, otherwise 0.
force_threshold = 0.2

# Use at most 16 loader workers; os.cpu_count() may return None.
loader_workers = min(16, os.cpu_count() or 1)

model_builders = {
    'CustomCNN': CustomCNN,
    'EfficientNet': CustomEfficientNetB3,
}

for set_name in tqdm(test_set):
    # Build the paths of the label file, the image folder and the keypoints.
    keypoints_model = paths[data_set]['keypoints_model']
    label_path = os.path.join(
        paths[data_set]['image_set'], f'labels_{set_name}.txt')
    images_path = os.path.join(
        paths[data_set]['image_set'], f'imageset_{set_name}')
    coordinates_path = os.path.join(
        paths[data_set]['keypoints'],
        f"{mapping_set[set_name]}_L_h264{keypoints_model}.h5")

    # Load the data files: columns 1..3 of the label file and the (x, y)
    # position of the 'Mid_1' keypoint.
    test_data = pd.read_csv(label_path, header=None).iloc[:, 1:4].to_numpy()
    coordinates = pd.read_hdf(coordinates_path).loc[:, [
        (keypoints_model, 'Mid_1', 'x'),
        (keypoints_model, 'Mid_1', 'y')]].to_numpy()

    # One image per label row, named by its row index.
    # NOTE(review): assumes the keypoint file has one row per label row.
    test_images = [
        os.path.join(images_path, f'img_{index}.jpg')
        for index in range(len(test_data))]
    test_labels = (
        np.linalg.norm(test_data, axis=1) > force_threshold
    ).astype(int).tolist()

    # Create the dataset and its loader. No shuffling: the stored predictions
    # then stay in frame order and can be matched back to the images.
    test_dataset = ContactDataset(
        images=test_images,
        labels=test_labels,
        coords=coordinates.astype(int).tolist(),
        jitter=False)

    test_dataloader = DataLoader(
        dataset=test_dataset,
        batch_size=512,
        num_workers=loader_workers,
        pin_memory=True,
        shuffle=False)

    # Predict with each trained model.
    for model_name, label_name in model_combinations:
        # Select the model; an unknown name raises instead of silently
        # reusing the model from the previous iteration.
        if model_name not in model_builders:
            raise ValueError(f'Unknown model name: {model_name!r}')
        model = model_builders[model_name]()

        utils.load_state_dict(model, model_name, label_name)
        predictions, ground_truth = utils.predict(
            model=model,
            dataloader=test_dataloader,
            device=device)

        results[(model_name, label_name, set_name)] = {
            "Prediction": predictions,
            "Ground Truth": ground_truth
        }

In [None]:
# Persist all predictions in a time-stamped pickle under ./labels.
# The directory is created first so a missing folder cannot discard the
# results of the whole prediction run.
os.makedirs('labels', exist_ok=True)
results_path = f'labels/{data_set}_{datetime.now().strftime("%Y%m%d_%H%M%S")}.pkl'
with open(results_path, 'wb') as file:
    pickle.dump(results, file)

# Test

In [None]:
# Pick a single model / label-source combination for interactive inspection.
model_name, label_name = 'EfficientNet', 'MTurk'
model = CustomEfficientNetB3()

In [None]:
# Load the stored state for this (model_name, label_name) pair into `model`
# (presumably the weights written by utils.save_state_dict - confirm in utils).
utils.load_state_dict(model, model_name, label_name)

In [None]:
# Predict on whatever `test_dataloader` currently refers to, i.e. the loader
# left over from the last image set processed by the prediction loop.
prediction_kwargs = dict(model=model, dataloader=test_dataloader, device=device)
pred, gt = utils.predict(**prediction_kwargs)

In [None]:
from sklearn.metrics import accuracy_score

# Print one accuracy value per test set for the selected model/label pair.
for ts in test_set:
    entry = results[(model_name, label_name, ts)]
    pred, gt = entry["Prediction"], entry["Ground Truth"]
    # Turn the predictions into hard 0/1 decisions with a 0.5 cut-off.
    binary_predictions = (pred > 0.5).astype(int)
    print(accuracy_score(gt, binary_predictions))

In [None]:
# Continue training the selected model for 10 more epochs and store the
# resulting metrics and weights.

# Optimizer with the hyper-parameters configured for this architecture.
model_hparams = params[model_name]
optimizer = optim.Adam(
    model.parameters(),
    lr=model_hparams['learning_rate'],
    weight_decay=model_hparams['weight_decay'])

# Loss and loaders are the ones built for the selected label source.
training_kwargs = dict(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn[label_name],
    dataloader=train_dataloader[label_name],
    val_dataloader=val_dataloader[label_name],
    device=device,
    use_tqdm=True,
    epochs=10)
utils.train(**training_kwargs)

utils.save_metrics(model, model_name, label_name)
utils.save_state_dict(model, model_name, label_name)

In [None]:
from PIL import Image, ImageDraw
import torchvision

# Paste samples from the test loader into one contact-sheet image, row by row.
width, height = 936, 702
merged_image = Image.new('RGB', (width, height))

# Drawing handle for the canvas (not used further in this cell).
draw = ImageDraw.Draw(merged_image)

# Size of one grid cell; 936x702 holds 4 columns x 3 rows of 234x234 tiles.
# NOTE(review): assumes each sample image is 234x234 - confirm in ContactDataset.
small_image_width, small_image_height = 234, 234
x, y = 0, 0

# Build the tensor -> PIL converter once instead of once per image.
to_pil = torchvision.transforms.ToPILImage()

canvas_full = False
for images, labels in test_dataloader:
    for image in images:
        merged_image.paste(to_pil(image), (x, y))

        # Advance one cell to the right; wrap to the next row at the edge.
        x += small_image_width
        if x + small_image_width > width:
            x = 0
            y += small_image_height

        # Stop as soon as the next row would fall outside the canvas, so the
        # rest of the data set is not loaded for nothing.
        if y + small_image_height > height:
            canvas_full = True
            break
    if canvas_full:
        break

merged_image.show()