## University of Bari Aldo Moro
<a name="top"></a>
Master's Degree in <b>Computer Science</b> - <b>Computer Vision Course</b><br>

### Vision Transformer for Unmanned Aerial Vehicles Agronomic Research
- [Dependencies](#dependencies)
- [Paths](#paths)
- [Parameters](#parameters)
- [Preprocessing](#preprocessing)
    - [Ground Truth](#ground_truth)
    - [Plots Clipper](#plots_clipper)
- [Dataset](#dataset)
- [Model](#model)
    - [Training](#training)
    - [Testing](#testing)
- [Visualization](#visualization)
- [Inference](#inference)

### Dependencies
<a name='dependencies'></a>

In [None]:
# Libraries

from argparse import ArgumentParser
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateFinder
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torchvision.io import read_image
from torchvision.models import get_model, ViT_B_32_Weights, ViT_B_16_Weights, ViT_L_16_Weights, ViT_L_32_Weights
import matplotlib.pyplot as plt
import mlflow.pytorch
import numpy as np
import os
import pandas as pd
import pytorch_lightning as pl
import torch
import torchvision
torchvision.disable_beta_transforms_warning()

In [None]:
# Torch version
# Printed so the notebook output records which PyTorch build produced the run.

print("Torch version", torch.__version__)

In [None]:
# Import from notebooks

import import_ipynb

from notebooks.gcp_finder import GCPFinder
from notebooks.clipper import Clipper
from notebooks.dataset import BaseDataset, PlotsDataset
from notebooks.vit import UAV_vit
from notebooks.visualization import Plotter

In [None]:
# Cuda setup
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU, and report
# which device was selected.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "GPU is available and being used"
    if device.type == "cuda"
    else "GPU is not available, using CPU instead",
    device,
)

#### Paths
<a name="paths"></a>

In [None]:
# Paths
# Every location is relative to the notebook's working directory.

DATASET_PATH = "./data/dataset"
# Image directory given to PlotsDataset and read by the sample-grid cell
PLOT_PATH = "./data/plots"
# Image directory given to BaseDataset in the Inference section
SAMPLES_PATH = "./data/raw_samples"
# Base directory handed to Plotter
VISUALS_PATH = "./data/visuals"
# Inputs of the (currently commented-out) Clipper step
SHAPEFILE_PATH = "./data/shapefile/all_plots.shp"
ORTHOMOSAIC_PATH = "./data/orthomosaic/raster.tif"
GCP_PATH = "./data/GCP_Images"
# Spreadsheet loaded with pd.read_excel as the ground truth
GROUND_TRUTH_PATH = './data/ground_truth/corn_plant_height_ground_truth.xlsx'
MODEL_PATH = "./data/models/"
# Prefix of the per-run checkpoint directory assembled in the Callbacks cell
CHECKPOINT_PATH = "./mlruns/0/"

### Parameters
<a name='parameters'></a>

In [None]:
# Parameters
# Tunable settings for the whole experiment; later cells only read these names.

# Split sizes are fractions in [0, 1] passed to train_test_split
TEST_SIZE = 0.2 # fraction of the full dataset held out for testing
VAL_SIZE = 0.2  # fraction of the remaining training set held out for validation

# Passed to pl.Trainer as min_epochs / max_epochs.
# max_epochs=-1 puts no upper bound on epochs in Lightning, so training is
# expected to end through the EarlyStopping callback.
MIN_EPOCS = 20
MAX_EPOCS = -1
# DataLoader settings
BATCH_SIZE = 8
WORKERS = 4
# Image size handed to PlotsDataset / BaseDataset
IMG_SIZE = (224,224)

# Candidate torchvision ViT backbones with their pretrained weights
BACKBONES = [
    {'name': "vit_b_16", 'weights': ViT_B_16_Weights.DEFAULT}, # default ImageNet on DeIT recipe
    {'name': "vit_b_16", 'weights': ViT_B_16_Weights.IMAGENET1K_SWAG_LINEAR_V1},
    {'name': "vit_b_32", 'weights': ViT_B_32_Weights.DEFAULT},
    {'name': "vit_l_16", 'weights': ViT_L_16_Weights.DEFAULT}, # recipe https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16
    {'name': "vit_l_16", 'weights': ViT_L_16_Weights.IMAGENET1K_SWAG_LINEAR_V1},
    {'name': "vit_l_32", 'weights': ViT_L_32_Weights.DEFAULT},
]
# Index 2 selects vit_b_32 with its default weights
BACKBONE = BACKBONES[2]

# Forwarded to UAV_vit as `no_grad_layers_n`
FROZEN_LAYERS = 6

# EarlyStopping settings (monitoring val_loss)
ES_PATIENCE = 10
ES_STOPPING_THRESHOLD = 1e-4
ES_DIVERGENCE_THRESHOLD = 5
# loss_threshold = mean(stds) * SIGMA_MULTIPLIER (computed in the Dataset section)
SIGMA_MULTIPLIER = 3

# When True, the LearningRateFinder callback is added to the trainer
LR_FINDER = False
# Defaults for the hyper-parameters built by setup_vargs
LEARNING_RATE = 1e-6
WEIGHT_DECAY = 1e-1
DROPOUT = 0
ATTENTION_DROPOUT = 0

## Preprocessing
<a name='preprocessing'></a>

#### Ground truth
<a name="ground_truth"></a>

In [None]:
# Loading the ground truth
# Read the spreadsheet into `df` and preview the three columns used below:
# the file name column and the two height columns that get averaged.
df = pd.read_excel(GROUND_TRUTH_PATH)
df[['DataFile 3', 'PHT(m)', 'Elev maximum (m)']].head()

In [None]:
# Ground Truth Preprocessing

def renaming(filename):
    """Return ``filename`` with its extension replaced by ``.png``.

    The extension is located with ``os.path.splitext`` instead of assuming it
    is exactly three characters long, so ``plot.tif`` and ``plot.tiff`` both
    map to ``plot.png``. Any directory part of ``filename`` is kept.

    Args:
        filename: File name (or path) as a string.

    Returns:
        The same name with a ``.png`` extension.
    """
    stem, _extension = os.path.splitext(filename)
    return stem + ".png"

def elev_avg(row):
    """Return the arithmetic mean of a row's 'PHT(m)' and 'Elev maximum (m)'.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing both keys.

    Returns:
        Half the sum of the two values.
    """
    pht = row['PHT(m)']
    elev_max = row["Elev maximum (m)"]
    return (pht + elev_max) / 2

# Derive the image file name for every ground-truth row
df["filename"] = df["DataFile 3"].apply(renaming)

# Row-wise average of the two height columns, stored directly as float32
df["elevation_avg"] = df.apply(elev_avg, axis=1).astype(np.float32)

# Preview the two derived columns
df.loc[:, ['filename', 'elevation_avg']].head()

In [None]:
# Targets normalization
# Min-max scale the averaged height into the `elevation` column. The fitted
# `scaler` is kept because later cells call scaler.inverse_transform on the
# model outputs.
# NOTE(review): the scaler is fitted on every row before the train/test split
# performed later in the notebook, so test targets influence the scaling range.

scaler = MinMaxScaler()
df["elevation"] = scaler.fit_transform(df[["elevation_avg"]])

In [None]:
# Scaled targets
# Preview the file name next to its min-max scaled target.

df[["filename", "elevation"]].head()

#### Plots Clipper
<a name='plots_clipper'></a>

In [None]:
# Disabled by default: uncomment to run the Clipper on the orthomosaic and
# shapefile (presumably writing the clipped plot images into PLOT_PATH —
# confirm in notebooks/clipper).
# clipper = Clipper(ORTHOMOSAIC_PATH, SHAPEFILE_PATH, PLOT_PATH)
# clipper.start()

## Dataset
<a name="dataset"></a>

In [None]:
# Datasets definition
# `labels` carries the file name, the scaled target and the raw averaged height.
labels = df[["filename", "elevation", "elevation_avg"]]
dataset = PlotsDataset(labels=labels, img_dir=PLOT_PATH, img_size=IMG_SIZE)

# Fetch the first sample once instead of re-indexing the dataset for each print
first_sample = dataset[0]

print(f"Dataset length: {len(dataset)}")
print(f"Image type: {type(first_sample[0])}")
# The original f-string printed a stray ")" after the shape
print(f"Image shape: {first_sample[0].shape}")
print(f"Label type: {type(first_sample[1])}")

In [None]:
# Dataset samples
# Show a 4x4 grid of randomly drawn plot images with their averaged height.

cols, rows = 4, 4
figure = plt.figure(figsize=(8, 8))
# The figure title is the same for every subplot, so set it once
plt.suptitle("Plot samples " + "Dataset")
for i in range(1, cols * rows + 1):
    sample_index = torch.randint(len(dataset), size=(1,)).item()
    row = labels.iloc[sample_index] # picks only those in the ground truth
    image_path = os.path.join(os.getcwd(), PLOT_PATH, row['filename'])
    # Move the channel axis last, which is the layout imshow expects
    img = read_image(image_path).permute(1,2,0)
    # Caption and label must come from the SAME row as the image; the original
    # read them from labels.iloc[i], so titles did not match the pictures.
    label = row['elevation_avg']
    filename = row['filename'][0:-4]
    figure.add_subplot(rows, cols, i)
    plt.title(f'{filename}: {label:.2f}m')
    plt.axis("off")
    plt.imshow(img)
plt.show()

In [None]:
# Pixel distribution
# Histogram of all values of the first dataset image, flattened to 1-D.
# density=True normalises the bars; log=True puts the y axis on a log scale.

plt.hist(dataset[0][0].permute(1,2,0).ravel(), bins=50, density=True, log=True)
plt.xlabel("Pixel values")
plt.ylabel("Relative frequency")
plt.title("Pixels distribution")

In [None]:
# Dataset statistics from PlotsDataset.get_means_stds(); `stds` feeds the loss
# threshold computed in the next cell.
# NOTE(review): presumably per-channel pixel means/stds — confirm in notebooks/dataset.
means, stds = dataset.get_means_stds()

In [None]:
# HuberLoss delta / Pseudo Huber Loss beta
# sigma is the mean of `stds`; the threshold is sigma scaled by SIGMA_MULTIPLIER.
# `loss_threshold` becomes the default of --loss_threshold in setup_vargs.
sigma = np.mean(stds)
print("Sigma", sigma)
loss_threshold = sigma * SIGMA_MULTIPLIER
print("Loss threshold", loss_threshold)

In [None]:
# Dataset split
# Hold out TEST_SIZE of the dataset for testing, then VAL_SIZE of the
# remaining training samples for validation.

# Fixed seed so the train/validation/test partition is identical on every
# Restart & Run All; without it each run is scored on a different test set
# and runs cannot be compared.
SPLIT_SEED = 42

train_set, test_set = train_test_split(dataset, test_size=TEST_SIZE, random_state=SPLIT_SEED)
train_set, val_set = train_test_split(train_set, test_size=VAL_SIZE, random_state=SPLIT_SEED)

print(f"Training set size: {len(train_set)}")
print(f"Validation set size: {len(val_set)}")
print(f"Test set size: {len(test_set)}")

In [None]:
# Dataloaders
# One loader per split, all with the same batch size and worker count.

# shuffle=True re-orders the training samples at every epoch; without it the
# model sees identical batches in identical order each epoch.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, num_workers=WORKERS, shuffle=True)
# Validation and test keep a fixed order so their outputs line up with the targets
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, num_workers=WORKERS)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, num_workers=WORKERS)

print(f"Train Dataloader size: {len(train_loader)}")
print(f"Validation Dataloader size: {len(val_loader)}")
print(f"Test Dataloader size: {len(test_loader)}")

## Model
<a name='model'></a>

In [None]:
# Backbone
# Build the torchvision model selected in BACKBONE with its pretrained weights,
# then display its `heads` attribute.

backbone = get_model(BACKBONE['name'], weights=BACKBONE['weights'])
backbone.heads

In [None]:
# Vargs setup

def setup_vargs(lr = LEARNING_RATE):
    """Build the hyper-parameter dict that is unpacked into ``UAV_vit``.

    Each option is registered on an ``ArgumentParser`` with the notebook
    constants as defaults. The process arguments are parsed with
    ``parse_known_args``, so unrecognised arguments are ignored.

    Args:
        lr: Default for ``--learning_rate``.

    Returns:
        dict mapping option name (without dashes) to its parsed value.
    """
    # (flag, type, default) in the order the keys appear in the result
    option_specs = (
        ('--learning_rate', float, lr),
        ('--loss_threshold', float, loss_threshold),
        ('--weight_decay', float, WEIGHT_DECAY),
        ('--batch_size', int, BATCH_SIZE),
        ('--no_grad_layers_n', int, FROZEN_LAYERS),
        ('--dropout', float, DROPOUT),
        ('--attention_dropout', float, ATTENTION_DROPOUT),
    )

    parser = ArgumentParser()
    for flag, value_type, default in option_specs:
        parser.add_argument(flag, type=value_type, default=default)

    namespace, _unknown = parser.parse_known_args()
    return vars(namespace)

# Hyper-parameter dict built with the default learning rate (LEARNING_RATE)
vargs = setup_vargs()

In [None]:
# Model
# Wrap the pretrained backbone in the UAV_vit module, passing the
# hyper-parameters from `vargs` as keyword arguments.

model = UAV_vit(backbone, **vargs)

In [None]:
# Model summary
# Display the backbone held by the model as the cell output.

model.backbone

### Training
<a name='training'></a>

In [None]:
# MLFlow init
# Turn on PyTorch autologging and open the run that the rest of the notebook
# logs into; its id is reused for the checkpoint path and the Plotter.

mlflow.pytorch.autolog()
mlflow.start_run()

run = mlflow.active_run()
run_info = run.info
log_run_id = run_info.run_id
print(f"Active run_id: {run_info.run_id}")

In [None]:
# Callbacks

CHECKPOINT_FILENAME ="uav_vit-checkpoint"
# Checkpoints are written under the artifacts directory of the active MLflow run
run_path = f"{CHECKPOINT_PATH}{log_run_id}/artifacts/restored_model_checkpoint/"

# Keep only the single best checkpoint (lowest val_loss), weights only
checkpoint_cb = ModelCheckpoint(
    dirpath=run_path,
    filename=CHECKPOINT_FILENAME,
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    save_weights_only=True,
)

# Early stopping on val_loss, configured from the ES_* parameters
earlyStopping_cb = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=ES_PATIENCE,
    stopping_threshold=ES_STOPPING_THRESHOLD,
    divergence_threshold=ES_DIVERGENCE_THRESHOLD,
)

# Always created so later cells can reference it; only registered when enabled
learning_rate_finder_cb = LearningRateFinder()

callbacks = [earlyStopping_cb, checkpoint_cb] + ([learning_rate_finder_cb] if LR_FINDER else [])

In [None]:
# Trainer
# Lightning trainer wired to the callbacks defined above. The epoch bounds come
# from the Parameters cell; the pre-training validation sanity check is
# disabled (num_sanity_val_steps=0) and outputs are rooted at `run_path`.

trainer = pl.Trainer(
    min_epochs=MIN_EPOCS,
    max_epochs=MAX_EPOCS,
    callbacks=callbacks,
    num_sanity_val_steps=0,
    enable_checkpointing=True,
    default_root_dir=run_path,
    # enable_progress_bar=False
)

In [None]:
# Fit
# Train on train_loader and validate on val_loader; the callbacks monitor val_loss.

trainer.fit(model, train_loader, val_loader)

In [None]:
# Plotter
# Plot helper created from the visuals base path and the MLflow run id;
# `plotter.path` is used below as the directory for saved figures.
plotter = Plotter(VISUALS_PATH, log_run_id)

In [None]:
# LR finder plot
# `optimal_lr` stays None until the LearningRateFinder callback has actually
# run, so guard against it before asking for a suggestion, and ask only once.
lr_finder_result = learning_rate_finder_cb.optimal_lr if LR_FINDER else None
suggested_lr = lr_finder_result.suggestion() if lr_finder_result is not None else None

# True only when the finder was enabled and produced a usable learning rate
LR_FOUND = bool(suggested_lr)
if LR_FOUND:
    # Draw without showing first: saving after the figure has been shown can
    # write a blank image (the inline backend closes the figure on show).
    lr_finder_result.plot(suggest=True, show=False)
    plt.savefig(f"{plotter.path}/suggested_lr.png")
    plt.show()

### Testing
<a name='testing'></a>

In [None]:
# Best model path
# Prefer the path recorded by the checkpoint callback: it is the file that was
# actually written, including any version suffix Lightning adds when the name
# already exists. Fall back to the hand-built path when the callback has not
# saved anything (best_model_path is then an empty string).

ckpt_run_path = checkpoint_cb.best_model_path or f"{run_path}{CHECKPOINT_FILENAME}.ckpt"


In [None]:
# Load best model
# When the LR finder produced a suggestion, rebuild the hyper-parameter dict
# with it before restoring the model from the checkpoint file.
if LR_FOUND:
    vargs = setup_vargs(suggested_lr)
model = UAV_vit.load_from_checkpoint(ckpt_run_path, backbone=backbone, **vargs)

In [None]:
# Test best model
# Run the test loop of the restored model on the held-out test loader.
# NOTE(review): the attributes read in the next cell are presumably populated
# here — confirm in notebooks/vit.

trainer.test(model, test_loader)

In [None]:
# Extract test results
# Read the values the model stored as attributes, and collect the target
# (element 1 of each sample) for every item of the test split.

outputs = model.test_output
test_loss = model.test_loss
test_targets_mean = model.test_targets_mean
test_targets = [x[1] for x in test_set]

In [None]:
def _to_original_scale(values):
    """Undo the MinMax scaling of `values`, returned as an (n, 1) array."""
    column = np.array(values).reshape(-1,1)
    return scaler.inverse_transform(column)

# NOTE: each name is overwritten with its de-normalised version, so running
# this cell twice applies the inverse transform twice.
outputs = _to_original_scale(outputs)
test_targets_mean = _to_original_scale(test_targets_mean)
test_targets = _to_original_scale(test_targets)

In [None]:
# Residual = target - prediction, paired in test-set order
residuals = [lab - out for lab, out in zip(test_targets, outputs)]

In [None]:
# R2 score
# Coefficient of determination between the de-normalised targets and outputs.

r2 = r2_score(test_targets, outputs)
print(f"R2 score: {r2}")

### Visualization
<a name='visualization'></a>

In [None]:
# Pass the de-normalised test targets to Plotter.boxplot
plotter.boxplot(test_targets)

In [None]:
# Pass the predictions, the targets and the R2 score to Plotter.outputs_vs_targets
plotter.outputs_vs_targets(outputs, test_targets, r2)

In [None]:
# Pass the residuals (target - prediction) and the predictions to Plotter.residuals_vs_outputs
plotter.residuals_vs_outputs(residuals, outputs)

In [None]:
# Pass the residuals to Plotter.residuals_hist
plotter.residuals_hist(residuals)

In [None]:
# Pass the residuals to Plotter.residuals_errors
plotter.residuals_errors(residuals)

In [None]:
# Pass the de-normalised target means and the test losses read from the model
# to Plotter.targets_means_vs_losses
plotter.targets_means_vs_losses(test_targets_mean, test_loss)

In [None]:
# Log parameters
# Record the run configuration, the test score and the notebooks that produced
# it, then close the MLflow run opened in the Training section.

mlflow.log_params({
    "model": BACKBONE['name'],
    "weights": BACKBONE['weights'],
    "R2Score": r2,
    "weight_decay": WEIGHT_DECAY,
    "batch_size": BATCH_SIZE,
    "dropout": DROPOUT,
    "dropout_attention": ATTENTION_DROPOUT,
    "criterion": model.criterion,
    "frozen layers": FROZEN_LAYERS,
    "train_epochs": MAX_EPOCS,
    "run_id": log_run_id,
    "path": checkpoint_cb.best_model_path,
    })
# Params are stored as strings; also log R2 as a numeric metric so runs can be
# sorted and charted by it. The "R2Score" param above is kept for existing runs.
mlflow.log_metric("test_r2", float(r2))
if LR_FOUND:
    mlflow.log_param("suggested_lr", suggested_lr)

# Attach the notebooks used for this run as artifacts
mlflow.log_artifact("main.ipynb")
mlflow.log_artifact("notebooks/vit.ipynb")
mlflow.log_artifact("notebooks/dataset.ipynb")
mlflow.end_run()

### Inference
<a name='inference'></a>

In [None]:
# Dataset and loader over the images in SAMPLES_PATH, used for prediction only
# (no labels are passed to BaseDataset).
samplesDataset = BaseDataset(SAMPLES_PATH, IMG_SIZE)
samplesDataloader = DataLoader(samplesDataset, batch_size=BATCH_SIZE)

In [None]:
# Predictions
# Run the model on the sample images; the next cell concatenates the returned
# predictions.
predictions = trainer.predict(model, dataloaders=samplesDataloader, return_predictions=True)

In [None]:
# Concatenate the predictions into one column, map them back to the original
# target scale and flatten to a plain list of Python floats.
# NOTE: `predictions` is overwritten, so this cell is meant to run once per predict call.
predictions = scaler.inverse_transform(np.array(np.concatenate(predictions)).reshape(-1,1))
predictions = predictions.ravel().tolist()
print("Predicted results:\n", *predictions, sep='\n')