## University of Bari Aldo Moro
<a name="top"></a>
Master's Degree in <b>Computer Science</b> - <b>Computer Vision Course</b><br>

### Vision Transformer for Unmanned Aerial Vehicles Agronomic Research
- [Dependencies](#dependencies)
- [Paths](#paths)
- [Parameters](#parameters)
- [Preprocessing](#preprocessing)
    - [Ground Truth](#ground_truth)
    - [GCP Finder](#gcp_finder)
    - [Plots Clipper](#plots_clipper)
- [Dataset](#dataset)
- [Model](#model)
    - [Training](#training)
    - [Testing](#testing)
    - [Inference](#inference)

### Dependencies
<a name='dependencies'></a>

In [None]:
# Libraries

import os
from argparse import ArgumentParser

import matplotlib.pyplot as plt
import mlflow.pytorch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torchvision
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateFinder
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torchvision.models import get_model, ViT_B_32_Weights, ViT_B_16_Weights, ViT_L_16_Weights, ViT_L_32_Weights, ViT_H_14_Weights
# Turn off torchvision's beta-transforms warning, then print the installed torch version
# so the environment is recorded in the notebook output.
torchvision.disable_beta_transforms_warning()
print(torch.__version__)

In [None]:
# Import from notebooks

import import_ipynb

from notebooks.gcp_finder import GCPFinder
from notebooks.clipper import Clipper
from notebooks.dataset import BaseDataset, PlotsDataset
from notebooks.vit import UAV_vit
from notebooks.visualization import Visualization

In [None]:
# Device selection: use CUDA when torch reports it as available, otherwise the CPU

gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")
status_message = (
    "GPU is available and being used"
    if gpu_available
    else "GPU is not available, using CPU instead"
)
print(status_message, device)

### Paths
<a name="paths"></a>

In [None]:
# Paths
#
# Every location is relative to the notebook's working directory, under ./data.

DATASET_PATH = "./data/dataset"  # only referenced by the (commented-out) GCPFinder step
PLOT_PATH = "./data/plots"  # image directory given to PlotsDataset; output argument of the (commented-out) Clipper
SAMPLES_PATH = "./data/raw_samples"  # images read by BaseDataset for the final predictions
VISUALS_PATH = "./data/visuals"  # passed to Visualization together with the MLflow run id
SHAPEFILE_PATH = "./data/shapefile/all_plots.shp"  # only referenced by the (commented-out) Clipper step
ORTHOMOSAIC_PATH = "./data/orthomosaic/raster.tif"  # only referenced by the (commented-out) Clipper step
GCP_PATH = "./data/GCP_Images"  # only referenced by the (commented-out) GCPFinder step
GROUND_TRUTH_PATH = './data/ground_truth/corn_plant_height_ground_truth.xlsx'  # spreadsheet loaded with pd.read_excel
MODEL_PATH = "./data/models/"  # NOTE(review): not referenced anywhere in the visible notebook — confirm it is still needed
CHECKPOINT_PATH = "./data/checkpoints/"  # dirpath of the ModelCheckpoint callback

### Parameters
<a name='parameters'></a>

In [None]:
# Parameters
#
# Every tunable value of the experiment lives in this cell.

# Split sizes, expressed as fractions in [0, 1] (not percentages)
TEST_SIZE = 0.2 # fraction of the whole dataset held out for testing
VAL_SIZE = 0.2  # fraction of the remaining training data held out for validation

MAX_EPOCS = -1  # forwarded to pl.Trainer(max_epochs=...); -1 leaves stopping to EarlyStopping
BATCH_SIZE = 8
WORKERS = 8     # DataLoader worker processes
IMG_SIZE = (224,224)

# Candidate backbones: the torchvision model name and the pretrained weights to load into it.
# The weights enum must belong to the same architecture as the model name.
BACKBONES = [
    {'name': "vit_b_16", 'weights': ViT_B_16_Weights.DEFAULT}, # default ImageNet on DeIT recipe
    {'name': "vit_b_16", 'weights': ViT_B_16_Weights.IMAGENET1K_SWAG_LINEAR_V1},
    {'name': "vit_b_32", 'weights': ViT_B_32_Weights.DEFAULT},
    {'name': "vit_l_16", 'weights': ViT_L_16_Weights.DEFAULT}, # recipe https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16
    {'name': "vit_l_16", 'weights': ViT_L_16_Weights.IMAGENET1K_SWAG_LINEAR_V1},
    {'name': "vit_l_32", 'weights': ViT_L_32_Weights.DEFAULT}, # vit_l_32 needs its own weights; the 16-patch weights do not match a 32-patch model
    {'name': "vit_h_14", 'weights': ViT_H_14_Weights.IMAGENET1K_SWAG_LINEAR_V1},
]
BACKBONE = BACKBONES[4]  # backbone used by this run

FROZEN_LAYERS = 9  # passed to the model as `no_grad_layers_n`

# Early stopping (all three are arguments of the EarlyStopping callback on "val_loss")
ES_PATIENCE = 10
ES_STOPPING_THRESHOLD = 1e-5
ES_DIVERGENCE_THRESHOLD = 5

# Optimisation and regularisation
LR_FINDER = True  # when True the LearningRateFinder callback is added to the trainer
LEARNING_RATE = 1e-5
WEIGHT_DECAY = 1e-3
DROPOUT = 0.3
ATTENTION_DROPOUT = 0

## Preprocessing
<a name='preprocessing'></a>

#### Ground truth
<a name="ground_truth"></a>

In [None]:
# Loading the ground truth
#
# Read the spreadsheet at GROUND_TRUTH_PATH into `df` and preview the three columns
# the preprocessing below relies on: the source file name and the two height columns.
df = pd.read_excel(GROUND_TRUTH_PATH)
df[['DataFile 3', 'PHT(m)', 'Elev maximum (m)']].head()

In [None]:
# Ground Truth Preprocessing

def renaming(filename):
    """Return `filename` with its extension replaced by ".png".

    The extension is detected with `os.path.splitext`, so extensions of any
    length are handled ("plot.tif" and "plot.tiff" both become "plot.png").
    A name without an extension simply gets ".png" appended instead of
    losing its last four characters.

    Args:
        filename: file name (or path) as a string.

    Returns:
        The same name with a ".png" extension.
    """
    stem, _extension = os.path.splitext(filename)
    return stem + ".png"

def elev_avg(row):
    """Return the arithmetic mean of the row's 'PHT(m)' and 'Elev maximum (m)' values.

    Args:
        row: mapping-like object (e.g. a DataFrame row) indexable by those two keys.

    Returns:
        Half the sum of the two values.
    """
    pht = row['PHT(m)']
    elev_max = row["Elev maximum (m)"]
    return (pht + elev_max) / 2

# Image file name for every row, derived from the "DataFile 3" column
df["filename"] = df["DataFile 3"].map(renaming)

# Row-wise mean of the two height columns, stored as float32
df["elevation_avg"] = df.apply(elev_avg, axis=1).astype(np.float32)

df[['filename', 'elevation_avg']].head()

In [None]:
# Targets normalization
#
# Rescale "elevation_avg" with a default MinMaxScaler and store the result in a new
# "elevation" column. The fitted `scaler` is kept because the last cell of the notebook
# calls `scaler.inverse_transform` to map predictions back to the original scale.
# NOTE(review): the scaler is fitted on every row, before the train/val/test split, so
# test-set targets influence the scaling — confirm this is acceptable.

scaler = MinMaxScaler()
df["elevation"] = scaler.fit_transform(df[["elevation_avg"]])

In [None]:
# Scaled targets: preview the two columns that are handed to PlotsDataset as labels

df[["filename", "elevation"]].head()

#### Ground Control Point (GCP) Finder
<a name='gcp_finder'></a>

In [None]:
# Optional one-off preprocessing step, disabled by default: uncomment both lines to run it.
# gcp_finder = GCPFinder(DATASET_PATH, GCP_PATH)
# gcp_finder.gcp_mover()

#### Plots Clipper
<a name='plots_clipper'></a>

In [None]:
# Optional one-off preprocessing step, disabled by default: uncomment both lines to run it.
# clipper = Clipper(ORTHOMOSAIC_PATH, SHAPEFILE_PATH, PLOT_PATH)
# clipper.start()

## Dataset
<a name="dataset"></a>

In [None]:
# Datasets definition
#
# Build the labelled dataset from the (filename, scaled elevation) table and the plot
# images in PLOT_PATH, then print a few sanity checks on the first sample.

dataset = PlotsDataset(labels=df[["filename", "elevation"]], img_dir=PLOT_PATH, img_size=IMG_SIZE)

# Fetch the first sample once instead of re-loading it for every print
first_sample = dataset[0]

print(f"Dataset length: {len(dataset)}")
print(f"Image type: {type(first_sample[0])}")
print(f"Image shape: {first_sample[0].shape}")  # no stray ")" after the shape
print(f"Label type: {type(first_sample[1])}")

In [None]:
# Dataset samples
#
# Delegates to PlotsDataset.show_samples with the scaled targets and the string "Dataset".
# NOTE(review): presumably plots a few images with their labels and uses the string as a
# title — confirm in notebooks/dataset.ipynb.

dataset.show_samples(df['elevation'], "Dataset")

In [None]:
# Pixel distribution
#
# Normalised 50-bin histogram of every value in the first image of the dataset.

first_image = dataset[0][0]
pixel_values = first_image.permute(1,2,0).ravel()

plt.hist(pixel_values, bins=50, density=True)
plt.xlabel("Pixel values")
plt.ylabel("Relative frequency")
plt.title("Distribution of pixels")

In [None]:
# Datasets means and stds
#
# Print whatever PlotsDataset.get_means_stds() returns as (means, stds).
# NOTE(review): neither value is used again in the visible notebook — presumably they are
# informational, or consumed inside the dataset class; confirm.

means, stds = dataset.get_means_stds()
print(f'Dataset means: {means}\nstds: {stds}')

In [None]:
# Dataset split
#
# Two-stage split: first hold out TEST_SIZE of the whole dataset for testing, then hold
# out VAL_SIZE of what remains for validation. A fixed seed makes both splits identical
# on every "Restart & Run All", so metrics are comparable between runs.
# NOTE(review): train_test_split indexes the dataset sample by sample, so presumably every
# image is loaded into memory here — confirm this is acceptable for the dataset size.

SPLIT_SEED = 42

train_val_set, test_set = train_test_split(dataset, test_size=TEST_SIZE, random_state=SPLIT_SEED)
train_set, val_set = train_test_split(train_val_set, test_size=VAL_SIZE, random_state=SPLIT_SEED)

print(f"Training set size: {len(train_set)}")
print(f"Validation set size: {len(val_set)}")
print(f"Test set size: {len(test_set)}")

In [None]:
# Dataloaders
#
# Only the training loader shuffles: without it every epoch would see the same batches in
# the same order. Validation and test loaders keep a fixed order, which the test cells rely
# on when they pair model outputs with `test_set` targets by position.

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, num_workers=WORKERS, shuffle=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, num_workers=WORKERS)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, num_workers=WORKERS)

# len() of a DataLoader is its number of batches, not its number of samples
print(f"Train Dataloader size: {len(train_loader)}")
print(f"Validation Dataloader size: {len(val_loader)}")
print(f"Test Dataloader size: {len(test_loader)}")

## Model
<a name='model'></a>

In [None]:
# Backbone
#
# Build the torchvision model named in BACKBONE with the matching pretrained weights,
# then display its `heads` attribute for inspection.

backbone = get_model(BACKBONE['name'], weights=BACKBONE['weights'])
backbone.heads

In [None]:
# Vargs setup

def setup_vargs(lr = LEARNING_RATE):
    """Build the keyword arguments passed to the model.

    Each hyper-parameter is registered as a command-line option whose default is the
    matching notebook constant, so a value given on the command line overrides it.
    Arguments the parser does not know about are ignored.

    Args:
        lr: default for `--learning_rate`.

    Returns:
        dict mapping option names (learning_rate, weight_decay, batch_size,
        no_grad_layers_n, dropout, attention_dropout) to their parsed values.
    """
    # (flag, type, default) for every option, in registration order
    option_specs = (
        ('--learning_rate', float, lr),
        ('--weight_decay', float, WEIGHT_DECAY),
        ('--batch_size', int, BATCH_SIZE),
        ('--no_grad_layers_n', int, FROZEN_LAYERS),
        ('--dropout', float, DROPOUT),
        ('--attention_dropout', float, ATTENTION_DROPOUT),
    )

    parser = ArgumentParser()
    for flag, value_type, default_value in option_specs:
        parser.add_argument(flag, type=value_type, default=default_value)

    known_args, _unknown_args = parser.parse_known_args()
    return vars(known_args)

vargs = setup_vargs()

In [None]:
# Model
#
# Wrap the backbone in UAV_vit; every entry of `vargs` is forwarded as a keyword argument.

model = UAV_vit(backbone, **vargs)

In [None]:
# Display the backbone as held by the model, for inspection
model.backbone

### Training
<a name='training'></a>

In [None]:
# MLFlow init
#
# Enable PyTorch autologging and open a run; its id is kept in `log_run_id` for later cells.

mlflow.pytorch.autolog()
run = mlflow.start_run()

log_run_id = run.info.run_id
print(f"Active run_id: {log_run_id}")

In [None]:
# Callbacks

# Early stopping on "val_loss" (lower is better), configured from the ES_* parameters
earlyStopping_cb = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=ES_PATIENCE,
    stopping_threshold=ES_STOPPING_THRESHOLD,
    divergence_threshold=ES_DIVERGENCE_THRESHOLD,
)

# Keep only the single checkpoint with the lowest "val_loss"
checkpoint_cb = ModelCheckpoint(
    dirpath=CHECKPOINT_PATH,
    filename="uav_vit-{epoch:02d}-{val_loss:.3f}",
    monitor="val_loss",
    mode="min",
    save_top_k=1,
)

# Always created, but only handed to the trainer when LR_FINDER is enabled
learning_rate_finder_cb = LearningRateFinder()

callbacks = [earlyStopping_cb, checkpoint_cb] + ([learning_rate_finder_cb] if LR_FINDER else [])

In [None]:
# Trainer
#
# MAX_EPOCS is -1 in the Parameters cell, so no epoch limit is set here and training is
# expected to be ended by the EarlyStopping callback.

trainer = pl.Trainer(
    max_epochs=MAX_EPOCS,
    callbacks=callbacks,
    num_sanity_val_steps=0,  # no sanity-check validation batches before training starts
    enable_checkpointing=True,
)

In [None]:
# Fit: train on `train_loader`, validating on `val_loader`

trainer.fit(model, train_loader, val_loader)

In [None]:
# Learning-rate finder result
#
# The flag is derived from the callback itself rather than from leftover kernel globals,
# so this cell gives the same answer on a fresh kernel as on a re-run:
#   - `optimal_lr` stays None when the finder never ran (LR_FINDER disabled);
#   - `suggestion()` may return None when no learning rate could be suggested.
# `suggested_lr` is always defined (None when nothing was found) for the cells below.
lr_finder_result = learning_rate_finder_cb.optimal_lr if LR_FINDER else None
suggested_lr = lr_finder_result.suggestion() if lr_finder_result is not None else None

LR_FOUND = suggested_lr is not None
if LR_FOUND:
    lr_finder_result.plot(suggest=True, show=True)

### Testing
<a name='testing'></a>

In [None]:
# Best model path: the checkpoint file the ModelCheckpoint callback recorded as best

checkpoint_cb.best_model_path

In [None]:
# Load best model
#
# Rebuild the keyword arguments (with the suggested learning rate when one was found) and
# restore the model from the best checkpoint. `model` is rebound: from here on it refers to
# the restored model, not the instance that was passed to `trainer.fit`.

vargs = setup_vargs(suggested_lr) if LR_FOUND else setup_vargs()
model = UAV_vit.load_from_checkpoint(checkpoint_cb.best_model_path, backbone=backbone, **vargs)

In [None]:
# Test best model: run the test loop of the restored model on `test_loader`

trainer.test(model, test_loader)

In [None]:
# Extract test results
#
# The model exposes what it collected during `trainer.test` as attributes; targets are read
# back from `test_set` (second element of every sample) and paired with outputs by position.

outputs = model.test_output
test_loss = model.test_loss
test_targets_mean = model.test_targets_mean
test_targets = [sample[1] for sample in test_set]

# Residual = target - model output, one per test sample
residuals = [target - prediction for target, prediction in zip(test_targets, outputs)]

In [None]:
# R2 score
#
# Coefficient of determination with the test targets as ground truth and the model outputs
# as predictions. The targets come from the min-max scaled "elevation" column.
# NOTE(review): assumes `outputs` is in that same scaled range — confirm in notebooks/vit.ipynb.

r2 = r2_score(test_targets, outputs)
print(f"R2 score: {r2}")

### Inference
<a name='inference'></a>

In [None]:
# Plot helper, built from the visuals directory and the MLflow run id.
# NOTE(review): presumably figures are saved under VISUALS_PATH, keyed by run — confirm in
# notebooks/visualization.ipynb.
visuals = Visualization(VISUALS_PATH, log_run_id)

In [None]:
# Residuals plot (residual = target - output for every test sample)
visuals.plot_residuals(residuals)

In [None]:
# Residuals against the model outputs they were computed from
visuals.plot_residuals_vs_outputs(residuals, outputs)

In [None]:
# Error plot derived from the residuals (exact content defined in notebooks/visualization.ipynb)
visuals.plot_residuals_errors(residuals)

In [None]:
# Target means against losses, both as collected by the model during `trainer.test`
visuals.plot_targets_means_vs_losses(test_targets_mean, test_loss)

In [None]:
# Model outputs against the corresponding test targets
visuals.plot_outputs_vs_targets(outputs, test_targets)

In [None]:
# Log parameters
#
# Record the run configuration and the R2 score on the active MLflow run, attach the
# notebooks as artifacts, then close the run.

run_params = {
    "model": BACKBONE['name'],
    "weights": BACKBONE['weights'],
    "R2Score": r2,
    "weight_decay": WEIGHT_DECAY,
    "batch_size": BATCH_SIZE,
    "train_epochs": MAX_EPOCS,
    "run_id": log_run_id,
    "path": checkpoint_cb.best_model_path,
    "dropout": DROPOUT,
    "dropout_attention": ATTENTION_DROPOUT,
    "frozen layers": FROZEN_LAYERS
}
mlflow.log_params(run_params)

# The suggested learning rate only exists when the LR finder produced one
if LR_FOUND:
    mlflow.log_param("suggested_lr", suggested_lr)

# Artifact paths are relative to the working directory the notebook runs from
for artifact_path in ("main.ipynb", "notebooks/vit.ipynb", "notebooks/dataset.ipynb"):
    mlflow.log_artifact(artifact_path)

mlflow.end_run()

In [None]:
# Inference data: images from SAMPLES_PATH at the same IMG_SIZE as the training data.
# The loader uses DataLoader defaults apart from the batch size (no shuffling, no workers).
samplesDataset = BaseDataset(SAMPLES_PATH, IMG_SIZE)
samplesDataloader = DataLoader(samplesDataset, batch_size=BATCH_SIZE)

In [None]:
# Predictions: run the restored model over the samples loader and keep the returned
# predictions for the post-processing cell below
predictions = trainer.predict(model, dataloaders=samplesDataloader, return_predictions=True)

In [None]:
# Join the predictions into a single column, undo the target min-max scaling with the
# scaler fitted earlier, and flatten the result to a plain list of Python floats.
scaled_predictions = np.concatenate(predictions).reshape(-1, 1)
predictions = scaler.inverse_transform(scaled_predictions).ravel().tolist()
print(f"Predicted results {predictions}")