## University of Bari Aldo Moro
Master's Degree in <b>Computer Science</b> - <b>Computer Vision Course</b><br>
Francesco Peragine - f.peragine@studenti.uniba.it

### Vision Transformer for Unmanned Aerial Vehicles Agronomic Research
- [Dependencies](#dependencies)
- [Libraries](#libraries)
- [Dataset](#dataset)
    - [Parameters](#parameters)
    - [GCP Finder](#gcp_finder)
    - [Plots Clipper](#plots_clipper)
    - [Ground Truth](#ground_truth)
    - [Creation](#creation)
    - [Loading](#loading)
    - [Splitting](#split)
- [Dataloader](#dataloader)
- [Model](#model)
    - [Training](#training)
    - [Evaluation](#evaluation)
    - [Save](#save)
    - [Prediction](#prediction)
- [Visualization](#visualization)

### Dependencies
<a name='dependencies'></a>

### Libraries
<a name='libraries'></a>

In [None]:
# %pip install git+https://github.com/PyTorchLightning/pytorch-lightning

#### Libraries
<a name="libraries"></a>

In [None]:
import torch.nn.functional as TF
import torch
import pandas as pd
import mlflow
import matplotlib.pyplot as plt
from torchvision.transforms import transforms
from torchvision.models import get_model, ViT_L_32_Weights
from torch.utils.data import DataLoader, ConcatDataset
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import TensorDataset

In [None]:
import import_ipynb
from notebooks.utils import calculate_mean_std, get_dataset_samples
from notebooks.dataset import PlotsDataset
from notebooks.gcp_finder import GCPFinder
from notebooks.clipper import Clipper
from notebooks.uav_vit import UAV_vit

In [None]:
# Show the installed PyTorch Lightning version (imported above as `pl`).
print(pl.__version__)

#### Paths
<a name="paths"></a>

In [None]:
# Pick the compute device once: CUDA when torch reports it as available,
# otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report which device was selected.
print(
    "GPU is available and being used"
    if device.type == "cuda"
    else "GPU is not available, using CPU instead",
    device,
)

In [None]:
# Raw inputs shipped with the case study.
DATASET_PATH = "./data/raw/Case_Study_1/Raw_Images"
SHAPEFILE_PATH = "./data/raw/Case_Study_1/Shapefile/Plots_Shapefile/all_plots.shp"

# Intermediate artefacts used by the (currently commented-out) preprocessing cells
# and by the dataset.
GCP_PATH = "./data/GCP_Images"
ORTHOMOSAIC_PATH = "./data/orthophoto/raster.tif"
PLOT_PATH = "./data/plots"

# Regression targets and the directory where trained weights are written.
GROUND_TRUTH_PATH = "./data/ground_truth/ground_truth.csv"
MODEL_PATH = "./data/models/"

#### Ground truth
<a name="ground_truth"></a>

In [None]:
# Read the ground-truth table from disk.
ground_truth = pd.read_csv(GROUND_TRUTH_PATH)

# Store the elevation column ("elev") as float32; all other columns keep
# the dtype pandas inferred for them.
num_format = "float32"
ground_truth = ground_truth.astype({"elev": num_format})

# Sanity check: show the scalar type now held by the column.
print(type(ground_truth["elev"].to_numpy()[0]))

## Preprocessing
<a name='preprocessing'></a>

### GCP Finder
<a name='gcp_finder'></a>

In [None]:
# gcp_finder = GCPFinder(DATASET_PATH, GCP_PATH)
# gcp_finder.gcp_mover()

### Plots Clipper
<a name='plots_clipper'></a>

In [None]:
# clipper = Clipper(ORTHOMOSAIC_PATH, SHAPEFILE_PATH, PLOT_PATH)
# clipper.start()

In [None]:
# Open an MLflow run; it is closed by the mlflow.end_run() call in the
# parameter-logging cell near the end of the notebook.
mlflow.start_run()
# Turn on MLflow's PyTorch autologging for the training that follows.
mlflow.pytorch.autolog()

## Dataset
<a name="dataset"></a>

#### Augmentation

In [None]:
# Bind the torchvision transforms module to a private alias. The public name
# `transforms` is rebound below to the composed pipeline, so building the
# pipeline through the alias keeps this cell safe to re-run: previously a
# second execution failed because `transforms` no longer referred to the module.
from torchvision.transforms import transforms as _tv_transforms

# Colour-jitter strengths; these four values are also logged to MLflow at the
# end of the run.
hue = 0.1
saturation = 0.1
brightness = 0.1
contrast = 0.2

# Augmentation pipeline handed to the "augmented" PlotsDataset.
# The variable keeps the name `transforms` because later cells pass it on.
transforms = _tv_transforms.Compose([
    _tv_transforms.RandomHorizontalFlip(),
    _tv_transforms.RandomVerticalFlip(),

    # Random color jittering for slight changes in hue, saturation, brightness, contrast
    _tv_transforms.ColorJitter(hue=hue, saturation=saturation, brightness=brightness, contrast=contrast),

    # Randomly add shadow patterns.
    # RandomApply(p=0.3) wraps RandomErasing(p=0.2), so a patch is erased on
    # roughly 0.3 * 0.2 = 6% of the samples.
    # NOTE(review): RandomErasing works on tensors, not PIL images — presumably
    # PlotsDataset converts to a tensor before applying this pipeline; confirm.
    _tv_transforms.RandomApply([_tv_transforms.RandomErasing(p=0.2, scale=(0.01, 0.1), ratio=(0.3, 3.3))], p=0.3),
    # transforms.RandomApply([transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0, hue=0)], p=0.2), # Randomly darken image corners to simulate vignetting
    # transforms.RandomApply([transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.5, hue=0.3)], p=0.1), # Randomly add lens flare effects
])

In [None]:
# Target (height, width) handed to PlotsDataset for every plot image.
IMG_SIZE = (224, 224)

# Each dataset is stored with a short name so the chosen variant can be logged
# to MLflow later on.
base_dataset = {
    "name": "base",
    "dataset": PlotsDataset(ground_truth, PLOT_PATH, IMG_SIZE),
}
augmented_dataset = {
    "name": "augmented",
    "dataset": PlotsDataset(ground_truth, PLOT_PATH, IMG_SIZE, transforms),
}

# Base samples followed by their augmented counterparts.
concat_dataset = {
    "name": "concat",
    "dataset": ConcatDataset([base_dataset["dataset"], augmented_dataset["dataset"]]),
}

# Dataset variant used by every later cell.
curr_dataset = concat_dataset

# Disabled normalisation experiment, kept for reference.
# normalize_transforms = transforms.Compose([transforms.Normalize(mean=means, std=stds)])

# normalized_images = [torch.clamp(normalize_transforms(sample[0]), 0, 1) for sample in combined_dataset]
# labels = [sample[1] for sample in combined_dataset]

# dataset = TensorDataset(torch.stack(normalized_images), torch.Tensor(labels))

# Load the first sample once and reuse it for all the diagnostics below
# instead of fetching it again for every print.
first_sample = curr_dataset["dataset"][0]

print(f"Dataset length: {len(curr_dataset['dataset'])}")
print(f"Image type: {type(first_sample[0])}")
# The original message ended with a stray ")" after the shape; removed here.
print(f"Image shape: {first_sample[0].shape}")
print(f"Label type: {type(first_sample[1])}")

In [None]:
# Compute dataset statistics with the project helper and print them.
# NOTE(review): presumably per-channel mean/std of the images — confirm in notebooks.utils.
means, stds = calculate_mean_std(curr_dataset['dataset'])
print(f"Means: {means}, Stds: {stds}")

In [None]:
# Show the first sample together with its label.
sample = curr_dataset['dataset'][0]
image, crop_height = sample[0], sample[1]

# permute(1, 2, 0) reorders the axes of the image tensor for imshow
# (assumes channel-first storage — TODO confirm against PlotsDataset).
plt.imshow(image.permute(1, 2, 0))
plt.title(f"Normalized image - crop height: {crop_height!s}m")
plt.show()

In [None]:
# Pixel distribution
plt.hist(curr_dataset['dataset'][0][0].permute(1,2,0).ravel(), bins=50, density=True)
plt.xlabel("Pixel values")
plt.ylabel("Relative frequency")
plt.title("Distribution of pixels")

#### Split
<a name='split'></a>

In [None]:
# Fraction of all samples held out for testing.
TEST_SIZE = 0.2
# Fraction of the remaining (non-test) samples used for validation,
# i.e. 0.2 * 0.8 = 16% of the whole dataset.
VAL_SIZE = 0.2
# Fixed seed so that the same samples land in the same split on every run;
# without it each execution produced a different split, which makes the
# metrics tracked in MLflow hard to compare between runs.
SPLIT_SEED = 42

# NOTE(review): when curr_dataset is the "concat" variant, a plot and its
# augmented copy are separate samples and can fall into different splits —
# consider splitting by plot before augmenting to avoid leakage.
dataset = {}
dataset["tmp"], dataset["test"] = train_test_split(
    curr_dataset["dataset"], test_size=TEST_SIZE, random_state=SPLIT_SEED
)
dataset["train"], dataset["val"] = train_test_split(
    dataset["tmp"], test_size=VAL_SIZE, random_state=SPLIT_SEED
)

print(f"Training set size: {len(dataset['train'])}")
print(f"Validation set size: {len(dataset['val'])}")
print(f"Test set size: {len(dataset['test'])}")

In [None]:
def get_labels(dataset, phase):
    """Return the labels of one split as a list.

    Args:
        dataset: Mapping from split name (e.g. "train", "val", "test") to an
            iterable of records.
        phase: Name of the split whose labels are wanted.

    Returns:
        A list with the element at index 1 of every record in
        ``dataset[phase]``, in iteration order.
    """
    records = dataset[phase]
    return [record[1] for record in records]

# Collect the labels of every split, keyed by split name.
phases = ["train", "val", "test"]
labels = {split: get_labels(dataset, split) for split in phases}

# Report how many labels each split holds.
for title, split in zip(("Training", "Validation", "Test"), phases):
    print(f"{title} set labels size: {len(labels[split])}")


### Hyperparameters
<a name='hyperparameters'></a>

In [None]:
# Upper bound on training epochs; passed to pl.Trainer as max_epochs.
TRAIN_EPOCS = 50
# Number of samples per batch, shared by the train/val/test DataLoaders.
BATCH_SIZE = 16

# Value passed as num_workers to every DataLoader.
WORKERS = 8

### DataLoaders
<a name="dataloader"></a>

In [None]:
# One DataLoader per split; only the training split is shuffled.
dataloader = {
    split: DataLoader(
        dataset[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == "train"),
        num_workers=WORKERS,
    )
    for split in ("train", "val", "test")
}

# len() of a DataLoader is its number of batches.
print(f"Train Dataloader size: {len(dataloader['train'])}")
print(f"Validation Dataloader size: {len(dataloader['val'])}")
print(f"Test Dataloader size: {len(dataloader['test'])}")

In [None]:
# Preview the un-augmented dataset with the project helper.
# NOTE(review): presumably displays a few samples under the given title — confirm in notebooks.utils.
get_dataset_samples(base_dataset["dataset"], "Base dataset")

In [None]:
# Preview the augmented dataset with the same project helper, for comparison with the base one.
# NOTE(review): presumably displays a few samples under the given title — confirm in notebooks.utils.
get_dataset_samples(augmented_dataset["dataset"], "Augmented dataset")

#### Model
<a name='model'></a>

In [None]:
# Load the ViT model: torchvision's "vit_l_32" with the IMAGENET1K_V1 pretrained weights.
vit_l_32 = get_model("vit_l_32", weights=ViT_L_32_Weights.IMAGENET1K_V1)
# Mean squared error from torch.nn.functional (imported as TF) is the loss handed to the wrapper.
loss_fn = TF.mse_loss

# Wrap the backbone in the project's UAV_vit module together with the loss and the per-split labels.
# NOTE(review): how UAV_vit uses `labels` is not visible here — confirm in notebooks.uav_vit.
model = UAV_vit(vit_l_32, loss_fn, labels)

## Training
<a name='training'></a>

In [None]:
# Early-stopping callback: watches "val_loss" and stops after 3 checks without a decrease.
# NOTE(review): the callback is created but not attached — the `callbacks=` line
# below is commented out, so training always runs for the full epoch budget.
earlyStopping = EarlyStopping(monitor="val_loss", patience=3, mode="min")

# Initialize a trainer
trainer = pl.Trainer(
    # default_root_dir=MODEL_PATH,
    # Train for at most TRAIN_EPOCS epochs.
    max_epochs=TRAIN_EPOCS,
    # callbacks=[earlyStopping],
    # Use Lightning's default logger.
    logger=True,
    # Let Lightning choose the accelerator that is available.
    accelerator="auto",
    # Skip the validation sanity check that normally runs before training.
    num_sanity_val_steps=0
)

In [None]:
# Training
trainer.fit(model, dataloader["train"], dataloader["val"])

In [None]:
# model.create_scatterplots()

In [None]:
# Save the trained model
torch.save(model.state_dict(), MODEL_PATH + "uav_vit.pt")

#### Evaluation
<a name='evaluation'></a>

In [None]:
# Testing
trainer.test(model, dataloader["test"], verbose=True)

In [None]:
# Log the parameters
mlflow.log_params({"dataset": curr_dataset["name"], "dataset_length": len(curr_dataset["dataset"])})
mlflow.log_params({"batch_size": BATCH_SIZE, "train_epochs": TRAIN_EPOCS})
mlflow.log_params({"hue": hue, "saturation": saturation, "brightness": brightness, "contrast": contrast})
mlflow.end_run()

## Prediction
<a name='prediction'></a>

In [None]:
# model = UAV_vit.load_from_checkpoint("best_model.ckpt")
# model.freeze()

# x = test_dataset[0][0]
# predicition = model(x)

In [None]:
# Predict the label of the first dataset sample and compare it with the ground truth.
# NOTE(review): index 0 of curr_dataset may belong to the training split, so this
# is a smoke test of the pipeline rather than an evaluation.
sample = curr_dataset["dataset"][0]  # load the sample once and reuse image and label
img = sample[0]

# Inference set-up: evaluation mode (disables dropout and similar layers) and no
# gradient tracking; the input is moved to whichever device the model lives on.
model.eval()
with torch.no_grad():
    model_device = next(model.parameters()).device
    # unsqueeze(0) adds the leading batch dimension the model is called with.
    predicted = model(img.unsqueeze(0).to(model_device)).item()

print(f"Actual result {sample[1]} predicted result {predicted}")