## University of Bari Aldo Moro
Master's Degree in <b>Computer Science</b> - <b>Computer Vision Course</b><br>
Francesco Peragine - f.peragine@studenti.uniba.it

### Vision Transformer for Unmanned Aerial Vehicles Agronomic Research
- [Dependencies](#dependencies)
- [Libraries](#libraries)
- [Dataset](#dataset)
    - [Parameters](#parameters)
    - [GCP Finder](#gcp_finder)
    - [Plots Clipper](#plots_clipper)
    - [Ground Truth](#ground-truth)
    - [Creation](#creation)
    - [Loading](#loading)
    - [Splitting](#split)
- [Dataloader](#dataloader)
- [Model](#model)
    - [Training](#training)
    - [Evaluation](#evaluation)
    - [Save](#save)
    - [Prediction](#prediction)
- [Visualization](#visualization)

### Dependencies
<a name='dependencies'></a>

### Libraries
<a name='libraries'></a>

In [None]:
# %pip install git+https://github.com/PyTorchLightning/pytorch-lightning
import pytorch_lightning as pl
# Print the installed Lightning version so it is visible in the notebook output.
print(pl.__version__)

In [None]:
import torch.nn.functional as TF
import torch
import pandas as pd
import mlflow.pytorch
import mlflow
import matplotlib.pyplot as plt
from torchvision.transforms import v2
from torchvision.models import get_model, ViT_L_32_Weights
from torch.utils.data import DataLoader, ConcatDataset
from sklearn.model_selection import train_test_split

#### Paths
<a name="paths"></a>

In [None]:
# Pick the compute device once: CUDA when it is usable, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

status = (
    "GPU is available and being used"
    if use_cuda
    else "GPU is not available, using CPU instead"
)
print(status, device)

In [None]:
# Filesystem locations used throughout the notebook (all relative to the working directory).

# Inputs of the (currently commented-out) GCPFinder step.
DATASET_PATH = "./data/raw/Case_Study_1/Raw_Images"
GCP_PATH = "./data/GCP_Images"
# Inputs of the (currently commented-out) Clipper step.
ORTHOMOSAIC_PATH = "./data/orthophoto/raster.tif"
SHAPEFILE_PATH = "./data/raw/Case_Study_1/Shapefile/Plots_Shapefile/all_plots.shp"
# Passed to Clipper (when enabled) and to PlotsDataset below.
PLOT_PATH = "./data/plots"
# CSV read into the `ground_truth` DataFrame.
GROUND_TRUTH_PATH = './data/ground_truth/ground_truth.csv'
# NOTE(review): not referenced by any cell visible in this notebook — confirm whether it is still needed.
CHECKPOINT_PATH = "./data/checkpoints"

In [None]:
# Load the ground-truth table; it is handed to PlotsDataset further down.
ground_truth = pd.read_csv(GROUND_TRUTH_PATH)
# Echo the table so its contents can be inspected in the notebook output.
print(ground_truth)

## Preprocessing
<a name='preprocessing'></a>

### GCP Finder
<a name='gcp_finder'></a>

In [None]:
from lib.gcp_finder import GCPFinder

# gcp_finder = GCPFinder(DATASET_PATH, GCP_PATH)
# gcp_finder.gcp_mover()

### Plots Clipper
<a name='plots_clipper'></a>

In [None]:
from lib.clipper import Clipper
   
# clipper = Clipper(ORTHOMOSAIC_PATH, SHAPEFILE_PATH, PLOT_PATH)
# clipper.start()

In [None]:
# Start the MLflow run that the parameter-logging cell later closes with
# mlflow.end_run().
# mlflow.start_run() raises when a run is already active, which happens as
# soon as this cell is re-executed or a previous execution stopped before
# reaching mlflow.end_run(); close any such dangling run first.
if mlflow.active_run() is not None:
    mlflow.end_run()
mlflow.start_run()

## Dataset
<a name="dataset"></a>

#### Augmentation

In [None]:
# Colour-jitter strengths. Kept as named values because the same numbers are
# logged to MLflow at the end of the notebook.
hue = 0.1
saturation = 0.1
brightness = 0.1
contrast = 0.2

# Random colour jittering for slight changes in hue, saturation, brightness, contrast.
color_jitter = v2.ColorJitter(
    hue=hue,
    saturation=saturation,
    brightness=brightness,
    contrast=contrast,
)

# Randomly add shadow-like patches by erasing a small rectangle.
# The erasing transform is attempted with p=0.3 and then fires with its own
# p=0.2, so a patch is erased in roughly 6% of the samples.
random_erase = v2.RandomApply(
    [v2.RandomErasing(p=0.2, scale=(0.01, 0.1), ratio=(0.3, 3.3))],
    p=0.3,
)

# Augmentation pipeline applied by the "augmented" PlotsDataset.
transforms = v2.Compose([
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    color_jitter,
    random_erase,
    # v2.RandomApply([v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0, hue=0)], p=0.2), # Randomly darken image corners to simulate vignetting
    # v2.RandomApply([v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.5, hue=0.3)], p=0.1), # Randomly add lens flare effects
])

In [None]:
from lib.dataset import PlotsDataset
from lib.utils import calculate_mean_std
from torch.utils.data import TensorDataset

# Image size handed to PlotsDataset.
IMG_SIZE = (224,224)

# Two variants built from the same ground-truth table and plot directory:
# one without the augmentation pipeline and one with it.
base_dataset = {
    "name": "base",
    "dataset": PlotsDataset(ground_truth, PLOT_PATH, IMG_SIZE),
}
augmented_dataset = {
    "name": "augmented",
    "dataset": PlotsDataset(ground_truth, PLOT_PATH, IMG_SIZE, transforms),
}

# Variant used by the rest of the notebook; its "name" is logged to MLflow.
curr_dataset = augmented_dataset

# dataset = ConcatDataset([base_dataset, augmented_dataset])
dataset = curr_dataset["dataset"]

# Disabled normalisation experiment.
# NOTE(review): it refers to `combined_dataset`, which is not defined in this notebook.
# means, stds = calculate_mean_std(combined_dataset)
# print(f"Means: {means}, Stds: {stds}")

# normalize_transforms = v2.Compose([v2.Normalize(mean=means, std=stds)])

# normalized_images = [torch.clamp(normalize_transforms(sample[0]), 0, 1) for sample in combined_dataset]
# labels = [sample[1] for sample in combined_dataset]

# dataset = TensorDataset(torch.stack(normalized_images), torch.Tensor(labels))


# Fetch the first sample a single time: every dataset[0] access goes through
# the dataset again, so the summary below describes one consistent sample.
first_sample = dataset[0]

print(f"Dataset length: {len(dataset)}")
print(f"Image type: {type(first_sample[0])}")
# The original message carried a stray ")" after the shape.
print(f"Image shape: {first_sample[0].shape}")
print(f"Label type: {type(first_sample[1])}")

In [None]:
# Re-imported here so this cell can be run on its own.
from lib.utils import calculate_mean_std

# NOTE(review): calculate_mean_std is defined in lib.utils and not visible here;
# presumably it returns per-channel statistics over the whole dataset — confirm.
means, stds = calculate_mean_std(dataset)
print(f"Means: {means}, Stds: {stds}")

In [None]:
# Fetch the sample once: indexing the dataset twice would load (and, for the
# augmented dataset, randomly transform) the image a second time just to read
# the label for the title.
first_sample = dataset[0]

# The image is indexed channel-first; imshow needs the channel axis last.
plt.imshow(first_sample[0].permute(1,2,0))
plt.title("Normalized image - crop height: " + str(first_sample[1])+ "m")
plt.show()

In [None]:
# Pixel distribution of the first sample, over all channels together.
first_image = dataset[0][0]
pixel_values = first_image.permute(1,2,0).ravel()

plt.hist(pixel_values, bins=50, density=True)
plt.xlabel("Pixel values")
plt.ylabel("Relative frequency")
plt.title("Distribution of pixels")

#### Split
<a name='split'></a>

In [None]:
from torch.utils.data import Subset

# Fractions are applied in sequence: TEST_SIZE is taken from the full dataset,
# VAL_SIZE from what remains after the test split.
VAL_SIZE = 0.2
TEST_SIZE = 0.2
# Fixed seed so the same split is reproduced across runs (set to None for a
# fresh random split on every execution).
SPLIT_SEED = 42

# Split sample *indices* instead of the dataset object itself. Handing the
# dataset to train_test_split makes scikit-learn index every sample and return
# plain Python lists, which freezes a single random augmentation per image and
# keeps all images in memory. Subset keeps the dataset lazy, so the transforms
# are re-drawn on every access.
all_indices = list(range(len(dataset)))
temp_indices, test_indices = train_test_split(
    all_indices, test_size=TEST_SIZE, random_state=SPLIT_SEED
)
train_indices, val_indices = train_test_split(
    temp_indices, test_size=VAL_SIZE, random_state=SPLIT_SEED
)

# NOTE(review): validation and test samples still come from `dataset`, so they
# pass through the same transforms as the training samples.
train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")

### Hyperparameters
<a name='hyperparameters'></a>

In [None]:
# Number of training epochs, passed to pl.Trainer as max_epochs.
TRAIN_EPOCS = 30
# Samples per batch for the train/validation/test DataLoaders.
BATCH_SIZE = 8

# Worker processes used by each DataLoader (num_workers).
WORKERS = 8

### DataLoaders
<a name="dataloader"></a>

In [None]:
# Settings shared by all three loaders; only the training loader shuffles.
loader_options = {"batch_size": BATCH_SIZE, "num_workers": WORKERS}

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

# len(loader) is the number of batches, not the number of samples.
for split_name, loader in (
    ("Train", train_loader),
    ("Validation", val_loader),
    ("Test", test_loader),
):
    print(f"{split_name} Dataloader size: {len(loader)}")

In [None]:
from lib.utils import get_dataset_samples

In [None]:
# NOTE(review): get_dataset_samples lives in lib.utils and is not visible here;
# presumably it displays a few samples under the given title — confirm.
get_dataset_samples(base_dataset["dataset"], "Base dataset")

In [None]:
# Same preview for the dataset built with the augmentation pipeline.
# NOTE(review): get_dataset_samples is defined in lib.utils; its exact output is not visible here.
get_dataset_samples(augmented_dataset["dataset"], "Augmented dataset")

#### Model
<a name='model'></a>

In [None]:
from lib.uav_vit import UAV_vit

# Backbone: torchvision's "vit_l_32" initialised with the IMAGENET1K_V1 weights.
# Name and weights stay in their own variables because both are logged to MLflow.
model_name = "vit_l_32"
model_weights = ViT_L_32_Weights.IMAGENET1K_V1
vit_l_32 = get_model(model_name, weights=model_weights)

# Mean squared error as the training objective.
loss_fn = TF.mse_loss

# Project wrapper that receives the backbone and the loss function.
model = UAV_vit(vit_l_32, loss_fn)

## Training
<a name='training'></a>

In [None]:
# Initialize a trainer
# Only the epoch budget is set explicitly; every other Trainer option is left at its default.
trainer = pl.Trainer(max_epochs=TRAIN_EPOCS)

In [None]:
# Fit the model on the training loader, validating on the validation loader.
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

In [None]:
# Persist only the learned weights (the state dict) to "model.pth" in the
# current working directory.
trained_weights = model.state_dict()
torch.save(trained_weights, "model.pth")

#### Evaluation
<a name='evaluation'></a><a name='prediction'></a>

In [None]:
# Evaluate the trained model on the held-out test loader.
trainer.test(model=model, dataloaders=test_loader, verbose=True)

In [None]:
# Record the run configuration in one call, then close the MLflow run that was
# opened before the dataset was built.
run_params = {
    "model": f"{model_name} - {model_weights}",
    "dataset": curr_dataset["name"],
    "dataset_length": len(dataset),
    "batch_size": BATCH_SIZE,
    "train_epochs": TRAIN_EPOCS,
    "hue": hue,
    "saturation": saturation,
    "brightness": brightness,
    "contrast": contrast,
}
mlflow.log_params(run_params)
mlflow.end_run()

#### Visualization
<a name='visualization'></a>

In [None]:
from lib.visualizer import Visualizer

# Instantiate the visualizer
visualizer = Visualizer()

# Use the visualizer to plot the desired graphs
# NOTE(review): the calls below are disabled; train_losses, val_losses,
# true_values and predicted_values are not defined anywhere in this notebook
# and must be collected before these can be enabled.
# visualizer.plot_loss(train_losses, val_losses)
# visualizer.plot_predictions(true_values, predicted_values)