## Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [9]:
import os, sys
from pathlib import Path
# NOTE(review): machine-specific absolute path (presumably the repository root — confirm).
# It has to be adjusted by hand when the notebook is run on another machine.
sys.path.append('/home/k64835/Master-Thesis-SITS')
# sys.path.append('/Users/bhumikasadbhave007/Documents/THWS/Semester-4/MASTER-THESIS/GITHUB/Master-Thesis-SITS')

# Add the sibling Data-Preprocessing directory to the import path.
# The relative path is resolved against the current working directory.
scripts_path = Path("../Data-Preprocessing/").resolve()
sys.path.append(str(scripts_path))

# Add the sibling Evaluation directory as well; `scripts_path` is reused for it.
scripts_path = Path("../Evaluation/").resolve()
sys.path.append(str(scripts_path))

In [10]:
import pickle
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestCentroid
from scripts.data_visualiser import *
from scripts.data_loader import *
from scripts.data_preprocessor import *
from scripts.temporal_data_preprocessor import *
from scripts.temporal_data_loader import *
from scripts.temporal_visualiser import *
from scripts.temporal_chanel_refinement import *
from model_scripts.model_helper import *
from model_scripts.dataset_creation import *
from model_scripts.train_model_ae import *
from model_scripts.model_visualiser import *
from model_scripts.subpatch_extraction import *
from model_scripts.clustering import *
from evaluation_scripts.result_visualiser import *
from evaluation_scripts.evaluation_helper import *
from model_scripts.feature_extraction import *
from Pipeline.temporal_preprocessing_pipeline import *
from Pipeline.temporal_preprocessing_pipeline import *
import numpy as np
import config as config
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import skimage.measure
import torch
import torch.nn as nn
import torch.optim as optim

## Prepare dataset: B10

### Loading the pre-processed data

Data: Extracted and Pre-processed Patches (each patch containing a sugarbeet field)

Dimensions: (N, T, C, H, W) = (N, 7, 10, 64, 64)

In [4]:
# One pipeline instance serves both the training and the evaluation split.
preprocessing_pipeline = PreProcessingPipelineTemporal()

# Training split, 'b10' variant.
(
    field_numbers_train,
    acquisition_dates_train,
    patch_tensor_train,
    visualisation_train,
) = preprocessing_pipeline.get_processed_temporal_cubes('train', 'b10')

# Evaluation split, 'b10' variant.
(
    field_numbers_eval,
    acquisition_dates_eval,
    patch_tensor_eval,
    visualisation_eval,
) = preprocessing_pipeline.get_processed_temporal_cubes('eval', 'b10')

# Cell output: the shapes of both patch tensors.
patch_tensor_train.shape, patch_tensor_eval.shape

(torch.Size([1228, 7, 10, 64, 64]), torch.Size([48, 7, 10, 64, 64]))

### Create Sub-Patches

In [5]:
# Split the training patches into sub-patches of side length config.subpatch_size.
train_subpatches, train_subpatch_coords = non_overlapping_sliding_window(
    patch_tensor_train,
    field_numbers_train,
    patch_size=config.subpatch_size,
)

# Same extraction for the evaluation patches.
eval_subpatches, eval_subpatch_coords = non_overlapping_sliding_window(
    patch_tensor_eval,
    field_numbers_eval,
    patch_size=config.subpatch_size,
)

# Cell output: the shapes of both sub-patch tensors.
train_subpatches.shape, eval_subpatches.shape

(torch.Size([33128, 7, 10, 4, 4]), torch.Size([1197, 7, 10, 4, 4]))

Get properly formatted field numbers for the evaluation function

In [6]:
# Convert the sub-patch coordinates of each split (train first, then eval)
# into the string form expected by the evaluation function.
train_coord_fn, eval_coord_fn = map(
    get_string_fielddata,
    (train_subpatch_coords, eval_subpatch_coords),
)

## 1. Feature Extraction using Channel-wise Histograms

### Feature extraction 

In [87]:
# Both splits use the same number of histogram bins.
n_hist_bins = 30
histogram_features_train, histogram_features_eval = (
    extract_global_histogram(subpatch_split, bins=n_hist_bins)
    for subpatch_split in (train_subpatches, eval_subpatches)
)

# Cell output: shape of the training feature matrix.
histogram_features_train.shape

(33128, 30)

### Modeling: k-means

Clustering the sub-patches 

In [93]:
# To load a previously saved model instead of training one, uncomment the two lines below
# with open(config.kmeans_hist_path, 'rb') as file:
#     kmeans_b10 = pickle.load(file)

In [88]:
# Fit k-means (2 clusters) on the training histograms.
# Comment out the next line when a saved model has been loaded instead.
kmeans_b10 = kmeans_function(histogram_features_train, n_clusters=2, random_state=31)

# Flatten every sample to a single feature row before assigning clusters.
hist_train_2d = histogram_features_train.reshape(histogram_features_train.shape[0], -1)
hist_eval_2d = histogram_features_eval.reshape(histogram_features_eval.shape[0], -1)

train_subpatch_predictions = kmeans_b10.predict(hist_train_2d)
eval_subpatch_predictions = kmeans_b10.predict(hist_eval_2d)

### Evaluation

Ensure the predictions are not all zeros or all ones (i.e. both clusters are actually used)

In [89]:
# Distinct cluster ids predicted for the evaluation sub-patches;
# a single value here would mean every sub-patch fell into one cluster.
np.unique(eval_subpatch_predictions)

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [90]:
# Score the evaluation sub-patch predictions against the ground-truth labels file.
# To also save the predictions, append the two extra arguments
# 'Histogram Features', True to the call below.
disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(
    eval_coord_fn,
    eval_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)

# Print every metric on its own line.
metric_report = (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
)
for metric_label, metric_value in metric_report:
    print(metric_label, metric_value)

Disease cluster: 0
Accuracy: 63.93
Precision: 69.7
Recall: 65.71
F1-score: 67.65
F2-score: 66.47


Save Model

In [92]:
# with open(config.kmeans_hist_path, 'wb') as file:
#     pickle.dump(kmeans_b10, file)

### 3 Executions

Run k-means on the histogram features 3 times with different seeds to account for run-to-run variability

In [9]:
# Repeat the k-means fit with several different seeds to see how much the
# evaluation metrics depend on the initialisation.
n_runs = 3

# Draw the k-means seeds from a fixed-seed generator so that a fresh
# "Restart & Run All" repeats exactly the same runs. Seeds lie in [1, 100].
seed_generator = np.random.default_rng(0)
run_seeds = [int(seed) for seed in seed_generator.integers(1, 101, size=n_runs)]

total_accuracy = 0
total_recall = 0
for i, run_seed in enumerate(run_seeds):
    # Note: this overwrites kmeans_b10 and eval_subpatch_predictions from the cells above.
    kmeans_b10 = kmeans_function(histogram_features_train, n_clusters=2, random_state=run_seed)
    eval_subpatch_predictions = kmeans_b10.predict(histogram_features_eval.reshape(histogram_features_eval.shape[0],-1))
    disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(eval_coord_fn, eval_subpatch_predictions, config.labels_path, config.subpatch_to_patch_threshold)
    print(f"Run {i+1}: Accuracy={acc}, Recall={recall}")
    total_accuracy += acc
    total_recall += recall

# Averages use the same run count as the loop.
print("Average Accuracy:",total_accuracy/n_runs)
print("Average Recall:",total_recall/n_runs)

Run 1: Accuracy=68.85, Recall=74.29
Run 2: Accuracy=68.85, Recall=74.29
Run 3: Accuracy=68.85, Recall=74.29
Average Accuracy: 68.85
Average Recall: 74.29


## 2. Feature Reduction using PCA

### Feature extraction 

Here, we apply PCA along the channel dimension of every data sample and keep the top 3 channels per sample.
The resulting feature vector has a size of (H * W * T * 3) per sample.
We also print the overall top 3 channels retained by PCA for the train and evaluation data.

Channels retained in decreasing order of importance: 6, 8 and 5, corresponding to bands 8 (NIR), 11 (SWIR 1), and 7 (Red-edge 3).

In [13]:
# Both splits are reduced with the same number of PCA components.
pca_kwargs = {"n_components": config.pca_components}

features_train, top_channel_indices_train = pca_feature_extraction_channel(train_subpatches, **pca_kwargs)
features_eval, top_channel_indices_eval = pca_feature_extraction_channel(eval_subpatches, **pca_kwargs)

# Cell output: number of evaluation feature vectors.
len(features_eval)

1197

In [14]:
# Display the top-channel indices returned for the train and eval splits side by side.
top_channel_indices_train, top_channel_indices_eval

([np.int64(6), np.int64(8), np.int64(5)],
 [np.int64(6), np.int64(8), np.int64(5)])

### Modeling: k-means

Clustering the sub-patches 

In [15]:
# Convert each feature list to an array once and reuse it,
# rather than rebuilding the training array for both the fit and the prediction.
pca_train_matrix = np.array(features_train)
pca_eval_matrix = np.array(features_eval)

# Fit k-means (2 clusters) on the PCA-reduced training features.
kmeans_b10 = kmeans_function(pca_train_matrix, n_clusters=2, random_state=32)

# Assign every sub-patch of both splits to a cluster.
train_subpatch_predictions = kmeans_b10.predict(pca_train_matrix)
eval_subpatch_predictions = kmeans_b10.predict(pca_eval_matrix)

### Evaluation

Ensure the predictions are not all zeros or all ones (i.e. both clusters are actually used)

In [16]:
# Distinct cluster ids predicted for the evaluation sub-patches;
# a single value here would mean every sub-patch fell into one cluster.
np.unique(eval_subpatch_predictions)

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [17]:
# Score the evaluation sub-patch predictions against the ground-truth labels file.
disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(
    eval_coord_fn,
    eval_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)

# Print every metric on its own line.
metric_report = (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
)
for metric_label, metric_value in metric_report:
    print(metric_label, metric_value)

Disease cluster: 0
Accuracy: 57.38
Precision: 58.82
Recall: 85.71
F1-score: 69.77
F2-score: 78.53


Save Model

In [None]:
# with open(config.kmeans_pca_path, 'wb') as file:
#     pickle.dump(kmeans_b10, file)

## 3. Feature Extraction using Vegetation Indices

### Data Loading + Feature Extraction
#### Loading the pre-processed data with Vegetation Indices = NDVI, MSI, EVI

Dimensions: (N, T, C, H, W) = (N, 7, 3, 64, 64)

In [19]:
# One pipeline instance serves both the training and the evaluation split.
preprocessing_pipeline = PreProcessingPipelineTemporal()

# Training split, 'mvi' variant.
(
    field_numbers_train,
    acquisition_dates_train,
    patch_tensor_train,
    visualisation_train,
) = preprocessing_pipeline.get_processed_temporal_cubes('train', 'mvi')

# Evaluation split, 'mvi' variant.
(
    field_numbers_eval,
    acquisition_dates_eval,
    patch_tensor_eval,
    visualisation_eval,
) = preprocessing_pipeline.get_processed_temporal_cubes('eval', 'mvi')

# Cell output: the shapes of both patch tensors.
patch_tensor_train.shape, patch_tensor_eval.shape

(torch.Size([1228, 7, 3, 64, 64]), torch.Size([48, 7, 3, 64, 64]))

### Create Sub-Patches

In [20]:
# Split the training patches into sub-patches of side length config.subpatch_size.
train_subpatches, train_subpatch_coords = non_overlapping_sliding_window(
    patch_tensor_train,
    field_numbers_train,
    patch_size=config.subpatch_size,
)

# Same extraction for the evaluation patches.
eval_subpatches, eval_subpatch_coords = non_overlapping_sliding_window(
    patch_tensor_eval,
    field_numbers_eval,
    patch_size=config.subpatch_size,
)

# Cell output: the shapes of both sub-patch tensors.
train_subpatches.shape, eval_subpatches.shape

(torch.Size([33128, 7, 3, 4, 4]), torch.Size([1197, 7, 3, 4, 4]))

Get properly formatted field numbers for the evaluation function

In [21]:
# Convert the sub-patch coordinates of each split (train first, then eval)
# into the string form expected by the evaluation function.
train_coord_fn, eval_coord_fn = map(
    get_string_fielddata,
    (train_subpatch_coords, eval_subpatch_coords),
)

### Modeling: k-means

Clustering the sub-patches 

In [25]:
# Fit k-means (2 clusters) directly on the vegetation-index sub-patches.
kmeans_b10 = kmeans_function(train_subpatches, n_clusters=2, random_state=32)

# Flatten each sub-patch tensor to one row and hand it to the model as a NumPy array.
vi_train_rows = train_subpatches.reshape(train_subpatches.size(0), -1).numpy()
vi_eval_rows = eval_subpatches.reshape(eval_subpatches.size(0), -1).numpy()

train_subpatch_predictions = kmeans_b10.predict(vi_train_rows)
test_subpatch_predictions = kmeans_b10.predict(vi_eval_rows)

### Evaluation

Ensure the predictions are not all zeros or all ones (i.e. both clusters are actually used)

In [26]:
# Distinct cluster ids predicted by the vegetation-index model for the evaluation sub-patches.
# This section stores its predictions in `test_subpatch_predictions`;
# `eval_subpatch_predictions` still holds the output of an earlier model.
np.unique(test_subpatch_predictions)

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [27]:
# Score the evaluation sub-patch predictions against the ground-truth labels file.
disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(
    eval_coord_fn,
    test_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)

# Print every metric on its own line.
metric_report = (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
)
for metric_label, metric_value in metric_report:
    print(metric_label, metric_value)

Disease cluster: 0
Accuracy: 44.26
Precision: 51.11
Recall: 65.71
F1-score: 57.5
F2-score: 62.16


Save Model

In [28]:
# with open(config.kmeans_vi_path, 'wb') as file:
#     pickle.dump(kmeans_b10, file)