## Imports

In [1]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before running each cell.
%autoreload 2

In [2]:
import os, sys
from pathlib import Path
# Make the project-local packages importable by extending sys.path.
# Each entry is added only if it is missing, so re-running this cell does not
# keep appending duplicate entries.

# NOTE(review): absolute path on the original author's machine - presumably the
# repository root; it has to be adapted on any other machine.
project_root = '/home/k64835/Master-Thesis-SITS'
if project_root not in sys.path:
    sys.path.append(project_root)

# Sibling directories, resolved against the current working directory.
# `scripts_path` ends up bound to the resolved Evaluation directory.
for scripts_path in (Path("../Data-Preprocessing/"), Path("../Evaluation/")):
    scripts_path = scripts_path.resolve()
    if str(scripts_path) not in sys.path:
        sys.path.append(str(scripts_path))

In [3]:
import pickle
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestCentroid
from scripts.data_visualiser import *
from scripts.data_loader import *
from scripts.data_preprocessor import *
from scripts.temporal_data_preprocessor import *
from scripts.temporal_data_loader import *
from scripts.temporal_visualiser import *
from scripts.temporal_chanel_refinement import *
from model_scripts.model_helper import *
from model_scripts.dataset_creation import *
from model_scripts.train_model_ae import *
from model_scripts.model_visualiser import *
from model_scripts.subpatch_extraction import *
from model_scripts.clustering import *
from evaluation_scripts.result_visualiser import *
from evaluation_scripts.evaluation_helper import *
from Pipeline.temporal_preprocessing_pipeline import *
from Pipeline.temporal_preprocessing_pipeline import *
import numpy as np
import config as config
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import skimage.measure
import torch
import torch.nn as nn
import torch.optim as optim

  warn(


## Prepare dataset: B10

### Loading the pre-processed data

Data: Extracted and pre-processed patches (each patch contains one sugar beet field)

Dimensions: (N, T, C, H, W) = (N, 7, 10, 64, 64)

In [4]:
# One pipeline instance serves both the training and the evaluation split.
preprocessing_pipeline = PreProcessingPipelineTemporal()

# Each call returns four values, unpacked here into the per-split variables.
(
    field_numbers_train,
    acquisition_dates_train,
    patch_tensor_train,
    visualisation_train,
) = preprocessing_pipeline.get_processed_temporal_cubes('train', 'b10')
(
    field_numbers_eval,
    acquisition_dates_eval,
    patch_tensor_eval,
    visualisation_eval,
) = preprocessing_pipeline.get_processed_temporal_cubes('eval', 'b10')

# Cell output: the shapes of both patch tensors.
(patch_tensor_train.shape, patch_tensor_eval.shape)

(torch.Size([2425, 7, 10, 64, 64]), torch.Size([48, 7, 10, 64, 64]))

### Create Sub-Patches

In [5]:
# Split every patch into sub-patches; the window size comes from the project config.
window_size = config.subpatch_size

train_subpatches, train_subpatch_coords = non_overlapping_sliding_window(
    patch_tensor_train,
    field_numbers_train,
    patch_size=window_size,
)
eval_subpatches, eval_subpatch_coords = non_overlapping_sliding_window(
    patch_tensor_eval,
    field_numbers_eval,
    patch_size=window_size,
)

# Cell output: the shapes of both sub-patch tensors.
(train_subpatches.shape, eval_subpatches.shape)

(torch.Size([54623, 7, 10, 4, 4]), torch.Size([1197, 7, 10, 4, 4]))

Get properly formatted field numbers for the evaluation function

In [6]:
# Convert the sub-patch coordinate records of both splits with the same helper;
# the results are passed to evaluate_clustering_metrics further down.
train_coord_fn, eval_coord_fn = map(
    get_string_fielddata,
    (train_subpatch_coords, eval_subpatch_coords),
)

## 1. K-means Clustering 

### Modeling

Clustering the sub-patches 

In [39]:
# Two-cluster K-means model built from the training sub-patches (seeded for reproducibility).
kmeans_b10 = kmeans_function(train_subpatches, n_clusters=2, random_state=1)

# Flatten every sub-patch into a single feature row and convert to NumPy for predict().
train_features = train_subpatches.reshape(train_subpatches.size(0), -1).numpy()
eval_features = eval_subpatches.reshape(eval_subpatches.size(0), -1).numpy()

# Cluster label per sub-patch for both splits.
train_subpatch_predictions = kmeans_b10.predict(train_features)
eval_subpatch_predictions = kmeans_b10.predict(eval_features)

### Evaluation

Ensure the predictions are not all zeros or all ones, i.e. that both clusters are actually used

In [40]:
# Show the distinct cluster labels K-means assigned to the evaluation sub-patches;
# a single value here would mean every sub-patch landed in the same cluster.
np.unique(eval_subpatch_predictions)

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [41]:
# Score the K-means sub-patch labels of the evaluation split against the labels
# file referenced by the config; the helper returns six values.
disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(
    eval_coord_fn,
    eval_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)

# Print one "<label> <value>" line per metric.
for label, value in (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
):
    print(label, value)

Disease cluster: 0
Accuracy: 65.57
Precision: 64.0
Recall: 91.43
F1-score: 75.29
F2-score: 84.21


Save Model

In [43]:
# Saving the K-means model is currently disabled; uncomment the two lines below
# to pickle `kmeans_b10` to `config.kmeans_b10_path`.
# with open(config.kmeans_b10_path, 'wb') as file:
#     pickle.dump(kmeans_b10, file)

## 2. Agglomerative Clustering 

### Modeling

Clustering the sub-patches 

In [18]:
# Build the two-cluster agglomerative clustering model from the training sub-patches.
# NOTE(review): whether agg_clustering_function already fits the model is not
# visible in this notebook - confirm in the project's clustering helpers.
agg_b10 = agg_clustering_function(train_subpatches, n_clusters=2)

In [19]:
# Flatten every sub-patch into a single feature row and convert to NumPy.
train_features = train_subpatches.reshape(train_subpatches.size(0), -1).numpy()
eval_features = eval_subpatches.reshape(eval_subpatches.size(0), -1).numpy()

# NOTE(review): if agg_b10 is scikit-learn's AgglomerativeClustering, fit_predict
# refits the model on every call. The evaluation labels would then come from a
# clustering of the evaluation sub-patches alone, and their cluster ids need not
# match those of the training fit - confirm.
train_subpatch_predictions = agg_b10.fit_predict(train_features)
test_subpatch_predictions = agg_b10.fit_predict(eval_features)

### Evaluation

Ensure the predictions are not all zeros or all ones, i.e. that both clusters are actually used

In [20]:
# Show the distinct cluster labels the agglomerative model assigned to the
# evaluation sub-patches; a single value would mean only one cluster was used.
np.unique(test_subpatch_predictions)

array([0, 1])

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [21]:
# Score the agglomerative sub-patch labels of the evaluation split against the
# labels file referenced by the config; the helper returns six values.
disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(
    eval_coord_fn,
    test_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)

# Print one "<label> <value>" line per metric.
for label, value in (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
):
    print(label, value)

Disease cluster: 0
Accuracy: 42.62
Precision: 50.0
Recall: 54.29
F1-score: 52.05
F2-score: 53.37


Save Model

In [23]:
# Saving the agglomerative model is currently disabled; uncomment the two lines
# below to pickle `agg_b10` to `config.agg_path`.
# with open(config.agg_path, 'wb') as file:
#     pickle.dump(agg_b10, file)

## 3. K-medoids Clustering 

### Modeling

Clustering the sub-patches 

In [35]:
# Two-cluster K-medoids model built from the training sub-patches, using the
# Manhattan metric and a fixed seed.
kmedoids_b10 = kmedoids_function(train_subpatches, n_clusters=2, random_state=4, metric='manhattan')

# Flatten every sub-patch into a single feature row and convert to NumPy for predict().
train_features = train_subpatches.reshape(train_subpatches.size(0), -1).numpy()
eval_features = eval_subpatches.reshape(eval_subpatches.size(0), -1).numpy()

# Cluster label per sub-patch for both splits.
train_subpatch_predictions = kmedoids_b10.predict(train_features)
test_subpatch_predictions = kmedoids_b10.predict(eval_features)

### Evaluation

Ensure the predictions are not all zeros or all ones, i.e. that both clusters are actually used

In [36]:
# Show the distinct cluster labels K-medoids assigned to the evaluation sub-patches.
# The K-medoids predictions live in `test_subpatch_predictions`;
# `eval_subpatch_predictions` still holds the K-means labels from the first
# section, so inspecting it here would not check this model at all.
np.unique(test_subpatch_predictions)

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [37]:
# Score the K-medoids sub-patch labels of the evaluation split against the
# labels file referenced by the config; the helper returns six values.
disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(
    eval_coord_fn,
    test_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)

# Print one "<label> <value>" line per metric.
for label, value in (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
):
    print(label, value)

Disease cluster: 0
Accuracy: 65.57
Precision: 68.42
Recall: 74.29
F1-score: 71.23
F2-score: 73.03


Save Model

In [38]:
# Pickle the K-medoids model object to the path configured in `config.kmedoids_path`
# (binary write mode; an existing file at that path is overwritten).
with open(config.kmedoids_path, 'wb') as model_file:
    pickle.dump(kmedoids_b10, model_file)