## Imports

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project .py modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys
from pathlib import Path


def add_to_sys_path(path):
    """Append ``path`` to ``sys.path`` unless it is already listed.

    Keeps this cell idempotent: re-running it in a live kernel does not pile
    up duplicate search-path entries.

    Args:
        path: Directory to make importable (``str`` or ``Path``); it is
            converted with ``str()`` before the membership check.

    Returns:
        The string form of ``path`` that is now on ``sys.path``.
    """
    path = str(path)
    if path not in sys.path:
        sys.path.append(path)
    return path


# Repository root. The default is the original checkout location; set the
# SITS_PROJECT_ROOT environment variable to run from another machine/checkout
# instead of editing (or commenting/uncommenting) a hard-coded path here.
add_to_sys_path(os.environ.get('SITS_PROJECT_ROOT', '/home/k64835/Master-Thesis-SITS'))

# Sibling folders, resolved relative to the notebook's working directory.
scripts_path = Path("../Data-Preprocessing/").resolve()
add_to_sys_path(scripts_path)

scripts_path = Path("../Evaluation/").resolve()
add_to_sys_path(scripts_path)

In [3]:
import pickle
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestCentroid
from scripts.data_visualiser import *
from scripts.data_loader import *
from scripts.data_preprocessor import *
from scripts.temporal_data_preprocessor import *
from scripts.temporal_data_loader import *
from scripts.temporal_visualiser import *
from scripts.temporal_chanel_refinement import *
from model_scripts.model_helper import *
from model_scripts.dataset_creation import *
from model_scripts.train_model_ae import *
from model_scripts.model_visualiser import *
from model_scripts.subpatch_extraction import *
from model_scripts.clustering import *
from evaluation_scripts.result_visualiser import *
from evaluation_scripts.evaluation_helper import *
from Pipeline.temporal_preprocessing_pipeline import *
from Pipeline.temporal_preprocessing_pipeline import *
from Pipeline.preprocess_script import *
import numpy as np
import config as config
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import skimage.measure
import torch
import torch.nn as nn
import torch.optim as optim

## Prepare dataset: B10

### Loading the pre-processed data

Data: Extracted and Pre-processed sub-patches

Dimensions: (N, T, C, H, W) = (N, 7, 10, 4, 4)

In [4]:
# Fetch the train/eval sub-patch tensors for the baseline B10 setup, together
# with the `*_coord_fn` metadata that the evaluation cells hand to
# `evaluate_clustering_metrics`.
train_subpatches, eval_subpatches, train_coord_fn, eval_coord_fn = get_model_ready_data(
    model_type='baseline',
    tensor_type='b10',
    encoding_method='sin-cos',
    visualisation_images=False,
)

# Show both tensor shapes as the cell output.
(train_subpatches.shape, eval_subpatches.shape)

(torch.Size([33128, 7, 10, 4, 4]), torch.Size([1197, 7, 10, 4, 4]))

## 1. K-means Clustering 

### Modeling

Clustering the sub-patches 

In [13]:
# To reuse a previously saved K-means model instead of fitting a new one,
# uncomment the two lines below (and comment out the fitting line in the next cell).
# NOTE(review): this reads config.kmeans_b10_local_path, while the "Save Model"
# cell writes config.kmeans_b10_path — confirm the two are meant to differ.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# with open(config.kmeans_b10_local_path, 'rb') as file:
#     kmeans_b10 = pickle.load(file)

In [8]:
# Fit a fresh 2-cluster K-means model on the training sub-patches.
# (Comment out the next line when loading a saved model instead.)
kmeans_b10 = kmeans_function(train_subpatches, n_clusters=2, random_state=1)

# Collapse every sub-patch into one flat feature row, as a NumPy array.
kmeans_train_features = train_subpatches.reshape(train_subpatches.shape[0], -1).numpy()
kmeans_eval_features = eval_subpatches.reshape(eval_subpatches.shape[0], -1).numpy()

# Assign each flattened sub-patch to one of the fitted clusters.
train_subpatch_predictions = kmeans_b10.predict(kmeans_train_features)
eval_subpatch_predictions = kmeans_b10.predict(kmeans_eval_features)

### Evaluation

Ensure the predictions are not all zeros or all ones (i.e. both clusters are actually used)

In [9]:
# Distinct cluster ids among the evaluation predictions; if only one id shows
# up, every sub-patch was put into the same cluster.
kmeans_eval_cluster_ids = np.unique(eval_subpatch_predictions)
kmeans_eval_cluster_ids

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [12]:
# Alternative call that also saves the predictions:
# disease, acc, precision, recall, f1_score, f2_score = evaluate_clustering_metrics(eval_coord_fn, eval_subpatch_predictions, config.labels_path, config.subpatch_to_patch_threshold, 'Flattened Data', True) #for saving predictions
kmeans_scores = evaluate_clustering_metrics(
    eval_coord_fn,
    eval_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)
disease, acc, precision, recall, f1_score, f2_score = kmeans_scores

# Report each metric on its own line.
for metric_label, metric_value in (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
):
    print(metric_label, metric_value)

Disease cluster: 1
Accuracy: 63.93
Precision: 62.75
Recall: 91.43
F1-score: 74.42
F2-score: 83.77


Save Model

In [14]:
# Uncomment the two lines below to pickle the current `kmeans_b10` object to
# config.kmeans_b10_path (the file is opened in 'wb' mode, so an existing file
# at that path is overwritten).
# with open(config.kmeans_b10_path, 'wb') as file:
#     pickle.dump(kmeans_b10, file)

### 3 Executions

In [10]:
# Repeat the K-means fit with several different seeds to see how sensitive the
# patch-level accuracy/recall are to the initialisation.
#
# Seeds come from a seeded NumPy generator instead of `random.randint`:
#   * the runs are reproducible after "Restart Kernel -> Run All";
#   * the cell no longer relies on `random`, which this notebook never imports
#     explicitly.
n_runs = 3
run_seeds = np.random.default_rng(0).integers(1, 101, size=n_runs)  # each seed lies in [1, 100]

# The flattened evaluation features are identical for every run, so build them once.
run_eval_features = eval_subpatches.reshape(eval_subpatches.size(0), -1).numpy()

total_accuracy = 0
total_recall = 0
for run_number, run_seed in enumerate(run_seeds, start=1):
    # Run-local names: the model, predictions and metrics of the main K-means
    # experiment above must not be overwritten by this robustness check.
    run_model = kmeans_function(train_subpatches, n_clusters=2, random_state=int(run_seed))
    run_predictions = run_model.predict(run_eval_features)
    run_disease, run_acc, run_precision, run_recall, run_f1, run_f2 = evaluate_clustering_metrics(
        eval_coord_fn, run_predictions, config.labels_path, config.subpatch_to_patch_threshold
    )
    print(f"Run {run_number}: Accuracy={run_acc}, Recall={run_recall}")
    total_accuracy += run_acc
    total_recall += run_recall

# Average over the actual number of runs rather than a hard-coded 3.
print("Average Accuracy:",total_accuracy/n_runs)
print("Average Recall:",total_recall/n_runs)

Run 1: Accuracy=63.93, Recall=91.43
Run 2: Accuracy=63.93, Recall=91.43
Run 3: Accuracy=63.93, Recall=91.43
Average Accuracy: 63.93
Average Recall: 91.43


## 2. Agglomerative Clustering 

### Modeling

Clustering the sub-patches 

In [5]:
# Build the 2-cluster agglomerative clustering model from the training sub-patches.
agg_b10 = agg_clustering_function(
    train_subpatches,
    n_clusters=2,
)

In [6]:
# One flat feature row per sub-patch, as NumPy arrays.
agg_train_features = train_subpatches.reshape(train_subpatches.shape[0], -1).numpy()
agg_eval_features = eval_subpatches.reshape(eval_subpatches.shape[0], -1).numpy()

# NOTE(review): `fit_predict` is called separately on the training and on the
# evaluation data. If `agg_b10` is a scikit-learn AgglomerativeClustering
# (which offers no `predict`), each call re-fits the model, so the evaluation
# labels come from clustering the evaluation set on its own and its cluster
# ids need not correspond to the training ones — confirm this is intended.
train_subpatch_predictions = agg_b10.fit_predict(agg_train_features)
test_subpatch_predictions = agg_b10.fit_predict(agg_eval_features)

### Evaluation

Ensure the predictions are not all zeros or all ones (i.e. both clusters are actually used)

In [None]:
# Distinct cluster ids among the agglomerative evaluation predictions; a single
# id would mean every sub-patch ended up in the same cluster.
agg_eval_cluster_ids = np.unique(test_subpatch_predictions)
agg_eval_cluster_ids

array([0, 1])

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [7]:
# Score the agglomerative sub-patch predictions against the ground-truth labels.
agg_scores = evaluate_clustering_metrics(
    eval_coord_fn,
    test_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)
disease, acc, precision, recall, f1_score, f2_score = agg_scores

# Report each metric on its own line.
for metric_label, metric_value in (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
):
    print(metric_label, metric_value)

Disease cluster: 0
Accuracy: 42.62
Precision: 50.0
Recall: 54.29
F1-score: 52.05
F2-score: 53.37


Save Model

In [None]:
# Uncomment the two lines below to pickle `agg_b10` to config.agg_path.
# NOTE(review): the last call made on `agg_b10` above is `fit_predict` on the
# evaluation sub-patches, so the pickled object presumably reflects that fit
# rather than the training fit — confirm before relying on the saved model.
# with open(config.agg_path, 'wb') as file:
#     pickle.dump(agg_b10, file)

## 3. K-medoids Clustering 

### Modeling

Clustering the sub-patches 

In [None]:
# Fit a 2-cluster K-medoids model (Manhattan metric) on the training sub-patches.
kmedoids_b10 = kmedoids_function(
    train_subpatches,
    n_clusters=2,
    random_state=4,
    metric='manhattan',
)

# One flat feature row per sub-patch, as NumPy arrays.
kmedoids_train_features = train_subpatches.reshape(train_subpatches.shape[0], -1).numpy()
kmedoids_eval_features = eval_subpatches.reshape(eval_subpatches.shape[0], -1).numpy()

# Assign each flattened sub-patch to one of the fitted clusters.
train_subpatch_predictions = kmedoids_b10.predict(kmedoids_train_features)
test_subpatch_predictions = kmedoids_b10.predict(kmedoids_eval_features)

### Evaluation

Ensure the predictions are not all zeros or all ones (i.e. both clusters are actually used)

In [23]:
# Sanity-check the K-medoids predictions produced in the modeling cell of this
# section (`test_subpatch_predictions`). The earlier version inspected
# `eval_subpatch_predictions`, which still holds the K-means labels from
# section 1 and therefore could never reveal a collapsed K-medoids clustering.
np.unique(test_subpatch_predictions)

array([0, 1], dtype=int32)

Clustering Accuracy: Convert sub-patch level labels to patch-level labels and compare with ground truth

In [24]:
# Score the K-medoids sub-patch predictions against the ground-truth labels.
kmedoids_scores = evaluate_clustering_metrics(
    eval_coord_fn,
    test_subpatch_predictions,
    config.labels_path,
    config.subpatch_to_patch_threshold,
)
disease, acc, precision, recall, f1_score, f2_score = kmedoids_scores

# Report each metric on its own line.
for metric_label, metric_value in (
    ("Disease cluster:", disease),
    ("Accuracy:", acc),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-score:", f1_score),
    ("F2-score:", f2_score),
):
    print(metric_label, metric_value)

Disease cluster: 1
Accuracy: 67.21
Precision: 85.71
Recall: 51.43
F1-score: 64.29
F2-score: 55.9


Save Model

In [25]:
# Uncomment the two lines below to pickle the fitted `kmedoids_b10` object to
# config.kmedoids_path (opened in 'wb' mode, so an existing file is overwritten).
# with open(config.kmedoids_path, 'wb') as file:
#     pickle.dump(kmedoids_b10, file)