# Overview
This notebook is divided into the following parts:
- Preparation: File paths for the inputs and outputs are defined, and the settings for the later steps are chosen.
- Preprocessing: Prepare the images for the later steps by applying filters.
- Distance computation: Normally done by feature detection, followed by feature matching, followed by distance score computation.
- Clustering: Compute a clustering from the previously computed distances.

# Preparation

In [1]:
# Import
import sys
import os

module_path = os.path.abspath(os.path.join('..', '..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Preprocessing
from joblib import Parallel, delayed
import utils
from utils import grayscale_directory, clahe_directory, apply_denoise_tv_chambolle_directory, circle_crop_directory, create_comparison_file, analyseClustering

# Distance computing
from kornia_matcher import extract_kornia_matches_in_directory
import pandas as pd

# Clustering
from sklearn.cluster import AgglomerativeClustering

In [2]:
# Variables
import variables

## Preprocessing
# Every preprocessing stage writes into its own numbered sub-folder of the
# results directory; the output of one stage is the input of the next.
exp1 = os.path.join(variables.results_directory, "01_grayscale")
exp2 = os.path.join(variables.results_directory, "02_histogram_equalization")
exp3 = os.path.join(variables.results_directory, "03_denoise")
exp4 = os.path.join(variables.results_directory, "04_histogram_equalization")
exp5 = os.path.join(variables.results_directory, "05_circle_crop")
folders = [exp1, exp2, exp3, exp4, exp5]

# The last preprocessing stage is the input of the distance computation.
preprocessed_images_path = exp5
images_folder_path = variables.images_directory

## Distance computing
# Build the output file paths with os.path.join, like the stage folders above,
# so they are correct whether or not results_directory ends with a separator.
matching_file_path = os.path.join(variables.results_directory, variables.matching_file_name)
clustering_file_path = os.path.join(variables.results_directory, variables.clustering_file_name)
graph_file_path = os.path.join(variables.results_directory, variables.graph_file_name)

## Clustering
# linkage option: "complete", "average"
# metric='precomputed' means the clusterer is fitted on a distance matrix
# instead of on raw feature vectors.
clusterer = AgglomerativeClustering(n_clusters=variables.number_of_clusters, linkage='complete', metric='precomputed')

# Preprocessing
See the Preprocessing notebook for more in-depth information.

In [3]:
# Create the results directory and one sub-folder per preprocessing stage.
# os.makedirs(..., exist_ok=True) also creates missing parent directories and
# does nothing when the folder already exists, so this cell is safe to re-run.
os.makedirs(variables.results_directory, exist_ok=True)
for folder in folders:
    os.makedirs(folder, exist_ok=True)

In [4]:
# Stage 1: run grayscale_directory on the raw images directory with the
# "01_grayscale" folder (exp1) as second argument.
# NOTE(review): presumably (source, target) argument order — confirm in utils.
grayscale_directory(variables.images_directory, exp1)

In [5]:
# Stage 2: run clahe_directory from the "01_grayscale" folder (exp1) into the
# "02_histogram_equalization" folder (exp2).
# NOTE(review): presumably (source, target) argument order — confirm in utils.
clahe_directory(exp1, exp2)

In [6]:
# Stage 3: run apply_denoise_tv_chambolle_directory from the
# "02_histogram_equalization" folder (exp2) into the "03_denoise" folder (exp3).
# NOTE(review): weight=0.5 is presumably the denoising strength — confirm in utils.
apply_denoise_tv_chambolle_directory(exp2, exp3, weight=0.5)

In [7]:
# Stage 4: run clahe_directory a second time, now from the "03_denoise" folder
# (exp3) into the "04_histogram_equalization" folder (exp4).
clahe_directory(exp3, exp4)

In [8]:
# Stage 5: run circle_crop_directory from the "04_histogram_equalization" folder
# (exp4) into the "05_circle_crop" folder (exp5). exp5 is also
# preprocessed_images_path, the input of the distance computation.
circle_crop_directory(exp4, exp5)

# Distance computation

## Feature Detection and Description

In [None]:
# Run the kornia matcher over the preprocessed images, using the matching method
# configured in the variables module, and save the result as CSV.
# NOTE(review): print_log=True presumably enables the per-step progress output;
# the return value is assumed to be a pandas DataFrame (it is saved via .to_csv) — confirm.
distances = extract_kornia_matches_in_directory(preprocessed_images_path, method=variables.matching_computation_method, print_log=True)
distances.to_csv(matching_file_path)

cuda:0
Matching Handler started with device cuda:0.
Matching Handler now using method 4: matcher smnn.
1 / 1787
2 / 1787
3 / 1787
4 / 1787
5 / 1787
6 / 1787
7 / 1787
8 / 1787
9 / 1787
10 / 1787
11 / 1787
12 / 1787
13 / 1787
14 / 1787
15 / 1787
16 / 1787
17 / 1787
18 / 1787
19 / 1787
20 / 1787
21 / 1787
22 / 1787
23 / 1787
24 / 1787
25 / 1787
26 / 1787
27 / 1787
28 / 1787
29 / 1787
30 / 1787
31 / 1787
32 / 1787
33 / 1787
34 / 1787
35 / 1787
36 / 1787
37 / 1787
38 / 1787
39 / 1787
40 / 1787
41 / 1787
42 / 1787
43 / 1787
44 / 1787
45 / 1787
46 / 1787
47 / 1787
48 / 1787
49 / 1787
50 / 1787
51 / 1787
52 / 1787
53 / 1787
54 / 1787
55 / 1787
56 / 1787
57 / 1787
58 / 1787
59 / 1787
60 / 1787
61 / 1787
62 / 1787
63 / 1787
64 / 1787
65 / 1787
66 / 1787
67 / 1787
68 / 1787
69 / 1787
70 / 1787
71 / 1787
72 / 1787
73 / 1787
74 / 1787
75 / 1787
76 / 1787
77 / 1787
78 / 1787
79 / 1787
80 / 1787
81 / 1787
82 / 1787
83 / 1787


In [None]:
# Reload the saved distances, pass them through utils.add_path_to_df together
# with the image paths, and write the result back to the same file.
df2 = pd.read_csv(matching_file_path)
# Paths are collected from the original (unprocessed) images folder.
paths = utils.get_paths(images_folder_path)
# NOTE(review): presumably adds a path column for each image — confirm in utils.
df2 = utils.add_path_to_df(df2, paths)
df2.to_csv(matching_file_path)

# Clustering

In [None]:
# Cluster the images from the distances file written by the distance-computation
# cells. Use the notebook-level matching_file_path / clustering_file_path (defined
# in the variables cell) so this cell reads and writes the same files as every
# other cell, instead of path attributes on the variables module.
clustering = utils.compute_clustering(matching_file_path, clusterer=clusterer, distance_function=variables.distance_computation_method)
clustering.to_csv(clustering_file_path)

# Pass the clustering through utils.add_path_to_df together with the image paths
# (keyed on the 'object_number' column) and save it again; this replaces the
# version written just above.
paths = utils.get_paths(images_folder_path)
clustering2 = utils.add_path_to_df(clustering, paths, name_column='object_number', set_index=False)
clustering2.to_csv(clustering_file_path)

# Evaluation

In [None]:
# The evaluation only runs when a ground-truth file is configured.
if variables.ground_truth_file != "":
    # Comparison file for graph visualization (applying clustering)

    # Label the run with the clustering file's base name, without extension.
    name = os.path.splitext(os.path.basename(clustering_file_path))[0]

    create_comparison_file(
        [matching_file_path],
        [name],
        1,
        variables.number_of_clusters,
        target_file=graph_file_path,
        number_of_images=-1,
        true_values_file=variables.ground_truth_file,
        distance_function=[variables.distance_computation_method],
        clusterers=[clusterer])

    # Plot the columns at positions 1-3 of the generated comparison file.
    # The figure is rendered by the notebook itself; printing the returned Axes
    # object would only add its text repr to the output.
    plot_dataframe = pd.read_csv(graph_file_path).iloc[:, [1, 2, 3]]
    plot = plot_dataframe.plot(title="Distancefunction comparison")

In [None]:
# Only run when a ground-truth file is configured.
if variables.ground_truth_file != "":
    # Analyse the clustering against the ground truth, using the same distances
    # file, distance function and clusterer as the clustering step.
    # NOTE(review): the meaning of side=True is not visible here — confirm in utils.
    analyseClustering(matching_file_path, 
                  variables.ground_truth_file, 
                  distance_function=variables.distance_computation_method,
                  side=True,
                  clusterer=clusterer)