# Overview
This notebook is divided into the following parts:
- Preparation: Filepaths can be given for the inputs and outputs. Settings can be chosen for the following steps.
- Preprocessing: Apply filters to prepare the images for the later steps.
- Distance computation: Normally done by feature detection, followed by feature matching, followed by distance score computation.
- Clustering: Compute a clustering from the previously computed distances.

# Preparation

In [3]:
# Import
import sys
import os

# Resolve the directory two levels above the current working directory and make it
# importable (presumably where utils, kornia_matcher and variables live — confirm).
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    # Append only once so re-running the cell does not grow sys.path.
    sys.path.append(module_path)

# Preprocessing
from joblib import Parallel, delayed
import utils
from utils import grayscale_directory, clahe_directory, apply_denoise_tv_chambolle_directory, circle_crop_directory

# Distance computing
from kornia_matcher import extract_kornia_matches_in_directory
import pandas as pd

# Clustering
from sklearn.cluster import AgglomerativeClustering

INFO:matplotlib.font_manager:generated new fontManager


In [None]:
# Variables
import variables

## Preprocessing

# One output folder per preprocessing stage; the numeric prefix matches the order
# in which the stages are run further down in the notebook.
exp1 = os.path.join(variables.results_directory, "01_grayscale")
exp2 = os.path.join(variables.results_directory, "02_histogram_equalization")
exp3 = os.path.join(variables.results_directory, "03_denoise")
exp4 = os.path.join(variables.results_directory, "04_histogram_equalization")
exp5 = os.path.join(variables.results_directory, "05_circle_crop")
folders = [exp1, exp2, exp3, exp4, exp5]

# The output of the last stage is what the matcher consumes.
preprocessed_images_path = exp5
images_folder_path = variables.images_directory

## Distance computing

# Built with os.path.join, like the stage folders above, so the files end up inside
# results_directory whether or not that setting ends with a path separator.
# (Plain string concatenation silently produced a sibling path when it did not.)
matching_file_path = os.path.join(variables.results_directory, variables.matching_file_name)
clustering_file_path = os.path.join(variables.results_directory, variables.clustering_file_name)

## Clustering
# Agglomerative clustering with complete linkage; metric='precomputed' means fit()
# is given a distance matrix rather than feature vectors.
clusterer = AgglomerativeClustering(n_clusters=variables.number_of_clusters, linkage='complete', metric='precomputed')

# Preprocessing
See the Preprocessing notebook for more in-depth information.

In [None]:
# Create the results directory and every per-stage output folder.
# os.makedirs(..., exist_ok=True) also creates missing parent directories and is a
# no-op for folders that already exist, so this cell is safe to re-run and has no
# check-then-create race.
for folder in [variables.results_directory, *folders]:
    os.makedirs(folder, exist_ok=True)

In [None]:
# Stage 1: grayscale_directory is given the raw image directory and exp1 ("01_grayscale");
# presumably (input_dir, output_dir) — confirm against utils.
grayscale_directory(variables.images_directory, exp1)

In [None]:
# Stage 2: first clahe_directory pass, exp1 ("01_grayscale") -> exp2 ("02_histogram_equalization");
# argument order presumably (input_dir, output_dir) — confirm against utils.
clahe_directory(exp1, exp2)

In [None]:
# Stage 3: denoising with weight=0.5, exp2 -> exp3 ("03_denoise").
# NOTE(review): the helper name suggests total-variation (Chambolle) denoising — confirm in utils.
apply_denoise_tv_chambolle_directory(exp2, exp3, weight=0.5)

In [None]:
# Stage 4: second clahe_directory pass, this time on the stage-3 output:
# exp3 ("03_denoise") -> exp4 ("04_histogram_equalization").
clahe_directory(exp3, exp4)

In [None]:
# Stage 5: circle_crop_directory, exp4 -> exp5 ("05_circle_crop").
# exp5 is also preprocessed_images_path, i.e. the folder the matcher reads from.
circle_crop_directory(exp4, exp5)

# Distance computation

## Feature Detection and Description

In [None]:
# Run the Kornia-based matcher over the preprocessed image folder. The matching
# method comes from the `variables` module; print_log=True enables progress output.
distances = extract_kornia_matches_in_directory(
    preprocessed_images_path,
    method=variables.matching_computation_method,
    print_log=True,
)

# Persist the result so the following cells can work from the file.
distances.to_csv(matching_file_path)

cuda:0
Matching Handler started with device cuda:0.
Matching Handler now using method 4: matcher smnn.
1 / 256
2 / 256
3 / 256
4 / 256
5 / 256
6 / 256
7 / 256
8 / 256
9 / 256
10 / 256
11 / 256
12 / 256
13 / 256
14 / 256
15 / 256
16 / 256
17 / 256
18 / 256
19 / 256
20 / 256
21 / 256
22 / 256
23 / 256
24 / 256
25 / 256
26 / 256
27 / 256
28 / 256
29 / 256
30 / 256
31 / 256
32 / 256
33 / 256
34 / 256
35 / 256
36 / 256
37 / 256
38 / 256
39 / 256
40 / 256
41 / 256
42 / 256
43 / 256
44 / 256
45 / 256
46 / 256
47 / 256
48 / 256
49 / 256
50 / 256
51 / 256
52 / 256
53 / 256
54 / 256
55 / 256
56 / 256
57 / 256
58 / 256
59 / 256
60 / 256
61 / 256
62 / 256
63 / 256
64 / 256
65 / 256
66 / 256
67 / 256
68 / 256
69 / 256
70 / 256
71 / 256
72 / 256
73 / 256
74 / 256
75 / 256
76 / 256
77 / 256
78 / 256
79 / 256
80 / 256
81 / 256
82 / 256
83 / 256
84 / 256
85 / 256
86 / 256
87 / 256
88 / 256
89 / 256
90 / 256
91 / 256
92 / 256
93 / 256
94 / 256
95 / 256
96 / 256
97 / 256
98 / 256
99 / 256
100 / 256
101 /

In [None]:
# Re-load the distances file written by the matching cell, pass it through
# utils.add_path_to_df together with the paths from utils.get_paths(images_folder_path),
# and overwrite the same file with the result.
# NOTE(review): the file is read and rewritten in place, so re-running this cell
# operates on already-processed data — confirm add_path_to_df tolerates that.
df2 = pd.read_csv(matching_file_path)
paths = utils.get_paths(images_folder_path)
df2 = utils.add_path_to_df(df2, paths)
df2.to_csv(matching_file_path)

# Clustering

In [None]:
# Compute the clustering from the stored distances file, using the clusterer
# configured above and the distance function selected in `variables`.
clustering = utils.compute_clustering(
    matching_file_path,
    clusterer=clusterer,
    distance_function=variables.distance_computation_method,
)
# First write: the clustering exactly as returned.
clustering.to_csv(clustering_file_path)

# Second write: the same file is replaced by the version that went through
# add_path_to_df (keyed on the 'object_number' column, index left unchanged).
paths = utils.get_paths(images_folder_path)
clustering2 = utils.add_path_to_df(
    clustering,
    paths,
    name_column='object_number',
    set_index=False,
)
clustering2.to_csv(clustering_file_path)