In [1]:
import os
import sys
# `imp` is deprecated (removed in Python 3.12); importlib.reload is the
# function that imp.reload delegated to.
from importlib import reload

# Put the parent of sys.path[0] on the module search path before the project
# imports below (presumably that is where the project modules live).
sys.path.insert(1, os.path.join(sys.path[0], '..'))

import numpy as np

from data_utils import DataHandler
import image_features
import similarities
import histogram_processing
import clustering

# Value passed as N_PROCESS to the batch feature extractors in later cells.
N_PROCESS = 10

  from imp import reload


# LOAD DATA

In [2]:
# Create the project's data handler and load the ".jpg" files under the BBDD folder.
# `db` is used as the image list by every later cell; `db_files` is the second
# value returned by load_images (presumably the matching file names -- confirm
# in data_utils).
data_handler = DataHandler()
db, db_files = data_handler.load_images(
    desc="Loading BBDD Data...",
    extension=".jpg",
    folder="../data/BBDD/",
)

Initialized DataHandler with 2 processes


Loading BBDD Data...: 100%|██████████| 287/287 [00:05<00:00, 54.42it/s]


../data/BBDD/ read: 287 images


## Texture based clustering
#### HoG

In [3]:
import sklearn.decomposition

def PCA_decomposition(feature_matrix: np.ndarray, max_components: int = 15) -> np.ndarray:
    """Project per-sample features onto their leading principal components.

    Every sample (first axis) is flattened to a 1-D feature vector before the
    PCA is fitted, so inputs with more than two dimensions are accepted.

    Args:
        feature_matrix: Array-like whose first axis indexes the samples.
        max_components: Upper bound on the number of components kept
            (defaults to 15, the value that used to be hard-coded).

    Returns:
        Array of shape (n_samples, n_components) where n_components is
        min(max_components, n_samples, n_flattened_features).
    """
    samples = np.asarray(feature_matrix)
    # Flatten first: the component cap has to be derived from the 2-D matrix
    # the PCA actually sees, not from the second axis of an N-D input.
    flat_features = samples.reshape(samples.shape[0], -1)
    n_components = min(max_components, *flat_features.shape)
    # Fixed seed keeps any randomized solver choice reproducible across runs.
    pca = sklearn.decomposition.PCA(
        n_components=n_components,
        random_state=1234,
    )
    return pca.fit_transform(flat_features)

In [4]:
# HoG descriptors for every database image: each descriptor is flattened to a
# vector, the vectors are stacked into one array, and the result is PCA-reduced.
db_texture_matrix = PCA_decomposition(
    np.array([
        hog_descriptor.flatten()
        for hog_descriptor in image_features.hog_batch(
            images=db[:],
            mssg="Histograms of Gradients (HoG) from BBDD...",
        )
    ])
)

Histograms of Gradients (HoG) from BBDD...: 100%|██████████| 287/287 [00:46<00:00,  6.19it/s]


In [5]:
# K-means clustering of the images on the current db_texture_matrix (the HoG
# features when the cells run in order). The trailing 5 is presumably the
# number of clusters -- confirm in clustering.cluster_kmeans.
img_clusters = clustering.cluster_kmeans(db[:], db_texture_matrix, 5)

Found Clusters!


In [6]:
# Hand the clusters to save_clusters, tagged with the "HOG" descriptor name
# (its log output reports images being stored in per-cluster folders).
clustering.save_clusters(img_clusters, desc_method="HOG")

Storing images in their respective cluster folder...


#### LBP

In [7]:
# LBP texture descriptors (requested with histogram=False, block=False),
# PCA-reduced; PCA_decomposition flattens each sample itself.
db_texture_matrix = PCA_decomposition(
    image_features.lbp_batch(
        images=db[:],
        mssg="LBP from BBDD...",
        histogram=False,
        block=False,
    )
)

LBP from BBDD...: 100%|██████████| 287/287 [00:11<00:00, 25.78it/s]


In [8]:
# Re-import the clustering module so edits made to its source since the first
# import take effect before it is used again.
reload(clustering)
# Cluster on the current db_texture_matrix (the LBP features when the cells
# run in order); the trailing 5 is presumably the number of clusters.
img_clusters = clustering.cluster_kmeans(db[:], db_texture_matrix, 5)
# Store the result tagged with the "LBP" descriptor name.
clustering.save_clusters(img_clusters, desc_method="LBP")

Found Clusters!
Storing images in their respective cluster folder...


## Color based clustering

#### RGB

In [9]:
# Colour features from the RGB colour space only (all other colour-space flags
# are off, histogram3d is on), followed by PCA reduction.
db_feature_matrix = PCA_decomposition(
    histogram_processing.generate_feature_matrix(
        dataset=db[:],
        # Colour-space selection: RGB only.
        RGB=True,
        CieLab=False,
        HSV=False,
        YCbCr=False,
        grayscale=False,
        # Histogram configuration.
        histogram3d=True,
        n_levels=6,
        n_bins=6,
        # Progress message and parallelism.
        mssg="Generating features for BBDD dataset...(with N_PROCESS = {})".format(
            N_PROCESS
        ),
        N_PROCESS=N_PROCESS,
    )
)

Generating features for BBDD dataset...(with N_PROCESS = 10): 100%|██████████| 287/287 [00:24<00:00, 11.74it/s]


In [10]:
# Re-import the clustering module so edits made to its source since the first
# import take effect before it is used again.
reload(clustering)
# Cluster on the current db_feature_matrix (the RGB features when the cells
# run in order); the trailing 5 is presumably the number of clusters.
img_clusters = clustering.cluster_kmeans(db[:], db_feature_matrix, 5)
# Store the result tagged with the "RGB" descriptor name.
clustering.save_clusters(img_clusters, desc_method="RGB")

Found Clusters!
Storing images in their respective cluster folder...


#### LAB

In [11]:
# Colour features from the CieLab colour space only (all other colour-space
# flags are off, histogram3d is on), followed by PCA reduction.
db_feature_matrix = PCA_decomposition(
    histogram_processing.generate_feature_matrix(
        dataset=db[:],
        # Colour-space selection: CieLab only.
        CieLab=True,
        RGB=False,
        HSV=False,
        YCbCr=False,
        grayscale=False,
        # Histogram configuration.
        histogram3d=True,
        n_levels=6,
        n_bins=6,
        # Progress message and parallelism.
        mssg="Generating features for BBDD dataset...(with N_PROCESS = {})".format(
            N_PROCESS
        ),
        N_PROCESS=N_PROCESS,
    )
)

Generating features for BBDD dataset...(with N_PROCESS = 10): 100%|██████████| 287/287 [00:34<00:00,  8.40it/s]


In [12]:
# Cluster on the current db_feature_matrix (the CieLab features when the cells
# run in order); the trailing 5 is presumably the number of clusters.
img_clusters = clustering.cluster_kmeans(db[:], db_feature_matrix, 5)
# Store the result tagged with the "LAB" descriptor name.
clustering.save_clusters(img_clusters, desc_method="LAB")

Found Clusters!
Storing images in their respective cluster folder...


## Keypoint based clustering

#### ORB

In [13]:
# orb_batch yields exactly two items, unpacked here as the ORB keypoints and
# the ORB descriptors of the database images.
db_keypoints_orb, db_features_orb = image_features.orb_batch(
    images=db[:],
    mssg="Computing ORB from BBDD...",
)

Computing ORB from BBDD...: 100%|██████████| 287/287 [00:37<00:00,  7.71it/s]


In [14]:
import cv2

# Match the ORB descriptors of the database against themselves (brute force,
# k=2); the result is presumably a pairwise match-count matrix.
# ORB descriptors are binary strings, so the Hamming norm is the appropriate
# distance for them; L2 would treat the packed bytes as magnitudes.
# NOTE(review): presumably `similarity_measure` is forwarded to cv2.BFMatcher
# as its normType -- confirm in similarities.compute_matches_batch. If
# orb_batch configures ORB with WTA_K of 3 or 4, use cv2.NORM_HAMMING2 instead.
n_matches_orb = similarities.compute_matches_batch(
    descriptor1=db_features_orb,
    descriptor2=db_features_orb,
    method="BF",
    similarity_measure=cv2.NORM_HAMMING,
    k=2,
    plot=False,
    N_PROCESS=2,
)

# Mirror the upper triangle (diagonal counted once) into the lower triangle so
# the matrix is symmetric.
n_matches_orb_sym = np.triu(n_matches_orb, k=0) + np.triu(n_matches_orb, k=1).T
# Turn similarity into dissimilarity: the largest entry maps to distance 0.
distances_orb_sym = np.amax(n_matches_orb_sym) - n_matches_orb_sym

                                                                                         

In [15]:
# Agglomerative clustering into 5 groups; affinity="precomputed" is passed
# because the features are the pairwise ORB distance matrix, not raw vectors.
img_clusters = clustering.cluster_agglomerative(
    imgs=db[:],
    features=distances_orb_sym,
    cluster_num=5,
    affinity="precomputed",
)
# Store the result tagged with the "ORB" descriptor name.
clustering.save_clusters(img_clusters, desc_method="ORB")

Found Clusters!
Storing images in their respective cluster folder...


## Frequency based clustering

In [16]:
# DCT output for every database image (quantize=False): each result is
# flattened to a vector, the vectors are stacked, and the array is PCA-reduced.
dct_imgs = PCA_decomposition(
    np.array([
        dct_coeffs.flatten()
        for dct_coeffs in image_features.dct_batch(
            db[:],
            mssg="Computing block-based DCT for each of the images...",
            quantize=False,
            N_PROCESS=N_PROCESS,
        )
    ])
)

Computing block-based DCT for each of the images...: 100%|██████████| 287/287 [00:11<00:00, 24.66it/s]


In [17]:
# Agglomerative clustering into 5 groups on the PCA-reduced DCT features,
# with affinity="euclidean".
img_clusters = clustering.cluster_agglomerative(
    imgs=db[:],
    features=dct_imgs,
    cluster_num=5,
    affinity="euclidean",
)
# Store the result tagged with the "DCT" descriptor name.
clustering.save_clusters(img_clusters, desc_method="DCT")

Found Clusters!
Storing images in their respective cluster folder...
