In [1]:
%%html

<style>
    /* Load the notebook's custom stylesheet. An @import statement must be
       terminated with ';' so that anything added after it still parses. */
    @import url("css/custom_styles.css");
</style>

In [2]:
%matplotlib inline

#
# import utilitaires
#
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import helpers as hlp
import helpers.bovw as bovw
import helpers.dataset.PlantVillage as pv
import helpers.features as feat

from sklearn.cluster import KMeans, MiniBatchKMeans

In [3]:
#
# PlantVillage dataset
#
config_overrides = hlp.MetaObject.from_json("config_overrides.json")
executor = hlp.create_thread_pool_executor()

# Build the dataset configuration, then apply the "dataset" section of the
# overrides on top of it when an overrides object is available.
pv_config = pv.Config(executor)
if config_overrides is not None:
    hlp.MetaObject.override_from_object(pv_config, config_overrides.dataset)

print("pv_config")
display(vars(pv_config))

# Load the dataset and keep a direct handle on its dataframe for later cells.
pv_dataset = pv.load(pv_config)
pv_dataframe = pv_dataset.dataframe


#
# ORB features
#
orb_features_config = feat.OrbFeaturesConfig(executor)
# Guard on the overrides object itself: the freshly constructed config is
# never None, so testing it would let `config_overrides.orb` raise
# AttributeError whenever no overrides were loaded.
if config_overrides is not None:
    hlp.MetaObject.override_from_object(orb_features_config,
                                        config_overrides.orb.features)

print("orb_features_config")
display(vars(orb_features_config))

# Series.items() yields the same (index, value) pairs as the former
# Series.iteritems(), which was removed in pandas 2.0.
orb_iter = feat.DatasetIter(pv_dataset,
                            pv_dataframe["image_path"].items(),
                            pv_dataframe.shape[0])
orb_features = feat.load(orb_features_config, orb_iter)

print("ORB features", orb_features.features.shape)
print("ORB keypoints", orb_features.key_points.shape)

pv_config


{'url': 'https://tinyurl.com/22tas3na',
 'install_path': 'dataset/PlantVillage.hd5',
 'species_disease_re': '(.*)(?:___)(.*)',
 'species_re': '(.*)(?:,_|_)(.*)',
 'label_separator': '_',
 'thumbnail_scale': 0.25,
 'force_download': False,
 'read_only': True,
 'executor': <concurrent.futures.thread.ThreadPoolExecutor at 0x2219e231610>,
 'force_install': False}

orb_features_config


{'install_path': 'dataset/OrbFeatures.hd5',
 'force_generate': False,
 'read_only': True,
 'executor': <concurrent.futures.thread.ThreadPoolExecutor at 0x2219e231610>,
 'chunk_size': 150,
 'nfeatures': 500}

ORB features (20908751, 32)
ORB keypoints (20908751, 7)


In [4]:
#
# bag of visual words parameters
#
bovw_config = bovw.Config()

# Apply the "orb.bovw" section of the overrides when an overrides object
# is available.
if config_overrides is not None:
    hlp.MetaObject.override_from_object(bovw_config, config_overrides.orb.bovw)

print("bovw_config")
display(vars(bovw_config))

bovw_config


{'install_path': 'dataset/OrbBoVW.hd5',
 'read_only': True,
 'n_clusters': 200,
 'force_generate': False}

In [None]:
# Size the visual vocabulary from the BoVW configuration instead of repeating
# the literal, so values from config_overrides.json actually take effect
# (the recorded configuration has n_clusters == 200, i.e. the same value).
# A fixed random_state makes the centroid initialisation, and therefore the
# resulting vocabulary, reproducible from one run to the next.
clusterer = KMeans(n_clusters=bovw_config.n_clusters, random_state=0)

In [None]:
# Fit the clusterer on the full ORB feature array inside an hlp.Profile
# context. The recorded output above reports that array as (20908751, 32).
# NOTE(review): presumably hlp.Profile measures the elapsed time of the
# with-body and round_duration() returns it rounded, in seconds - confirm
# against helpers.
with hlp.Profile() as orb_kmeans_profile:
    clusterer.fit(orb_features.features)

# Report the measured duration of the fit.
print("ORB BoVW process time:", orb_kmeans_profile.round_duration(), "s")