In [1]:
%matplotlib inline

#
# import utilitaires
#
import numpy as np
import pandas as pd

from helpers.Concurrent import create_thread_pool_executor
from helpers.MetaObject import MetaObject
from helpers.Profile import Profile
from helpers.Jupyter import display_html

from helpers.features.Orb import OrbFeaturesConfig, orb_features_load
from helpers.dataset.PlantVillage import PlantVillageConfig, plant_village_load

from sklearn.cluster import KMeans
from time import time

In [2]:
#
# PlantVillage dataset parameters
#
# Optional overrides are read from "config_overrides.json"; the executor
# created here is also handed to the feature configuration further down.
config_overrides = MetaObject.from_json("config_overrides.json")
executor = create_thread_pool_executor()

pv_config = PlantVillageConfig(executor)
if config_overrides is not None:
    # Apply the "dataset" section of the overrides on top of the defaults.
    MetaObject.override_from_object(pv_config, config_overrides.dataset)

# Show the effective configuration as a plain attribute dictionary.
print("pv_config")
display(vars(pv_config))

pv_config


{'url': 'https://tinyurl.com/22tas3na',
 'install_path': 'dataset/PlantVillage.hd5',
 'species_disease_re': '(.*)(?:___)(.*)',
 'species_re': '(.*)(?:,_|_)(.*)',
 'label_separator': '_',
 'thumbnail_scale': 0.25,
 'force_download': False,
 'keep_download': True,
 'extract_one_folder_up': True,
 'allow_update': False,
 'executor': <concurrent.futures.thread.ThreadPoolExecutor at 0x208ccba53d0>,
 'force_install': False}

In [3]:
#
# Load the PlantVillage dataset
#
# plant_village_load() is treated as returning None on failure. In that case
# only the error banner is shown: pv_h5_file / pv_dataframe are NOT defined,
# so the preview below must live in the success branch (it previously ran
# unconditionally and raised NameError right after the banner).
pv_dataset = plant_village_load(pv_config)
if pv_dataset is None:
    display_html("<b>Invalid dataset</b>")
else:
    pv_h5_file = pv_dataset.h5_file
    pv_dataframe = pd.DataFrame(pv_dataset.data)

    # Quick sanity preview: (rows, columns) followed by the first records.
    display(pv_dataframe.shape)
    display(pv_dataframe.head())

(60343, 5)

Unnamed: 0,species,disease,image_path,thumbnail_path,label
0,Apple,Apple_scab,images/Apple___Apple_scab/image (123).JPG,thumbnails/Apple___Apple_scab/image (123).JPG,Apple_scab
1,Apple,Apple_scab,images/Apple___Apple_scab/image (11).JPG,thumbnails/Apple___Apple_scab/image (11).JPG,Apple_scab
2,Apple,Apple_scab,images/Apple___Apple_scab/image (111).JPG,thumbnails/Apple___Apple_scab/image (111).JPG,Apple_scab
3,Apple,Apple_scab,images/Apple___Apple_scab/image (115).JPG,thumbnails/Apple___Apple_scab/image (115).JPG,Apple_scab
4,Apple,Apple_scab,images/Apple___Apple_scab/image (122).JPG,thumbnails/Apple___Apple_scab/image (122).JPG,Apple_scab


In [4]:
#
# Feature extraction parameters
#
# Reuses the executor created with the dataset configuration.
features_config = OrbFeaturesConfig(executor)
if config_overrides is not None:
    # Apply the "orb_features" section of the overrides on top of the defaults.
    MetaObject.override_from_object(features_config, config_overrides.orb_features)

# Show the effective configuration as a plain attribute dictionary.
print("features_config")
display(vars(features_config))

features_config


{'install_path': 'dataset/OrbFeatures.hd5',
 'force_generate': False,
 'allow_update': False,
 'nfeatures': 10,
 'image_iterable': None,
 'image_h5': None,
 'image_count': 0,
 'executor': <concurrent.futures.thread.ThreadPoolExecutor at 0x208ccba53d0>,
 'chunk_size': 150}

In [5]:
#
# Get the features for each image
#
# Point the feature configuration at the dataset loaded above: number of
# images, the column of image paths, and the dataset's h5 file handle.
features_config.image_count = len(pv_dataframe)
features_config.image_iterable = pv_dataframe["image_path"]
features_config.image_h5 = pv_h5_file
orb = orb_features_load(features_config)

if orb is not None:
    # Report shape and dtype of both arrays exposed by the loader.
    for caption, array in (
        ("ORB Features:", orb.features),
        ("ORB Indices (dans PlantVillage):", orb.indices),
    ):
        print(caption, array.shape, array.dtype)
else:
    display_html(f"<b>Invalid features</b>")

ORB Features: (571117, 32) uint8
ORB Indices (dans PlantVillage): (571117,) uint16


In [6]:
# Wrap the ORB feature array in a DataFrame (one row per feature vector),
# then preview its dimensions and first rows.
orb_dataframe = pd.DataFrame(data=orb.features)

display(orb_dataframe.shape, orb_dataframe.head())

(571117, 32)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,159,88,71,175,6,18,238,112,165,194,...,53,236,146,107,234,128,47,183,57,212
1,222,86,231,229,54,2,122,56,37,236,...,133,206,242,233,175,132,39,183,53,81
2,215,66,71,165,38,2,107,48,45,162,...,133,202,144,235,239,132,45,171,113,33
3,95,72,231,143,38,82,107,80,229,202,...,173,232,146,106,235,128,47,183,113,117
4,38,150,117,234,124,243,250,216,143,25,...,78,245,30,92,171,255,151,143,125,183


In [None]:
# K-means with 1000 clusters fitted on the ORB feature table.
# random_state is pinned so repeated runs yield the same clustering.
# NOTE: this fit is expensive (the recorded run below reports 4351 s);
# the Profile context captures the elapsed time in `kmeans_fit`.
vocabulary = KMeans(n_clusters=1000, random_state=42, verbose=0)

with Profile() as kmeans_fit:
    vocabulary.fit(orb_dataframe)
    

In [9]:
# Report the k-means fit time recorded by the Profile context, rounded to whole seconds.
print(f"{round(kmeans_fit.duration)} s")

4351 s


In [12]:
# Inspect the fitted cluster centers. Only the last expression of a cell is
# echoed automatically, so the shape must be displayed explicitly; as a bare
# expression it was evaluated and silently discarded.
display(vocabulary.cluster_centers_.shape)
vocabulary.cluster_centers_.dtype

dtype('float64')

In [None]:
# NOTE(review): this scratch cell held only the dangling expression
# `vocabulary.`, which is a SyntaxError and aborts "Restart & Run All".
# The intended attribute is unknown; show the fitted estimator for now and
# replace this with the real expression (or delete the cell).
vocabulary