In [1]:
%matplotlib inline

#
# import utilitaires
#
import pandas as pd

from helpers.Concurrent import create_thread_pool_executor
from helpers.MetaObject import MetaObject
from helpers.Jupyter import display_html

from helpers.features.Orb import OrbFeaturesConfig, orb_features_load
from helpers.dataset.PlantVillage import PlantVillageConfig, plant_village_load

In [2]:
#
# PlantVillage dataset parameters
#
# Overrides are read from config_overrides.json; from_json may yield None,
# in which case the configuration built below is left untouched.
config_overrides = MetaObject.from_json("config_overrides.json")
executor = create_thread_pool_executor(max_workers=None)

pv_config = PlantVillageConfig(executor)
if config_overrides is not None:
    # presumably copies the values of the "dataset" section onto pv_config
    # -- TODO confirm against MetaObject.override_from_object
    MetaObject.override_from_object(pv_config, config_overrides.dataset)

# Show the effective configuration as a plain attribute dictionary.
print("PlantVillage config")
display(vars(pv_config))

PlantVillage config


{'url': 'https://tinyurl.com/22tas3na',
 'install_path': 'dataset/PlantVillage.hd5',
 'species_disease_re': '(.*)(?:___)(.*)',
 'species_re': '(.*)(?:,_|_)(.*)',
 'label_separator': '_',
 'thumbnail_scale': 0.25,
 'force_download': False,
 'read_only': True,
 'executor': <concurrent.futures.thread.ThreadPoolExecutor at 0x29f3ed301f0>,
 'force_install': False}

In [3]:
#
# load the PlantVillage dataset
#
# The %prun line magic runs the statement under the Python profiler; the
# result of plant_village_load(pv_config) is bound to pv_dataset, which the
# following cells read.
%prun pv_dataset = plant_village_load(pv_config)

  0%|          | 0/61526 [00:00<?, ?it/s]

 

In [4]:
# Build the pandas view of the dataset and preview it.
#
# Fail fast when no dataset is available: pv_dataframe is only bound on the
# success path, so continuing would either raise a NameError on a fresh
# kernel or silently display a stale frame left over from an earlier run.
if pv_dataset is None:
    display_html("<b>Invalid dataset</b>")
    raise RuntimeError("Invalid dataset: pv_dataset is None, cannot build pv_dataframe")

pv_dataframe = pd.DataFrame(pv_dataset.dataframe)

# Row/column count first, then the first few rows.
display(pv_dataframe.shape)
display(pv_dataframe.head())

(60343, 5)

Unnamed: 0,species,disease,label,image_path,thumbnail_path
0,Apple,Apple_scab,Apple_scab,Plant_leave_diseases_dataset_with_augmentation...,thumbnails/Plant_leave_diseases_dataset_with_a...
1,Apple,Apple_scab,Apple_scab,Plant_leave_diseases_dataset_with_augmentation...,thumbnails/Plant_leave_diseases_dataset_with_a...
2,Apple,Apple_scab,Apple_scab,Plant_leave_diseases_dataset_with_augmentation...,thumbnails/Plant_leave_diseases_dataset_with_a...
3,Apple,Apple_scab,Apple_scab,Plant_leave_diseases_dataset_with_augmentation...,thumbnails/Plant_leave_diseases_dataset_with_a...
4,Apple,Apple_scab,Apple_scab,Plant_leave_diseases_dataset_with_augmentation...,thumbnails/Plant_leave_diseases_dataset_with_a...


In [5]:
#
# feature extraction parameters
#
# Reuses the executor and the config_overrides object created in an
# earlier cell.
features_config = OrbFeaturesConfig(executor)
if config_overrides is not None:
    # presumably copies the values of the "orb_features" section onto
    # features_config -- TODO confirm against MetaObject.override_from_object
    MetaObject.override_from_object(features_config, config_overrides.orb_features)

# Show the effective configuration as a plain attribute dictionary.
print("features_config")
display(vars(features_config))

features_config


{'install_path': 'dataset/OrbFeatures.hd5',
 'force_generate': True,
 'read_only': True,
 'nfeatures': 500,
 'image_path_iterable': None,
 'image_path_count': 0,
 'zip_file': None,
 'executor': <concurrent.futures.thread.ThreadPoolExecutor at 0x29f3ed301f0>,
 'chunk_size': 150}

In [6]:
#
# inputs used to get the features of every image
#
# The image paths and their count both come from pv_dataframe; zip_file is
# taken as-is from the loaded dataset object.
features_config.image_path_iterable = pv_dataframe["image_path"].to_list()
features_config.image_path_count = len(pv_dataframe)
features_config.zip_file = pv_dataset.zip_file

In [7]:
# Run orb_features_load(features_config) under the Python profiler (%prun)
# and bind its result to features_orb for the cells below.
%prun features_orb = orb_features_load(features_config)

  0%|          | 0/60343 [00:00<?, ?it/s]

 

In [None]:
# Shapes of the loaded feature data, shown in the same order as before:
# features, indices, then lookup.indices.
display(
    features_orb.features.shape,
    features_orb.indices.shape,
    features_orb.lookup.indices.shape,
)

In [None]:
#
# reduce the number of features with KMeans
#

In [None]:
#
# train/test split
#
# NOTE(review): train_test_split is not imported in any cell visible here
# (presumably sklearn.model_selection.train_test_split) -- confirm and add
# the import to the imports cell, otherwise this cell raises a NameError on
# a fresh kernel.
train, test = train_test_split(
    pv_dataframe,
    train_size=0.8,
    random_state=42,
    stratify=pv_dataframe["label"],
)

# One shape per line: train first, then test.
print(train.shape, test.shape, sep="\n")