# Dataset
[Plant Village](https://data.mendeley.com/datasets/tywbtsjrjv/1)


# Description

This data-set provides 39 different classes of plant leaf and background images. It contains 61,486 images. Six different augmentation techniques were used to increase the data-set size: image flipping, gamma correction, noise injection, PCA color augmentation, rotation, and scaling.

The classes are:
1. Apple_scab
2. Apple_black_rot
3. Apple_cedar_apple_rust
4. Apple_healthy
5. Background_without_leaves
6. Blueberry_healthy
7. Cherry_powdery_mildew
8. Cherry_healthy
9. Corn_gray_leaf_spot
10. Corn_common_rust
11. Corn_northern_leaf_blight
12. Corn_healthy
13. Grape_black_rot
14. Grape_black_measles
15. Grape_leaf_blight
16. Grape_healthy
17. Orange_haunglongbing
18. Peach_bacterial_spot
19. Peach_healthy
20. Pepper_bacterial_spot
21. Pepper_healthy
22. Potato_early_blight
23. Potato_healthy
24. Potato_late_blight
25. Raspberry_healthy
26. Soybean_healthy
27. Squash_powdery_mildew
28. Strawberry_healthy
29. Strawberry_leaf_scorch
30. Tomato_bacterial_spot
31. Tomato_early_blight
32. Tomato_healthy
33. Tomato_late_blight
34. Tomato_leaf_mold
35. Tomato_septoria_leaf_spot
36. Tomato_spider_mites_two-spotted_spider_mite
37. Tomato_target_spot
38. Tomato_mosaic_virus
39. Tomato_yellow_leaf_curl_virus

In [11]:
%matplotlib inline

#
# import utilitaires
#
import cv2
import numpy as np
import os
import pandas as pd
import requests
import time
import zipfile

from pprint import pprint
from tqdm.notebook import tqdm
from IPython.core.display import HTML

In [14]:
def display_html(html_message):
    """
    Render an HTML snippet as rich output of the current notebook cell.

    html_message: markup string handed to IPython's HTML wrapper.
    """
    rendered = HTML(html_message)
    display(rendered)

def download_dataset(dest_path, url, skip_download=False):
    """
    Stream a zip archive from `url` into `dest_path` while showing a progress bar.

    The response headers are validated before anything is written: the
    payload must have the expected size and be served as a zip file. The
    local file name is taken from the Content-Disposition header.

    Args:
        dest_path: Directory the archive is written into.
        url: Address of the zip archive.
        skip_download: When True, only validate the headers and resolve the
            target path; nothing is written to disk.

    Returns:
        Path of the archive inside `dest_path`, or None when the request,
        the validation or the write failed (the error is printed).
    """
    try:
        # Context manager releases the connection even when validation fails.
        with requests.get(url, stream=True) as r:
            r.raise_for_status()

            # Missing headers fall through to the explicit checks below
            # instead of failing with an opaque TypeError/AttributeError.
            content_size = int(r.headers.get('content-length', -1))
            content_type = r.headers.get('content-type', '').lower()

            # Explicit raises instead of assert: asserts vanish under -O.
            if content_size != 949267727:
                raise ValueError("Dataset wrong size")
            if content_type != "application/zip":
                raise ValueError("Dataset wrong type")

            content_disposition = r.headers.get('content-disposition', '')
            filename = content_disposition.split("=", 1)[-1]
            filename = filename.replace('"', "").strip()
            # The name comes from the server: keep only its last component
            # so it cannot point outside dest_path.
            filename = os.path.basename(filename)
            if not filename:
                raise ValueError("Dataset filename missing")
            filename = os.path.join(dest_path, filename)

            # The original condition was inverted: it downloaded only when
            # skip_download was True, so the default call wrote nothing.
            if not skip_download:
                with open(filename, "wb") as f, \
                        tqdm(total=content_size) as progress:
                    for data in r.iter_content(chunk_size=16*1024):
                        f.write(data)
                        progress.update(len(data))
                    progress.refresh()
    except Exception as e:
        # Best-effort contract: report the problem and signal it with None.
        print(e)
        return None
    else:
        return filename
    
def unzip_dataset(dest_path, file, unzip_one_folder_up=True):
    """
    Extract the files of a zip archive into `dest_path` behind a progress bar.

    Args:
        dest_path: Directory receiving the extracted files.
        file: Path of the zip archive to read.
        unzip_one_folder_up: When True, each file is extracted under its
            immediate parent folder only; the leading folders of the
            archive path are dropped.

    Returns:
        True when every entry was processed, False if any error occurred
        (the error is printed).
    """
    try:
        with zipfile.ZipFile(file=file) as archive:
            members = archive.infolist()
            bar = tqdm(iterable=members,
                       total=len(members),
                       bar_format="{l_bar}{bar}{postfix}")

            for member in bar:
                # Short label for the bar: immediate parent folder plus
                # file name, without the leading folders of the archive.
                parent_dir = os.path.basename(os.path.dirname(member.filename))
                leaf_name = os.path.basename(member.filename)
                short_name = os.path.join(parent_dir, leaf_name)

                bar.set_postfix_str(short_name)

                if not member.is_dir():
                    if unzip_one_folder_up:
                        # Rewriting the member name changes where it lands.
                        member.filename = short_name
                    archive.extract(member, path=dest_path)
                else:
                    # Directory entries are not extracted; brief pause,
                    # presumably so the bar label stays readable.
                    time.sleep(0.01)

            bar.refresh()
    except Exception as e:
        print(e)
        return False

    return True
        
def install_dataset(dest_path, url):
    """
    Download the dataset archive, extract it and delete the archive.

    Each stage is announced through display_html. The first failing stage
    reports "Failed" and stops the installation.

    Args:
        dest_path: Directory used for both the archive and the extracted files.
        url: Address of the dataset archive.

    Returns:
        True when download, extraction and cleanup all completed, else False.
    """
    display_html(f"<b>Downloading</b> <i>{url}</i>")
    archive_path = download_dataset(dest_path, url)

    if archive_path is not None:
        display_html("<b>Unzipping</b>")
        if unzip_dataset(dest_path, archive_path):
            # The archive is no longer needed once its content is extracted.
            display_html("<b>Cleaning</b>")
            os.remove(archive_path)
            return True

    # Reached when either the download or the extraction failed.
    display_html("<b>Failed</b>")
    return False

In [15]:
#
# installation du dataset
#
# Switch controlling whether this cell (re)installs the dataset.
DATASET_INSTALL = True
# Shortened link to the dataset archive.
DATASET_URL = "https://tinyurl.com/22tas3na"
# Local folder receiving the archive and the extracted images.
DATASET_PATH = "dataset"

# NOTE: only the flag is consulted; whether DATASET_PATH already exists
# is not checked here.
if not DATASET_INSTALL:
    display_html("<b>Dataset already installed</b>")
else:
    os.makedirs(DATASET_PATH, exist_ok=True)

    display_html("<b>Installing Dataset</b>")
    if install_dataset(DATASET_PATH, DATASET_URL):
        display_html("<b>Dataset installed</b>")
    else:
        display_html("<b>Dataset installation error</b>")

  0%|          