# Image Preprocessing - Extended Validation

In [1]:
import os, cv2, random, shutil
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
from mpl_toolkits.axes_grid1 import ImageGrid

In [2]:
# Root folder under which every dataset used by this notebook is stored.
DATASETS_FOLDER = "./datasets"

# BRACOL: folder as downloaded ('symptom'), the name it is renamed to, and the
# destination tree for the resized copies.
PATH_BRACOL = f"{DATASETS_FOLDER}/bracol/symptom"
PATH_BRACOL_ORIGINAL_SPLITTED = f"{DATASETS_FOLDER}/bracol/original_splitted"
PATH_BRACOL_RESIZED_SPLITTED = f"{DATASETS_FOLDER}/bracol/resized_splitted"

# Plant Patologies: dataset root plus its 'original' and 'resized' subfolders.
PATH_PLANT_PATOLOGIES = f"{DATASETS_FOLDER}/plant_patologies"
PATH_PLANT_PATOLOGIES_ORIGINAL = f"{PATH_PLANT_PATOLOGIES}/original"
PATH_PLANT_PATOLOGIES_RESIZED = f"{PATH_PLANT_PATOLOGIES}/resized"

# RoCoLe: folder as downloaded ('classes'), the name it is renamed to, and the
# destination folder for the resized copies.
PATH_ROCOLE = f"{DATASETS_FOLDER}/rocole/classes"
PATH_ROCOLE_ORIGINAL = f"{DATASETS_FOLDER}/rocole/original"
PATH_ROCOLE_RESIZED = f"{DATASETS_FOLDER}/rocole/resized"

## 1. Adjusting dataset folders

### 1.1. BRACOL

In [3]:
# Renaming 'symptom' folder to 'original_splitted'.
# The rename is skipped only when it has evidently been done already (source
# gone, destination present), so re-running this cell no longer raises
# FileNotFoundError. Every other situation still goes through os.rename and
# fails loudly exactly as before.
if os.path.exists(PATH_BRACOL) or not os.path.exists(PATH_BRACOL_ORIGINAL_SPLITTED):
    os.rename(PATH_BRACOL, PATH_BRACOL_ORIGINAL_SPLITTED)

### 1.2. Plant Patologies

In [4]:
# Moving classes folders to 'original' folder.
# Each class folder is renamed to the part of its name before the first '_'.

# Folders created by this notebook itself. They must never be treated as class
# folders: on a re-run os.listdir would return them, and moving 'original'
# into itself makes shutil.move raise.
generated_folders = {
    os.path.basename(PATH_PLANT_PATOLOGIES_ORIGINAL),
    os.path.basename(PATH_PLANT_PATOLOGIES_RESIZED),
}

# Only directories are class folders; stray files in the dataset root stay put.
directories = [
    name
    for name in os.listdir(PATH_PLANT_PATOLOGIES)
    if name not in generated_folders
    and os.path.isdir(f"{PATH_PLANT_PATOLOGIES}/{name}")
]

os.makedirs(PATH_PLANT_PATOLOGIES_ORIGINAL, exist_ok=True)

for directory in directories:
    source = f"{PATH_PLANT_PATOLOGIES}/{directory}"
    destination = f"{PATH_PLANT_PATOLOGIES_ORIGINAL}/{directory.split('_')[0]}"
    # NOTE(review): if two class folders share the same prefix, the second one
    # is moved *inside* the already existing destination folder (documented
    # shutil.move behaviour) - confirm the dataset has unique prefixes.
    shutil.move(source, destination)

### 1.3. RoCoLe

In [5]:
# Renaming 'classes' folder to 'original'.
# The rename is skipped only when it has evidently been done already (source
# gone, destination present), so re-running this cell no longer raises
# FileNotFoundError. Every other situation still goes through os.rename and
# fails loudly exactly as before.
if os.path.exists(PATH_ROCOLE) or not os.path.exists(PATH_ROCOLE_ORIGINAL):
    os.rename(PATH_ROCOLE, PATH_ROCOLE_ORIGINAL)

## 2. Resizing dataset images to 128x128

### 2.1. BRACOL

#### Listing directories

In [6]:
# Names of the entries directly under the BRACOL 'original_splitted' folder;
# the resizing cell below iterates over this list.
directories = os.listdir(PATH_BRACOL_ORIGINAL_SPLITTED)
# Bare expression so the notebook displays the list as the cell output.
directories

['val', 'test', 'train']

#### Resizing images

In [7]:
# Resize every BRACOL image to 128x128 and write it under the
# 'resized_splitted' tree. The split folder name is kept, and the output class
# folder is the second '_'-separated token of the source folder name
# (e.g. '3_rust' -> 'rust').
#
# Failures are collected and reported per folder instead of being swallowed by
# a blanket `except Exception: pass`, so a partially converted folder is
# visible in the cell output.
for directory in directories:
    folders = os.listdir(f"{PATH_BRACOL_ORIGINAL_SPLITTED}/{directory}")

    for folder in folders:
        root = f"{PATH_BRACOL_ORIGINAL_SPLITTED}/{directory}/{folder}"

        # Resolve the class name once per folder: a malformed name used to
        # raise IndexError for every file, which was silently ignored while
        # the cell still printed "resized!".
        name_parts = folder.split('_')
        if len(name_parts) < 2 or not os.path.isdir(root):
            print(f"Skipping {directory}/{folder}: not a '<index>_<class>' folder")
            continue

        images_resized_path = f"{PATH_BRACOL_RESIZED_SPLITTED}/{directory}/{name_parts[1]}"
        files = next(os.walk(root))[2]
        failed = []

        for file in tqdm(files, "Resizing images"):
            # cv2.imread returns None (it does not raise) when the file cannot
            # be decoded, so this check replaces the unused Image.open probe.
            image = cv2.imread(f"{root}/{file}")

            if image is None:
                failed.append(file)
                continue

            try:
                image_resized = cv2.resize(image, dsize=(128, 128), interpolation=cv2.INTER_CUBIC)
                # Created lazily so a folder without readable images does not
                # leave an empty output folder behind.
                os.makedirs(images_resized_path, exist_ok=True)
                written = cv2.imwrite(f"{images_resized_path}/{file}", image_resized)
            except cv2.error:
                # Best effort per file: one bad image must not stop the run.
                written = False

            if not written:
                failed.append(file)

        print(f"Images from {directory}/{folder} folder resized!")

        if failed:
            print(f"{len(failed)} file(s) from {directory}/{folder} could not be resized: {failed}")

Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 93/93 [00:00<00:00, 807.19it/s]


Images from val/3_rust folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [00:00<00:00, 863.71it/s]


Images from val/1_health folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 874.80it/s]


Images from val/2_miner folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 49/49 [00:00<00:00, 893.64it/s]


Images from val/5_cercospora folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 763.33it/s]


Images from val/4_phoma folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 951.91it/s]


Images from test/3_rust folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:00<00:00, 942.57it/s]


Images from test/1_health folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 840.07it/s]


Images from test/2_miner folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 910.31it/s]


Images from test/5_cercospora folder resized!


Resizing images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 69/69 [00:00<00:00, 820.43it/s]


Images from test/4_phoma folder resized!


Resizing images: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 434/434 [00:00<00:00, 889.53it/s]


Images from train/3_rust folder resized!


Resizing images: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 182/182 [00:00<00:00, 881.55it/s]


Images from train/1_health folder resized!


Resizing images: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 374/374 [00:00<00:00, 878.62it/s]


Images from train/2_miner folder resized!


Resizing images: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 224/224 [00:00<00:00, 870.99it/s]


Images from train/5_cercospora folder resized!


Resizing images: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 325/325 [00:00<00:00, 819.37it/s]

Images from train/4_phoma folder resized!





### 2.2. Plant Patologies

#### Listing directories

In [8]:
# Names of the entries directly under the Plant Patologies 'original' folder;
# the resizing cell below iterates over this list.
directories = os.listdir(PATH_PLANT_PATOLOGIES_ORIGINAL)
# Bare expression so the notebook displays the list as the cell output.
directories

['miner', 'rust']

#### Resizing images

In [9]:
# Resize every Plant Patologies .jpg/.png image to 128x128 and write it to the
# 'resized' folder under the same class folder name.

# Matched against the END of the lower-cased file name: the previous substring
# test (".jpg" in file) also accepted names such as 'a.jpg.bak' and rejected
# upper-case extensions such as 'A.JPG'.
image_extensions = (".jpg", ".png")

for directory in directories:
    root = f"{PATH_PLANT_PATOLOGIES_ORIGINAL}/{directory}"
    files = next(os.walk(root))[2]
    files = [file for file in files if file.lower().endswith(image_extensions)]

    images_resized_path = f"{PATH_PLANT_PATOLOGIES_RESIZED}/{directory}"
    unreadable = []

    for file in tqdm(files, "Resizing images"):
        image = cv2.imread(f"{root}/{file}")

        # cv2.imread returns None for a file it cannot decode; passing that to
        # cv2.resize would abort the whole run on a single bad file.
        if image is None:
            unreadable.append(file)
            continue

        image_resized = cv2.resize(image, dsize=(128, 128), interpolation=cv2.INTER_CUBIC)

        os.makedirs(images_resized_path, exist_ok=True)

        cv2.imwrite(f"{images_resized_path}/{file}", image_resized)

    print(f"Images from {directory} folder resized!")

    if unreadable:
        print(f"{len(unreadable)} file(s) from {directory} could not be read: {unreadable}")

Resizing images: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 257/257 [00:31<00:00,  8.19it/s]


Images from miner folder resized!


Resizing images: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 285/285 [00:34<00:00,  8.30it/s]

Images from rust folder resized!





### 2.3. RoCoLe

#### Listing directories

In [10]:
# Names of the entries directly under the RoCoLe 'original' folder;
# the resizing cell below iterates over this list.
directories = os.listdir(PATH_ROCOLE_ORIGINAL)
# Bare expression so the notebook displays the list as the cell output.
directories

['rust', 'healthy', 'red_spider_mite']

#### Resizing images

In [11]:
# Resize every RoCoLe image to 128x128 and write it to the 'resized' folder
# under the same class folder name.
for directory in directories:
    root = f"{PATH_ROCOLE_ORIGINAL}/{directory}"
    files = next(os.walk(root))[2]

    images_resized_path = f"{PATH_ROCOLE_RESIZED}/{directory}"
    unreadable = []

    for file in tqdm(files, "Resizing images"):
        image = cv2.imread(f"{root}/{file}")

        # cv2.imread returns None for a file it cannot decode; passing that to
        # cv2.resize would abort the whole run on a single bad file.
        if image is None:
            unreadable.append(file)
            continue

        image_resized = cv2.resize(image, dsize=(128, 128), interpolation=cv2.INTER_CUBIC)

        os.makedirs(images_resized_path, exist_ok=True)

        cv2.imwrite(f"{images_resized_path}/{file}", image_resized)

    print(f"Images from {directory} folder resized!")

    if unreadable:
        print(f"{len(unreadable)} file(s) from {directory} could not be read: {unreadable}")

Resizing images:   0%|                                                                                                      | 0/602 [00:00<?, ?it/s]Invalid SOS parameters for sequential JPEG
Resizing images:   1%|▌                                                                                             | 4/602 [00:00<00:17, 33.24it/s]Invalid SOS parameters for sequential JPEG
Resizing images:   1%|█▏                                                                                            | 8/602 [00:00<00:17, 34.09it/s]Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Resizing images:   2%|██▎                                                                                          | 15/602 [00:00<00:12, 47.54it/s]Invalid SOS parameters for sequential JPEG
Invalid SOS parameter

Images from rust folder resized!


Resizing images:   0%|                                                                                                      | 0/791 [00:00<?, ?it/s]Invalid SOS parameters for sequential JPEG
Resizing images:   0%|▎                                                                                             | 3/791 [00:00<00:48, 16.10it/s]Invalid SOS parameters for sequential JPEG
Resizing images:   1%|▊                                                                                             | 7/791 [00:00<00:30, 25.56it/s]Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Resizing images:   2%|█▉                                                                                           | 16/791 [00:00<00:39, 19.43it/s]Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Resizing images:   3%|██▌                                                                                  

Images from healthy folder resized!


Resizing images:  13%|███████████▋                                                                                 | 21/167 [00:01<00:12, 11.64it/s]Invalid SOS parameters for sequential JPEG
Resizing images:  22%|████████████████████                                                                         | 36/167 [00:02<00:10, 12.85it/s]Invalid SOS parameters for sequential JPEG
Resizing images:  26%|████████████████████████▌                                                                    | 44/167 [00:03<00:06, 17.90it/s]Invalid SOS parameters for sequential JPEG
Resizing images:  28%|█████████████████████████▌                                                                   | 46/167 [00:03<00:07, 16.85it/s]Invalid SOS parameters for sequential JPEG
Resizing images:  29%|██████████████████████████▋                                                                  | 48/167 [00:03<00:07, 15.81it/s]Invalid SOS parameters for sequential JPEG
Resizing images:  31%|███████████████████████

Images from red_spider_mite folder resized!





## 3. Verifying that all images were resized

### 3.1. BRACOL

In [12]:
# Maps a "<width> x <height>" label to the number of BRACOL images of that size.
dimentions = {}

In [13]:
# Count the resized BRACOL images per "<width> x <height>" size.
# Files whose last '.'-separated part is 'ipynb_checkpoints' are deleted
# instead of being counted.
for root, _, files in os.walk(PATH_BRACOL_RESIZED_SPLITTED):
    for file in files:
        if "ipynb_checkpoints" == file.split('.')[-1]:
            os.remove(f"{root}/{file}")
            # The file no longer exists: opening it below would only print a
            # spurious "No such file" error.
            continue

        try:
            # The context manager releases the file handle PIL keeps open;
            # only the size is needed, so the pixel data is never loaded.
            with Image.open(f"{root}/{file}") as image:
                size = f"{image.size[0]} x {image.size[1]}"
        except Exception as e:
            # Best effort: report the unreadable file and keep counting.
            print(e)
            continue

        dimentions[size] = dimentions.get(size, 0) + 1

In [14]:
# Print every size found with its image count, most frequent size first.
sizes_by_frequency = list(dimentions)
sizes_by_frequency.sort(key=dimentions.get, reverse=True)

for size in sizes_by_frequency:
    print(f"{size} = {dimentions[size]} images")

128 x 128 = 2209 images


### 3.2. Plant Patologies

In [15]:
# Reset the counter: maps a "<width> x <height>" label to the number of
# Plant Patologies images of that size.
dimentions = {}

In [16]:
# Count the resized Plant Patologies images per "<width> x <height>" size.
for root, _, files in os.walk(PATH_PLANT_PATOLOGIES_RESIZED):
    for file in files:
        try:
            # The context manager releases the file handle PIL keeps open;
            # only the size is needed, so the pixel data is never loaded.
            with Image.open(f"{root}/{file}") as image:
                size = f"{image.size[0]} x {image.size[1]}"
        except Exception as e:
            # Best effort: report the unreadable file and keep counting.
            print(e)
            continue

        dimentions[size] = dimentions.get(size, 0) + 1

In [17]:
# Print every size found with its image count, most frequent size first.
sizes_by_frequency = list(dimentions)
sizes_by_frequency.sort(key=dimentions.get, reverse=True)

for size in sizes_by_frequency:
    print(f"{size} = {dimentions[size]} images")

128 x 128 = 542 images


### 3.3. RoCoLe

In [18]:
# Reset the counter: maps a "<width> x <height>" label to the number of
# RoCoLe images of that size.
dimentions = {}

In [19]:
# Count the resized RoCoLe images per "<width> x <height>" size.
for root, _, files in os.walk(PATH_ROCOLE_RESIZED):
    for file in files:
        try:
            # The context manager releases the file handle PIL keeps open;
            # only the size is needed, so the pixel data is never loaded.
            with Image.open(f"{root}/{file}") as image:
                size = f"{image.size[0]} x {image.size[1]}"
        except Exception as e:
            # Best effort: report the unreadable file and keep counting.
            print(e)
            continue

        dimentions[size] = dimentions.get(size, 0) + 1

In [20]:
# Print every size found with its image count, most frequent size first.
sizes_by_frequency = list(dimentions)
sizes_by_frequency.sort(key=dimentions.get, reverse=True)

for size in sizes_by_frequency:
    print(f"{size} = {dimentions[size]} images")

128 x 128 = 1560 images
