<a href="https://colab.research.google.com/github/alex-bolshunov/Plant-Disease-Detection/blob/main/plant_disease_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install img2vec-pytorch

In [2]:
import os
import pandas as pd
from img2vec_pytorch import Img2Vec
from google.colab import drive
from PIL import Image

import matplotlib.pyplot as plt
import numpy as np

In [3]:
# --- numeric settings ---
N_ROWS: int = 3        # NOTE(review): not referenced in the cells visible here
N_FEATURES: int = 512  # presumably the length of one img2vec feature vector - TODO confirm
PROB: float = 0.5      # NOTE(review): not referenced in the cells visible here

# --- column names used in the data dictionary / dataframes ---
IMG: str = "image_path"
DGR: str = "degrees"
SCL: str = "scale"
LBL: str = "label"

In [4]:
class Image_Augmentation:
  """
  Random, probability-gated augmentations for PIL-style images.

  Every public method applies its transform with probability `prob` (falling
  back to the instance default) and returns the resulting image together with
  the parameter that was used, so the caller can record what was done.
  Randomness comes from NumPy's global RNG (np.random).
  """

  def __init__(self, prob = 0.5):
    """
    prob: default probability (0 to 1) of applying an augmentation when a
          method is called without an explicit prob, float
    """
    self.prob = prob

  def _random_binary(self, prob = None):
    """
    Draw 0 or 1.
    prob: probability of 1, float from 0 to 1; None means self.prob
    Return 1 with probability prob, otherwise 0 (plain int)
    """
    if prob is None: prob = self.prob

    return int(np.random.choice([0, 1], p = [1 - prob, prob]))

  def random_img_rotation(self, img, prob = None):
    """
    Rotate an image by 90, 180 or 270 degrees with probability prob.
    img: image object exposing rotate(angle, expand = ...), e.g. a PIL image
    prob: probability of rotating, float from 0 to 1; None means self.prob
    Return (image, degree): the rotated image and the chosen angle, or the
    untouched image and 0 when no rotation was applied
    """
    degrees = (90, 180, 270)

    if not self._random_binary(prob):
      return img, 0

    # int() turns the NumPy scalar into a plain int, so both branches
    # return the same type
    degree = int(np.random.choice(degrees))

    # expand = True grows the canvas so a 90/270 degree turn is not cropped
    return img.rotate(degree, expand = True), degree

  def random_img_rescaling(self, img, prob = None, scale = (1,), resample = None):
    """
    Rescale an image by a randomly chosen factor with probability prob.
    img: image object exposing width, height and resize(size, resample)
    prob: probability of rescaling, float from 0 to 1; None means self.prob
    scale: non-empty sequence of positive scale factors to choose from
    resample: resampling filter passed to resize; None means
              Image.Resampling.LANCZOS
    Return (image, scale): the resized image and the chosen factor, or the
    untouched image and 1 when no rescaling was applied
    Raise ValueError if the chosen factor is not positive
    """
    if not self._random_binary(prob):
      return img, 1

    s = np.random.choice(scale)
    if s <= 0:
      raise ValueError(f"scale factors must be positive, got {s}")

    # looked up lazily so the default filter is only needed when resizing
    if resample is None: resample = Image.Resampling.LANCZOS

    # never ask for a 0-pixel side: tiny factors would otherwise make resize fail
    new_size = (max(1, int(img.width * s)), max(1, int(img.height * s)))

    return img.resize(new_size, resample), s

In [5]:
%%capture

#mount Google Drive at /content/drive so the dataset folder stored there can be read
#(the %%capture magic on the first line of the cell hides the output of the call)
drive.mount('/content/drive')

In [6]:
#root folder on the mounted Google Drive; it holds one sub-folder per class
dataset_path = "/content/drive/MyDrive/project_data_potatos"

#only sub-folders with these names are processed
classes = ("Potato___Early_blight", "Potato___healthy", "Potato___Late_blight")

#per-image records, one list per column: path, rotation, scale factor, class label
path_dict = {column: [] for column in (IMG, DGR, SCL, LBL)}

#feature vectors, one entry per successfully processed image
features = []

#positions (within their class folder) of the images that could not be processed
err_indecies = []

In [7]:
%%capture

#create the img2vec feature extractor with its default arguments
#(the %%capture magic on the first line of the cell hides the output of this cell)
img2vec = Img2Vec()

#create the augmentation helper; its default augmentation probability is 0.5
img_aug = Image_Augmentation()

In [8]:
#walk the class folders, randomly augment and embed every image,
#collect one row per image, then build the dataframes
#NOTE: the augmentations draw from NumPy's global RNG; call np.random.seed(...)
#before this cell if the output has to be reproducible

#start from empty containers so re-running this cell does not append duplicate rows
for column in path_dict.values():
  column.clear()
features.clear()
err_indecies.clear()

#scale factors the random rescaling step chooses from
scale_factors = [1.2, 1.5, 1.7]

#sorted() makes the row order deterministic; os.listdir returns entries in arbitrary order
for class_name in sorted(os.listdir(dataset_path)):

  #skip folders that are not classes of interest
  if class_name not in classes:
    continue

  directory_path = os.path.join(dataset_path, class_name)

  for i, image_name in enumerate(sorted(os.listdir(directory_path))):
    image_path = os.path.join(directory_path, image_name)

    try:
      #the context manager closes the file handle; convert() loads the pixels and
      #returns an RGB copy (img2vec expects 3-channel input - see its README)
      with Image.open(image_path) as raw_img:
        img = raw_img.convert("RGB")

      img, degree = img_aug.random_img_rotation(img) #randomly rotate image
      img, scale = img_aug.random_img_rescaling(img, scale = scale_factors) #randomly scale image

      fs = img2vec.get_vec(img)
    except Exception as exc:
      #best effort: report and skip files that cannot be processed
      #(Exception rather than a bare except, so interrupting the kernel still works)
      print(f"Skipping {image_path}: {exc!r}")
      err_indecies.append(i)
      continue

    #append only after every step succeeded so all columns keep the same length
    path_dict[IMG].append(image_path)
    path_dict[DGR].append(degree)
    path_dict[SCL].append(scale)
    path_dict[LBL].append(class_name)
    features.append(fs)


#one row per image: path, applied rotation, applied scale factor, class label
df_path = pd.DataFrame(path_dict)

#one column per feature, with the class label joined on as the last column
df = pd.DataFrame(features).join(df_path.loc[:, LBL])

In [9]:
#write both tables to CSV files in the working directory, without the index column
for frame, file_name in ((df_path, "paths.csv"), (df, "dataframe.csv")):
  frame.to_csv(file_name, index = False)

In [10]:
#list the recorded positions of the images that could not be processed
for failed_index in err_indecies:
  print(f"Error index: {failed_index}")