<a href="https://colab.research.google.com/github/igorlauxen/local_grape_health/blob/main/Enhanced_Plant_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Para conectar kaggle e google driver seguir o tutorial do [Medium: How to fetch kaggle datasets into google colab](https://medium.com/analytics-vidhya/how-to-fetch-kaggle-datasets-into-google-colab-ea682569851a)

In [17]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [18]:
import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/MyDrive/kaggle"
# /content/gdrive/My Drive/Kaggle is the path where kaggle.json is present in the Google Drive

In [19]:
!ls /content/gdrive/MyDrive/kaggle

#changing the working directory
%cd /content/gdrive/MyDrive/kaggle
#Check the present working directory using pwd command

!ls

ignore_folder  kaggle.json  label_transform.pkl  plantvillage  PlantVillage
/content/gdrive/MyDrive/kaggle
ignore_folder  kaggle.json  label_transform.pkl  plantvillage  PlantVillage


In [None]:
# só execute comando abaixo caso no seu google drie não tenha sido baixado as plantas ainda
#!kaggle datasets download -d emmarex/plantdisease

Downloading plantdisease.zip to /content/gdrive/My Drive/kaggle
100% 656M/658M [00:22<00:00, 36.3MB/s]
100% 658M/658M [00:22<00:00, 30.2MB/s]


In [None]:
# mesmo que passo anterior, caso há tenha sido executado uma vez, nao precisa ocorrer de novo
#unzipping the zip files and deleting the zip files
#!unzip \*.zip  && rm *.zip

Esse notebook baseia sua implementação em [Plant Disease Detection using Keras](https://www.kaggle.com/emmarex/plant-disease-detection-using-keras/data)

Dataset utilizado é [Plant Village Disease](https://www.kaggle.com/emmarex/plantdisease)

In [4]:
import numpy as np
import pickle
import cv2
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
#from keras.layers.normalization import BatchNormalization
from keras.layers import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Flatten, Dropout, Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
# from keras.optimizers import Adam precisa importar depois como keras.optimizers.Adam segundo https://keras.io/api/optimizers/
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [21]:
EPOCHS = 25
INIT_LR = 1e-3
BS = 32
default_image_size = tuple((256, 256))
image_size = 0
# NOTA: essa pasta pode variar de location depdendo de onde der a *cd* anteriormente
directory_root = 'plantvillage/'
width=256
height=256
# depth 3 significa considerar RGB
depth=3

In [20]:
!pwd

/content/gdrive/MyDrive/kaggle


In [6]:
def convert_image_to_array(image_dir):
    try:
        image = cv2.imread(image_dir)
        if image is not None :
            image = cv2.resize(image, default_image_size)   
            return img_to_array(image)
        else :
            return np.array([])
    except Exception as e:
        print(f"Error : {e}")
        return None

In [27]:
# o processamento abaixo levar alguns minutos (~15)
image_list, label_list = [], []
try:
    print("[INFO] Loading images ...")
    root_dir = listdir(directory_root)
    for directory in root_dir :
        # remove .DS_Store from list
        if directory == ".DS_Store" :
            root_dir.remove(directory)

    for plant_folder in root_dir :
        print(f"[INFO] The plant folder is {plant_folder} ...")
        plant_disease_folder_list = listdir(f"{directory_root}/{plant_folder}")
        print(f"[INFO] The plant disease folder list is {plant_disease_folder_list} ...")
        for disease_folder in plant_disease_folder_list :
            print(f"[INFO] The disease folder is {disease_folder} ...")
            # remove .DS_Store from list
            if disease_folder == ".DS_Store" :
                plant_disease_folder_list.remove(disease_folder)
            if disease_folder == "Tomato_Spider_mites_Two_spotted_spider_mite":
                print(f"[INFO] Folder {disease_folder} has been ignored ...")
                plant_disease_folder_list.remove(disease_folder)

        for plant_disease_folder in plant_disease_folder_list:
            print(f"[INFO] Processing {plant_disease_folder} ...")
            plant_disease_image_list = listdir(f"{directory_root}/{plant_folder}/{plant_disease_folder}/")
                
            for single_plant_disease_image in plant_disease_image_list :
                if single_plant_disease_image == ".DS_Store" :
                    plant_disease_image_list.remove(single_plant_disease_image)

            for image in plant_disease_image_list[:200]:
                image_directory = f"{directory_root}/{plant_folder}/{plant_disease_folder}/{image}"
                if image_directory.endswith(".jpg") == True or image_directory.endswith(".JPG") == True:
                    image_list.append(convert_image_to_array(image_directory))
                    label_list.append(plant_disease_folder)
    print("[INFO] Image loading completed")  
except Exception as e:
    print(f"Error : {e}")

[INFO] Loading images ...
[INFO] The plant folder is PlantVillage ...
[INFO] The plant disease folder list is ['Pepper__bell___Bacterial_spot', 'Pepper__bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy'] ...
[INFO] The disease folder is Pepper__bell___Bacterial_spot ...
[INFO] The disease folder is Pepper__bell___healthy ...
[INFO] The disease folder is Potato___Early_blight ...
[INFO] The disease folder is Potato___Late_blight ...
[INFO] The disease folder is Potato___healthy ...
[INFO] Processing Pepper__bell___Bacterial_spot ...
[INFO] Processing Pepper__bell___healthy ...
[INFO] Processing Potato___Early_blight ...
[INFO] Processing Potato___Late_blight ...
[INFO] Processing Potato___healthy ...
[INFO] Image loading completed


In [28]:
image_size = len(image_list)
print("image size is ", image_size)

image size is  952


In [29]:
label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)
pickle.dump(label_binarizer,open('label_transform.pkl', 'wb'))
n_classes = len(label_binarizer.classes_)

In [30]:
print(label_binarizer.classes_)

['Pepper__bell___Bacterial_spot' 'Pepper__bell___healthy'
 'Potato___Early_blight' 'Potato___Late_blight' 'Potato___healthy']


In [46]:
# mudado de np.float16 para np.float32
# devido a exception no modelo tcc
np_image_list = np.array(image_list, dtype=np.float32) / 225.0

In [None]:
print(np_image_list)

In [47]:
print("[INFO] Spliting data to train, test")
# o que faz train_test_split?
# r: ele dividi o dataset em dados de treino e validação. 
# O parametro test_size é de 0 a 1 e é o que define o percentual que vai para validação
x_train, x_test, y_train, y_test = train_test_split(np_image_list, image_labels, test_size=0.2, random_state = 42) 

[INFO] Spliting data to train, test


In [48]:
aug = ImageDataGenerator(
    rotation_range=25, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, 
    zoom_range=0.2,horizontal_flip=True, 
    fill_mode="nearest")

In [49]:
model = Sequential()
inputShape = (height, width, depth)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1
# tf.keras.layers.Conv2D(filters (32),kernel_size(3,3)
# padding pode ser same ou valid. valid: "valid" means no padding
#   same: results in padding with zeros evenly to the left/right or up/down of the input such that output has the same height/width dimension as the input.
# inputShape=(256,256,3): alta resolução com cores
model.add(Conv2D(32, (3, 3), padding="same",input_shape=inputShape))
# What is activation function? an activation function is needed that looks and acts like a linear function, 
# but is, in fact, a nonlinear function allowing complex relationships in the data to be learned.
# What is Relu? Rectified Linear Unit (ReLU)
# linear function that will output the input directly if it is positive, otherwise, it will output zero.
model.add(Activation("relu"))
# Batch normalization applies a transformation that maintains the mean output close to 0 and the output standard deviation close to 1.
# The axis that should be normalized (typically the features axis). 
# For instance, after a Conv2D layer with data_format="channels_first", set axis=1 in BatchNormalization.
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation("softmax"))

In [50]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_19 (Conv2D)          (None, 256, 256, 32)      896       
                                                                 
 activation_13 (Activation)  (None, 256, 256, 32)      0         
                                                                 
 batch_normalization_12 (Bat  (None, 256, 256, 32)     128       
 chNormalization)                                                
                                                                 
 max_pooling2d_15 (MaxPoolin  (None, 85, 85, 32)       0         
 g2D)                                                            
                                                                 
 dropout_11 (Dropout)        (None, 85, 85, 32)        0         
                                                                 
 conv2d_20 (Conv2D)          (None, 85, 85, 64)       

In [51]:
from tensorflow import keras

# from keras.optimizers import a
opt = keras.optimizers.Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
# distribution
model.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])
# train the network
print("[INFO] training network...")

[INFO] training network...


  super(Adam, self).__init__(name, **kwargs)


In [None]:
# cada epoch demora 10 minutos, processo abaixo demora MUITO
history = model.fit_generator(
    aug.flow(x_train, y_train, batch_size=BS),
    validation_data=(x_test, y_test),
    steps_per_epoch=len(x_train) // BS,
    epochs=EPOCHS, verbose=1
    )

  


Epoch 1/25
Epoch 2/25

In [38]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accurarcy')
plt.plot(epochs, val_acc, 'r', label='Validation accurarcy')
plt.title('Training and Validation accurarcy')
plt.legend()

plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()

NameError: ignored

In [None]:
print("[INFO] Calculating model accuracy")
scores = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {scores[1]*100}")

In [None]:
# save the model to disk
print("[INFO] Saving model...")
pickle.dump(model,open('cnn_model.pkl', 'wb'))

In [43]:
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import RMSprop

# Our input feature map is 32x32x3: 32x32 for the image pixels, and 3 for
# the three color channels: R, G, and B
img_input = layers.Input(shape=(height, width, depth))

x = layers.Conv2D(32, 3, activation='relu')(img_input)
x = layers.MaxPooling2D(2)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Convolution2D(128, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)

# Flatten feature map to a 1-dim tensor
x = layers.Flatten()(x)

# Create a fully connected layer with ReLU activation and 512 hidden units
x = layers.Dense(512, activation='relu')(x)

x = layers.Dropout(0.5)(x)

# Create output layer with a single node and sigmoid activation
output = layers.Dense(1, activation='sigmoid')(x)

# Configure and compile the model
tcc_model = Model(img_input, output)
tcc_model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.001),
              metrics=['acc'])

  super(RMSprop, self).__init__(name, **kwargs)


In [44]:
tcc_model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d_16 (Conv2D)          (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 127, 127, 32)     0         
 g2D)                                                            
                                                                 
 conv2d_17 (Conv2D)          (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 62, 62, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_18 (Conv2D)          (None, 60, 60, 128)       7385

In [41]:
# cada epoch demora 10 minutos, processo abaixo demora MUITO
history = tcc_model.fit_generator(
    aug.flow(x_train, y_train, batch_size=BS),
    validation_data=(x_test, y_test),
    steps_per_epoch=len(x_train) // BS,
    epochs=EPOCHS, verbose=1
    )

history = model.fit_generator(
      aug.flow(x_train, y_train, batch_size=BS),
      steps_per_epoch=len(x_train),
      epochs=EPOCHS,
      validation_data=(x_test, y_test),
      validation_steps=50,
      verbose=2)

  


Epoch 1/25

ValueError: ignored

In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accurarcy')
plt.plot(epochs, val_acc, 'r', label='Validation accurarcy')
plt.title('Training and Validation accurarcy')
plt.legend()

plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()