<a href="https://colab.research.google.com/github/Vaibhavsharma0209/Plant-Pathology/blob/master/plant_pathelogy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from zipfile import ZipFile
import os

from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

!pip install efficientnet
from efficientnet.tfkeras import EfficientNetB7

 **TPU preparation**

In [None]:
# Let tf.data choose parallelism and prefetch buffer sizes on its own.
AUTO = tf.data.experimental.AUTOTUNE

# Locate the TPU attached to this runtime. A ValueError from the resolver is
# treated as "no TPU attached" and re-raised with an actionable message.
try:
  tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
  print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
except ValueError as err:
  # RuntimeError rather than BaseException: BaseException is reserved for
  # interpreter-level exits and slips past ordinary `except Exception` handlers.
  raise RuntimeError('ERROR: Not connected to a TPU runtime; in Colab choose Runtime > Change runtime type > TPU, then re-run this cell.') from err

# Connect to the TPU workers, initialise the TPU system, and build the strategy
# object that later cells use for replica counts and for model creation.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
# NOTE(review): newer TensorFlow releases expose this as tf.distribute.TPUStrategy
# and deprecate the experimental alias — confirm against the installed version.
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", tpu_strategy.num_replicas_in_sync)

In [None]:
# Edge length (pixels) of the square images fed to the network.
IMG_SIZE = 800
# Global batch size: 8 samples for every replica kept in sync by the strategy.
BATCH_SIZE = 8 * tpu_strategy.num_replicas_in_sync
# Number of units in the softmax output layer.
classes = 4

**Loading data**

In [None]:
# Unpack the competition archive into the current working directory.
# NOTE(review): the path lives under /content/drive, but no Drive mount cell is
# visible in this notebook — confirm Drive is mounted before running this cell.
with ZipFile('/content/drive/My Drive/Plant/plant-pathology-2020-fgvc7.zip') as archive:
    print('Extracting')
    archive.extractall()
    print('Done!!')

In [None]:
# Bucket and folder the image URIs are built from.
gcs_path = 'gs://kds-4d598c666e2db12886904a0a2d808a1259db3c0910143721bab174d1'
img_path = '/images/'

# train.csv comes from the archive extracted above; every column after the
# first is taken as the label vector (presumably one-hot — later cells argmax it).
train_csv = pd.read_csv('train.csv')
labels = train_csv.iloc[:, 1:].values

# One full image URI per row of train.csv, in file order.
images_path = np.array([
    '{}{}{}.jpg'.format(gcs_path, img_path, image_id)
    for image_id in train_csv['image_id']
])

**Split data into train and validation set**

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# shuffled split reproducible between runs.
train_images, val_images, train_labels, val_labels = train_test_split(
    images_path,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=200,
)

**Class weights**


In [None]:
# Balanced per-class weights, computed from the column index of the largest
# entry in each label row.
# `classes` and `y` are passed by keyword: scikit-learn declares them
# keyword-only, so the positional form raises TypeError on current releases.
class_indices = np.argmax(labels, axis=1)
class_weights = compute_class_weight('balanced', classes=np.unique(class_indices), y=class_indices)

**Functions for image preprocessing**

In [None]:
def decode_image(filename, label=None):
    """Load a JPEG file as a float32 RGB tensor resized to IMG_SIZE x IMG_SIZE.

    Pixel values are divided by 255 before resizing.

    Args:
        filename: path (or URI) of the JPEG file to read.
        label: optional label passed through untouched.

    Returns:
        The image tensor alone when ``label`` is None, otherwise the tuple
        ``(image, label)``.
    """
    raw_bytes = tf.io.read_file(filename)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    pixels = tf.image.resize(pixels, (IMG_SIZE, IMG_SIZE))

    return pixels if label is None else (pixels, label)

def data_augment(filename, label=None, seed=200):
    """Decode an image and apply random flips and a random quarter-turn rotation.

    Args:
        filename: path (or URI) of the JPEG file to read.
        label: optional label passed through untouched.
        seed: base op-level seed for the random ops, or None for unseeded ops.

    Returns:
        The augmented image alone when ``label`` is None, otherwise the tuple
        ``(image, label)``.
    """
    # Decode without the label: decode_image returns a bare tensor when the
    # label is None, so unpacking its result into two names would fail.
    image = decode_image(filename)
    decoded_shape = image.shape

    # Give every random op its own seed. Ops built with the same op-level seed
    # draw the same random sequence, which would tie the two flips (and the
    # rotation) together instead of sampling them independently.
    if seed is None:
        flip_lr_seed = flip_ud_seed = rotation_seed = None
    else:
        flip_lr_seed, flip_ud_seed, rotation_seed = seed, seed + 1, seed + 2

    image = tf.image.random_flip_left_right(image, seed=flip_lr_seed)
    image = tf.image.random_flip_up_down(image, seed=flip_ud_seed)

    # Rotate by a random number of quarter turns (0-3). A bare rot90(image)
    # always rotates by 90 degrees, which is not an augmentation and leaves the
    # training images oriented differently from the validation images.
    quarter_turns = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32, seed=rotation_seed)
    image = tf.image.rot90(image, k=quarter_turns)
    # decode_image yields square images, so every rotation keeps the decoded
    # shape; pin it so downstream batching sees a fully static shape.
    image = tf.ensure_shape(image, decoded_shape)

    if label is None:
        return image
    return image, label

**Preparing train and validation sets**

In [None]:
# Training pipeline: (path, label) pairs -> augmented images -> batches,
# repeated indefinitely and prefetched.
# NOTE(review): there is no .shuffle() step, so every pass sees the samples in
# the same order — confirm that is intended.
train_pairs = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_augmented = train_pairs.map(data_augment, num_parallel_calls=AUTO)
train_dataset = train_augmented.batch(BATCH_SIZE).repeat().prefetch(AUTO)

In [None]:
# Validation pipeline: decode only (no augmentation), put the whole validation
# split into a single batch, cache the result and prefetch it.
val_pairs = tf.data.Dataset.from_tensor_slices((val_images, val_labels))
val_decoded = val_pairs.map(decode_image, num_parallel_calls=AUTO)
val_dataset = val_decoded.batch(val_images.shape[0]).cache().prefetch(AUTO)

**Model architecture**

In [None]:
def create_model(trainable = True):
    """Build an EfficientNetB7 classifier with a softmax head of `classes` units.

    The backbone is created without its top, with 'noisy-student' weights and
    global average pooling; a Dense softmax layer is attached to its output.

    Args:
        trainable: when falsy, the whole model is marked non-trainable.

    Returns:
        The (uncompiled) Keras Model. Its summary is printed as a side effect.
    """
    # Model structure
    efficientnet = EfficientNetB7(weights = 'noisy-student', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling = 'avg')
    output = Dense(classes, activation="softmax")(efficientnet.output)

    model = Model(inputs=efficientnet.input, outputs=output)

    if not trainable:
        # NOTE(review): this freezes every layer, including the new Dense head;
        # if the intent is to train only the head, freeze `efficientnet` instead.
        model.trainable = False

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only adds a stray "None" line to the output.
    model.summary()

    return model

In [None]:
# Build AND compile inside the strategy scope so that everything the model
# creates belongs to the TPU strategy.
# The builder defined in this notebook is create_model(); the previous call to
# convnet() referenced a name that is never defined and raised NameError.
with tpu_strategy.scope():
    model = create_model()

    # Compilation of model
    model.compile(optimizer= Adam(0.0005), loss= 'categorical_crossentropy', metrics=['accuracy'])

### **Callbacks**

In [None]:
# Target file for the best weights. It was never defined anywhere in the
# notebook, so building the ModelCheckpoint raised NameError on a fresh kernel.
# The .h5 suffix selects the HDF5 weights format.
checkpoint_name = 'efficientnet_b7_best_weights.h5'

# Stop once val_loss has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor = 'val_loss', patience = 5, mode = 'min')
# Multiply the learning rate by 0.6 after 2 epochs without val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.6, patience = 2, mode = 'min', min_lr= 0.0000001)
# Keep only the weights of the epoch with the lowest val_loss.
checkpoint = ModelCheckpoint(checkpoint_name, save_best_only= True, save_weights_only= True ,mode = 'min', monitor= 'val_loss', verbose = 1)
#lr_schedule = LearningRateScheduler(schedule= lrschedule, verbose = 1)

In [None]:
# Upper bound on training epochs (the early-stopping callback may end sooner).
EPOCHS = 20
# Whole batches per pass over the training split; needed because the training
# dataset repeats forever.
STEPS_PER_EPOCH = len(train_images) // BATCH_SIZE

In [None]:
# Map class index -> weight, the dict form passed to model.fit(class_weight=...).
class_dict = dict(enumerate(class_weights))

In [None]:
# Train with the class weights and callbacks defined above; validation runs on
# val_dataset after each epoch and the per-epoch metrics end up in `history`.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    class_weight=class_dict,
    callbacks=[early_stopping, reduce_lr, checkpoint],
    verbose=1,
)

In [None]:
def loss_acc_plot(history, accuracy = False):
    """Plot training vs validation curves from a Keras History object.

    Always draws the loss figure; additionally draws the accuracy figure when
    requested. Each figure is shown immediately with plt.show().

    Args:
        history: object whose ``history`` attribute maps metric names
            ('loss', 'val_loss', and optionally 'accuracy', 'val_accuracy')
            to per-epoch values.
        accuracy: when truthy, also plot accuracy vs validation accuracy.
    """
    data = pd.DataFrame(history.history)

    # (title, training column, validation column) for every figure to draw.
    figures = [('Training Loss vs Validation Loss', 'loss', 'val_loss')]
    if accuracy:
        figures.append(('Training Accuracy vs Validation Accuracy', 'accuracy', 'val_accuracy'))

    for title, train_col, val_col in figures:
        plt.title(title)
        plt.plot(data[train_col], c = 'b', label = train_col)
        plt.plot(data[val_col], c = 'orange', label = val_col)
        # The x axis is the DataFrame index, i.e. one point per recorded epoch.
        plt.xlabel('epoch')
        plt.legend()
        plt.show()

In [None]:
# Show both the loss and the accuracy curves for the training run.
loss_acc_plot(history, accuracy= True)

In [None]:
# Model outputs for the validation dataset (one softmax row per sample).
dev_pred = model.predict(val_dataset)

def make_prediction_label(label_data):
    """Convert per-class scores into one-hot rows.

    Args:
        label_data: 2-D array of scores, one row per sample.

    Returns:
        Integer array of the same shape with a single 1 per row, placed at the
        column holding that row's maximum (first one wins on ties).
    """
    winners = np.argmax(label_data, axis = 1)
    one_hot = np.zeros(shape = label_data.shape, dtype = 'int')
    # Set (row i, winners[i]) for all rows in one indexing step.
    one_hot[np.arange(label_data.shape[0]), winners] = 1
    return one_hot

# One-hot version of the validation predictions (1 at each row's argmax).
pred_label = make_prediction_label(dev_pred)


In [None]:
def plot_cm(true_labels, pred_labels, label_name):
    """Build a confusion matrix as a labelled DataFrame.

    Despite the name, nothing is drawn: the counts are returned as a table.

    Args:
        true_labels: 2-D array of ground-truth rows (argmax gives the class).
        pred_labels: 2-D array of predicted rows, same shape as ``true_labels``.
        label_name: sequence of class names; its length sets the matrix size.

    Returns:
        DataFrame indexed by true class (rows) and predicted class (columns),
        where each cell counts the samples with that (true, predicted) pair.

    Raises:
        AssertionError: if the two label arrays differ in shape.
    """
    # Check the inputs before using them.
    assert true_labels.shape == pred_labels.shape

    max_true = np.argmax(true_labels, axis = 1)
    max_pred = np.argmax(pred_labels, axis = 1)

    # Size the matrix from the supplied names instead of a hard-coded 4x4.
    n_classes = len(label_name)
    matrix = np.zeros(shape = (n_classes, n_classes), dtype = 'int')

    # np.add.at accumulates repeated (row, col) pairs; a plain fancy-indexed
    # `+= 1` would count each distinct pair only once.
    np.add.at(matrix, (max_true, max_pred), 1)

    return pd.DataFrame(matrix, index = label_name, columns= label_name)

# Confusion matrix of validation labels vs one-hot predictions, using short
# names for the four label columns.
cm_matrix = plot_cm(val_labels, pred_label, ['h', 'm', 'r', 's'])

In [None]:
# Display the confusion matrix (rows: true class, columns: predicted class).
cm_matrix