In [2]:
import pandas as pd
import numpy as np
import pylab as plt
import glob
import os
import random
import keras
from keras import backend as K
from keras import layers
from keras import metrics
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential, Model
from keras.utils import plot_model
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import cv2
from tqdm import tqdm
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from collections import Counter
from sklearn.model_selection import train_test_split
import sklearn
from collections import Counter
from pandas import DataFrame

In [5]:
def load_data(new_w, new_h, data_dir='../input/cassava-leaf-disease-classification'):
    """Load every training image, resize it and pair it with its label.

    Parameters
    ----------
    new_w, new_h : int
        Target width and height handed to ``cv2.resize`` (which takes the
        size as ``(width, height)``).
    data_dir : str, optional
        Directory containing ``train.csv`` and the ``train_images`` folder.
        Defaults to the location this notebook originally hard-coded.

    Returns
    -------
    list
        One ``[image, label]`` pair per usable file. ``image`` is the resized
        RGB array; ``label`` is the array of ``label`` values whose
        ``image_id`` equals the file name (as returned by ``.values``).

    Notes
    -----
    Files that OpenCV cannot decode, or that have no row in ``train.csv``,
    are skipped with a warning instead of crashing or producing an empty label.
    """
    import warnings  # local import: only used for the skip notices below

    train_labels = pd.read_csv(os.path.join(data_dir, 'train.csv'), sep=',')
    path_loop = os.path.join(data_dir, 'train_images', '*.*')

    data = []
    # Sorting makes the sample order (and therefore the seeded splits done
    # later) independent of the order in which the filesystem lists files.
    for file in tqdm(sorted(glob.glob(path_loop))):
        name_file = os.path.basename(file)
        label = train_labels.loc[train_labels['image_id'] == name_file, 'label'].values
        if label.size == 0:
            warnings.warn('No label found for %s; file skipped.' % name_file)
            continue

        image_bgr = cv2.imread(file)
        if image_bgr is None:
            # cv2.imread signals an unreadable/non-image file by returning None.
            warnings.warn('Could not read %s as an image; file skipped.' % name_file)
            continue

        # OpenCV loads images as BGR; convert to RGB so they display correctly.
        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        frame_rgb = cv2.resize(image_rgb, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
        data.append([frame_rgb, label])
    return data


In [14]:
# Load the whole training set, resizing every image to 244x244 pixels.
data = load_data(new_w=244, new_h=244)

100%|██████████| 21397/21397 [08:22<00:00, 42.56it/s]


In [6]:
def train_test_split_modified(data):
    """Split ``[feature, label]`` pairs into a test set and three training subsets.

    The split is stratified on the labels at every stage and always seeded
    with ``random_state=42``:

    1. 10% of the samples are held out as the test set.
    2. Of the remainder, 34% go to model 1 and 66% are kept aside.
    3. The kept-aside part is halved between model 2 and model 3.

    Returns
    -------
    tuple
        ``(X_test, Y_test, X_model_1, Y_model_1, X_model_2, Y_model_2,
        X_model_3, Y_model_3)``.
    """
    samples = list(data)
    x_data = [feature for feature, _ in samples]
    y_data = [label for _, label in samples]

    split = sklearn.model_selection.train_test_split
    seed = 42

    # Stage 1: carve out the common test set.
    X_rest, X_test, Y_rest, Y_test = split(
        x_data, y_data, stratify=y_data, test_size=0.1, random_state=seed
    )
    # Stage 2: model 1 keeps 34% of what is left.
    X_model_1, X_pool, Y_model_1, Y_pool = split(
        X_rest, Y_rest, stratify=Y_rest, test_size=0.66, random_state=seed
    )
    # Stage 3: models 2 and 3 share the remaining pool equally.
    X_model_2, X_model_3, Y_model_2, Y_model_3 = split(
        X_pool, Y_pool, stratify=Y_pool, test_size=0.5, random_state=seed
    )

    return (X_test, Y_test,
            X_model_1, Y_model_1,
            X_model_2, Y_model_2,
            X_model_3, Y_model_3)
    


In [7]:
def data_preparation(X_data, Y_data, new_h, new_w, channels):
    """Create a stratified 80/20 train/validation split as numpy arrays.

    Images are converted to arrays, divided by 255 and reshaped to
    ``(-1, new_h, new_w, channels)``; labels are converted to arrays.
    The split is seeded with ``random_state=42``.

    Returns
    -------
    tuple
        ``(x_train, x_val, y_train, y_val)``.
    """
    train_images, val_images, train_labels, val_labels = train_test_split(
        X_data, Y_data, stratify=Y_data, train_size=0.8, random_state=42
    )

    batch_shape = (-1, new_h, new_w, channels)
    x_train = (np.array(train_images) / 255).reshape(batch_shape)
    x_val = (np.array(val_images) / 255).reshape(batch_shape)

    return x_train, x_val, np.array(train_labels), np.array(val_labels)

In [8]:
#Encode target variables
def encode_target_variable(y_train,y_val):
    """One-hot encode the training and validation labels with one shared encoder.

    Parameters
    ----------
    y_train, y_val : numpy.ndarray
        Label arrays; each is reshaped to a single column before encoding.

    Returns
    -------
    tuple
        ``(y_train_onehot, y_val_onehot)`` as dense arrays.

    Notes
    -----
    The encoder is fitted on the training labels only and then reused for the
    validation labels, so both outputs share the same column layout. Fitting
    it a second time on ``y_val`` could silently change the columns whenever
    the validation set does not contain exactly the same classes.
    """
    # The default (sparse) output plus .toarray() is used instead of the
    # `sparse=False` argument, which newer scikit-learn releases no longer accept.
    onehot_encoder = OneHotEncoder()
    y_train = y_train.reshape(-1, 1)
    y_val = y_val.reshape(-1, 1)
    y_train_onehot = onehot_encoder.fit_transform(y_train).toarray()
    y_val_onehot = onehot_encoder.transform(y_val).toarray()
    return y_train_onehot, y_val_onehot

In [9]:
def create_model_1(x_train,x_val):
    """Build the ResNet50 transfer-learning model and preprocess its inputs.

    Parameters
    ----------
    x_train, x_val : numpy.ndarray
        RGB image batches scaled to ``[0, 1]``, as produced by
        ``data_preparation`` in this notebook.

    Returns
    -------
    tuple
        ``(transfer_model_1, x_train_new, x_val_new)``: the uncompiled model
        (frozen ImageNet ResNet50 base + Flatten + Dense(128) + Dropout(0.2)
        + 5-way softmax) and the two batches after ResNet50 preprocessing.
    """
    resnet50 = ResNet50(weights='imagenet', include_top=False, input_shape=(244, 244, 3))
    resnet50.trainable = False # remove if you want to retrain resnet weights
    resnet50.summary()

    transfer_model_1 = Sequential()
    transfer_model_1.add(resnet50)
    transfer_model_1.add(Flatten())
    transfer_model_1.add(Dense(128, activation='relu'))
    transfer_model_1.add(Dropout(0.2))
    transfer_model_1.add(Dense(5, activation='softmax'))

    # ResNet50's preprocess_input works on the 0-255 pixel range (it reorders
    # the channels and subtracts the ImageNet channel means). The incoming
    # batches were divided by 255, so restore the range first; otherwise the
    # mean subtraction swamps the image content. The multiplication also
    # yields a new array, leaving the caller's batches untouched.
    x_train_new = keras.applications.resnet50.preprocess_input(x_train * 255.0)
    x_val_new = keras.applications.resnet50.preprocess_input(x_val * 255.0)
    return transfer_model_1, x_train_new, x_val_new

In [10]:
def create_model_2(x_train, x_val):
    """Build the VGG16 transfer-learning model and preprocess its inputs.

    Parameters
    ----------
    x_train, x_val : numpy.ndarray
        RGB image batches scaled to ``[0, 1]``, as produced by
        ``data_preparation`` in this notebook. They are explicit parameters
        (matching ``create_model_1`` and the call in the main cell) rather
        than values read from notebook globals.

    Returns
    -------
    tuple
        ``(transfer_model_2, x_train_new, x_val_new)``: the uncompiled model
        (frozen ImageNet VGG16 base + Flatten + Dense(128) + Dropout(0.2)
        + 5-way softmax) and the two batches after VGG16 preprocessing.
    """
    vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(244, 244, 3))
    vgg16.trainable = False # remove if you want to retrain the VGG16 weights
    vgg16.summary()

    transfer_model_2 = Sequential()
    transfer_model_2.add(vgg16)
    transfer_model_2.add(Flatten())
    transfer_model_2.add(Dense(128, activation='relu'))
    transfer_model_2.add(Dropout(0.2))
    transfer_model_2.add(Dense(5, activation='softmax'))

    # preprocess_input lives in the keras.applications.vgg16 module; the VGG16
    # model factory itself has no such attribute. It expects 0-255 pixels, so
    # the [0, 1] scaling applied upstream is undone before calling it.
    x_train_new = keras.applications.vgg16.preprocess_input(x_train * 255.0)
    x_val_new = keras.applications.vgg16.preprocess_input(x_val * 255.0)
    return transfer_model_2, x_train_new, x_val_new

In [11]:
def create_model_3(x_train, x_val):
    """Build the InceptionV3 transfer-learning model and preprocess its inputs.

    Parameters
    ----------
    x_train, x_val : numpy.ndarray
        RGB image batches scaled to ``[0, 1]``, as produced by
        ``data_preparation`` in this notebook. They are explicit parameters
        (matching ``create_model_1`` and the call in the main cell) rather
        than values read from notebook globals.

    Returns
    -------
    tuple
        ``(transfer_model_3, x_train_new, x_val_new)``: the uncompiled model
        (frozen ImageNet InceptionV3 base + Flatten + Dense(128) +
        Dropout(0.2) + 5-way softmax) and the two batches after InceptionV3
        preprocessing.
    """
    inceptionV3 = InceptionV3(weights='imagenet', include_top=False, input_shape=(244, 244, 3))
    inceptionV3.trainable = False # remove if you want to retrain the InceptionV3 weights
    inceptionV3.summary()

    transfer_model_3 = Sequential()
    transfer_model_3.add(inceptionV3)
    transfer_model_3.add(Flatten())
    transfer_model_3.add(Dense(128, activation='relu'))
    transfer_model_3.add(Dropout(0.2))
    transfer_model_3.add(Dense(5, activation='softmax'))

    # preprocess_input lives in the keras.applications.inception_v3 module;
    # the InceptionV3 model factory itself has no such attribute. It maps
    # 0-255 pixels to [-1, 1], so the [0, 1] scaling applied upstream is
    # undone before calling it.
    x_train_new = keras.applications.inception_v3.preprocess_input(x_train * 255.0)
    x_val_new = keras.applications.inception_v3.preprocess_input(x_val * 255.0)
    return transfer_model_3, x_train_new, x_val_new

In [12]:
def train_model(model, batch_size, epochs, x_train_new, x_val_new, y_train_onehot, y_val_onehot):
    """Compile ``model`` and fit it, validating on the supplied hold-out set.

    Parameters
    ----------
    model
        Object exposing Keras-style ``compile`` and ``fit`` methods.
    batch_size, epochs : int
        Forwarded unchanged to ``model.fit``.
    x_train_new, y_train_onehot
        Training inputs and one-hot targets.
    x_val_new, y_val_onehot
        Validation inputs and one-hot targets, passed as ``validation_data``.

    Returns
    -------
    The same ``model`` object, after fitting.
    """
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                  metrics=['accuracy'])

    # Only validation_data is passed. A validation_split given alongside it
    # is overridden by validation_data, so it would merely suggest that part
    # of the training set is held out when it is not.
    model.fit(x_train_new, y_train_onehot,
              batch_size=batch_size, epochs=epochs,
              verbose=1, shuffle=True,
              validation_data=(x_val_new, y_val_onehot))
    return model

In [13]:
# Class distribution of the labels assigned to model 1.
# Needs Y_model_1, which is only created by the split in the main cell
# further down; run that cell first or this raises NameError.
Y_model_1_df = DataFrame(Y_model_1, columns=['labels'])
Y_model_1_df['labels'].value_counts()

NameError: name 'Y_model_1' is not defined

In [None]:
# Class distribution of the labels assigned to model 2.
# Needs Y_model_2, which is only created by the split in the main cell
# further down; run that cell first.
Y_model_2_df = DataFrame(Y_model_2, columns=['labels'])
Y_model_2_df['labels'].value_counts()

In [None]:
# Class distribution of the labels assigned to model 3.
# Needs Y_model_3, which is only created by the split in the main cell
# further down; run that cell first.
Y_model_3_df = DataFrame(Y_model_3, columns=['labels'])
Y_model_3_df['labels'].value_counts()

In [None]:
# Disease label key: {"0": "Cassava Bacterial Blight (CBB)",
# "1": "Cassava Brown Streak Disease (CBSD)", "2": "Cassava Green Mottle (CGM)",
# "3": "Cassava Mosaic Disease (CMD)", "4": "Healthy"}

# Show the first eight model-1 images in a 2x4 grid, each titled with its label.
# Needs X_model_1 / Y_model_1 from the split performed in the main cell.
fig = plt.figure(figsize=(10, 6))

for i in range(8):
    img = X_model_1[i]
    ax = fig.add_subplot(2, 4, i + 1)
    ax.imshow(img)
    ax.set_title(Y_model_1[i])

In [None]:
################## Main function ######################

new_h = 244
new_w = 244
batch_size = 500
epochs = 10
channels = 3

# The images were already loaded and resized to 244x244 by the earlier
# `data = load_data(244, 244)` cell, so reuse them instead of spending
# several minutes reloading. On a kernel where that cell has not been run,
# use `training = load_data(new_w, new_h)` instead.
# TODO (from the original note "CAMBIAR load_data"): revise load_data.
training = data

# Divide data into test and training/validation for three different models
X_test, Y_test, X_model_1, Y_model_1, X_model_2, Y_model_2, X_model_3, Y_model_3 = train_test_split_modified(training)
training_data = [X_model_1, Y_model_1, X_model_2, Y_model_2, X_model_3, Y_model_3]

# ResNet50 (first subset)
x_train, x_val, y_train, y_val = data_preparation(training_data[0], training_data[1], new_h, new_w, channels)
y_train_onehot, y_val_onehot = encode_target_variable(y_train, y_val)
resnet50_model, x_train_new, x_val_new = create_model_1(x_train, x_val)
resnet50_model = train_model(resnet50_model, batch_size, epochs, x_train_new, x_val_new, y_train_onehot, y_val_onehot)

# VGG16 (second subset)
x_train, x_val, y_train, y_val = data_preparation(training_data[2], training_data[3], new_h, new_w, channels)
y_train_onehot, y_val_onehot = encode_target_variable(y_train, y_val)
vgg16_model, x_train_new, x_val_new = create_model_2(x_train, x_val)
vgg16_model = train_model(vgg16_model, batch_size, epochs, x_train_new, x_val_new, y_train_onehot, y_val_onehot)

# InceptionV3 (third subset)
x_train, x_val, y_train, y_val = data_preparation(training_data[4], training_data[5], new_h, new_w, channels)
y_train_onehot, y_val_onehot = encode_target_variable(y_train, y_val)
inception_model, x_train_new, x_val_new = create_model_3(x_train, x_val)
inception_model = train_model(inception_model, batch_size, epochs, x_train_new, x_val_new, y_train_onehot, y_val_onehot)