<a href="https://colab.research.google.com/github/classy-classifiers/cassava-classification/blob/main/Baseline_Kaggle_Submission.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## *Code copied from our Kaggle submission notebook - the paths here will not work outside of the Kaggle environment*

In [None]:
# import the requisite packages
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from keras import preprocessing

In [None]:
# Paths (not credentials): the Kaggle competition input directory and the
# directory the submission CSV is written to.
FILEPATH = '../input/cassava-leaf-disease-classification'
RESULTSPATH = './'

In [None]:
# Directory holding the training images, and the names of the entries in it.
# os.listdir returns the names in arbitrary order.
train_imgs_dir = os.path.join(FILEPATH, 'train_images')
img_names = os.listdir(train_imgs_dir)

In [None]:
# Load the training labels. Later cells read the 'image_id' and 'label'
# columns of this frame, so it has to exist before anything else runs.
labels_df = pd.read_csv(os.path.join(FILEPATH, 'train.csv'))

# Class balance of the training set.
labels_df['label'].value_counts()
#-----------------------------------------------------------------------------------------------------
# Baseline 1, kept for reference only: predict class 3 for every test image
# (presumably the most frequent class in the counts above - confirm).
# def baseline_classifier_1(img): return 3
# test_images = os.listdir(os.path.join(FILEPATH, 'test_images'))
# predictions = []
# for image in test_images:
#     predictions.append(baseline_classifier_1(image))
# sub = pd.DataFrame({'image_id': test_images, 'label': predictions})
# sub.to_csv(os.path.join(RESULTSPATH, 'submission_baseline_1.csv'), index = False)

In [None]:
# The data loader wants string class labels, so swap every integer label
# for its short class name (position in the list == original integer id).
labels_map_short = dict(enumerate(['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy']))
labels_df = labels_df.replace(to_replace={"label": labels_map_short})
labels_df.head()

In [None]:
# The training and validation generators read the same dataframe and image
# directory and differ only in `subset`, so their common arguments are
# declared once here.
base_flow_kwargs = dict(dataframe=labels_df,
                        directory=train_imgs_dir,
                        x_col='image_id',
                        y_col='label',
                        target_size=(600, 800),  # (height, width)
                        batch_size=128,
                        class_mode='categorical')

# Both ImageDataGenerators use the same validation_split, so the 'training'
# and 'validation' subsets should be complementary parts of labels_df
# (NOTE(review): relies on Keras splitting deterministically by row position).
base_train_datagen = preprocessing.image.ImageDataGenerator(validation_split = 0.2)
base_train_gen = base_train_datagen.flow_from_dataframe(subset='training',
                                                        **base_flow_kwargs)
base_val_datagen = preprocessing.image.ImageDataGenerator(validation_split = 0.2)
base_val_gen = base_val_datagen.flow_from_dataframe(subset='validation',
                                                    **base_flow_kwargs)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, AvgPool2D
from tensorflow.keras import callbacks
from tensorflow import optimizers
def baseline_net(input_shape=(600, 800, 3), num_classes=5):
    """Build the (uncompiled) baseline convolutional classifier.

    Architecture: two Conv2D(5x5, relu) + 2x2 average-pooling stages
    (6 then 16 filters), followed by Dense(60) -> Dense(42) -> softmax.

    Args:
        input_shape: (height, width, channels) of the input images.
            Defaults to the 600x800 RGB images used in this notebook.
        num_classes: Number of units in the final softmax layer.
            Defaults to 5.

    Returns:
        A `Sequential` model; the caller is responsible for compiling it.
    """
    model = Sequential()
    # 'same' padding keeps the spatial size through the first convolution.
    model.add(Conv2D(filters=6,
                     kernel_size=5,
                     activation='relu',
                     padding='same',
                     input_shape=input_shape))
    model.add(AvgPool2D(pool_size=2, strides=2))
    # Default ('valid') padding here, so this convolution trims the borders.
    model.add(Conv2D(filters=16,
                     kernel_size=5,
                     activation='relu'))
    model.add(AvgPool2D(pool_size=2, strides=2))
    model.add(Flatten())
    model.add(Dense(60, activation='relu'))
    model.add(Dense(42, activation='relu'))
    # One probability per class.
    model.add(Dense(num_classes, activation='softmax'))
    return model
# Instantiate the baseline network with its default configuration and print
# the layer-by-layer summary.
baseline_cnn = baseline_net()
baseline_cnn.summary()


In [None]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
baseline_cnn.compile(optimizer = optimizers.Adam(learning_rate = 0.001),
                     loss = "categorical_crossentropy",
                     metrics = ["accuracy"])

# Stop once val_loss has failed to improve by at least 0.001 for 5 epochs in
# a row, and restore the weights from the best epoch.
early_stop = callbacks.EarlyStopping(monitor = 'val_loss',
                                     min_delta = 0.001,
                                     patience = 5,
                                     mode = 'min',
                                     verbose = 1,
                                     restore_best_weights = True)

# fit() documents `callbacks` as a list of callbacks, so wrap the single one.
base_history = baseline_cnn.fit(base_train_gen,
                                epochs=10,
                                callbacks=[early_stop],
                                verbose=1,
                                validation_data=base_val_gen)


In [None]:
# One figure per metric (accuracy first, then loss): training curve, then
# validation curve, saved to a PNG before being displayed.
for metric, axis_label, out_file in (('accuracy', 'Accuracy', 'base_cnn_acc.png'),
                                     ('loss', 'Loss', 'base_cnn_loss.png')):
    plt.plot(base_history.history[metric])
    plt.plot(base_history.history['val_' + metric])
    plt.title('Base Model ' + axis_label)
    plt.ylabel(axis_label)
    plt.xlabel('Epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.grid()
    plt.savefig(out_file, dpi=100)
    plt.show()


In [None]:
# Predict a label for every test image and write the submission CSV.
test_path = os.path.join(FILEPATH, 'test_images')
test_images = os.listdir(test_path)
predictions = []
for image_id in test_images:
    # Context manager so the image file handle is closed promptly.
    with Image.open(os.path.join(test_path, image_id)) as img:
        img = img.convert('RGB')  # the network expects 3 channels
        # PIL sizes are (width, height); the network input is 600 high x 800 wide.
        if img.size != (800, 600):
            img = img.resize((800, 600), resample=Image.NEAREST)
        pixels = np.array(img, dtype=np.float32)
    # predict() expects a leading batch axis: (1, 600, 800, 3).
    batch = np.expand_dims(pixels, axis=0)
    # NOTE(review): argmax is an index into the generator's class order. Keras is
    # expected to order classes by sorted name, and CBB < CBSD < CGM < CMD < Healthy
    # sorts the same way as the original ids 0-4 - confirm via base_train_gen.class_indices.
    predictions.append(int(np.argmax(baseline_cnn.predict(batch, verbose=0))))
sub = pd.DataFrame({'image_id': test_images, 'label': predictions})
sub.to_csv(os.path.join(RESULTSPATH, 'submission.csv'), index = False)