In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input mount,
# then echo the paths one per line.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

from tensorflow import keras
from keras.models import Sequential,load_model
from keras.layers import Dense,GlobalAveragePooling2D,Flatten,Conv2D,BatchNormalization,Dropout,MaxPooling2D,Activation
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator as Imgen

from PIL import Image
from sklearn.metrics import confusion_matrix,classification_report

In [None]:
# Training table; later cells read its `image` (file name) and `labels` columns.
train_df = pd.read_csv("../input/plant-pathology-2021-fgvc8/train.csv")

In [None]:
# (rows, columns) of the training table
train_df.shape

In [None]:
# Number of rows per distinct value of `labels` — shows how balanced the classes are
train_df.labels.value_counts()

In [None]:
# NOTE: despite its name, `validation` is the sample-submission table, not a
# validation split. Further down it supplies the test image file names
# (`image` column) and is the frame that gets written out as the submission.
validation = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')
validation.head()

In [None]:
# Random augmentation settings applied to every image this generator yields.
augmentation_options = dict(
    rotation_range=4,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Generator with Xception input preprocessing and a 20% hold-out fraction.
# NOTE: any iterator created from this generator (including subset="validation")
# receives the random augmentation above.
datagen = Imgen(
    preprocessing_function=keras.applications.xception.preprocess_input,
    validation_split=0.2,
    **augmentation_options,
)

In [None]:
# Settings shared by the training and validation iterators, kept in one place
# so the two cannot drift apart.
IMAGE_DIR = '../input/resized-plant2021/resized_plant2021/img_sz_384'
flow_kwargs = dict(
    directory=IMAGE_DIR,
    x_col='image',
    y_col='labels',
    color_mode="rgb",
    # NOTE(review): the directory name suggests 384px images, yet they are
    # resized to 512x512 here — confirm the upscaling is intended.
    target_size=(512, 512),
    class_mode="categorical",
    batch_size=32,
    seed=123,
)

# Training iterator: augmented and shuffled.
train_ds = datagen.flow_from_dataframe(
    train_df,
    subset="training",
    shuffle=True,
    **flow_kwargs,
)

# Validation images must not be randomly augmented, otherwise val_loss /
# val_accuracy are measured on distorted inputs and vary from run to run.
# Use a preprocessing-only generator. Its validation_split MUST stay equal to
# the one used by `datagen` so the two subsets remain disjoint.
val_datagen = Imgen(
    preprocessing_function=keras.applications.xception.preprocess_input,
    validation_split=0.2,
)

# Validation iterator: fixed order, so evaluation is reproducible and
# predictions can be lined up with `val_ds.classes` if needed.
val_ds = val_datagen.flow_from_dataframe(
    train_df,
    subset="validation",
    shuffle=False,
    **flow_kwargs,
)

In [None]:
# Label -> integer index mapping discovered by the training iterator.
a = train_ds.class_indices
# Iterating the dict yields its keys, i.e. the class labels, in mapping order.
class_names = list(a)
class_names

In [None]:
# Pull one batch from the training iterator: x = images, y = categorical label vectors
x,y = next(train_ds)
x.shape

In [None]:
#plot function
def plot_images(img,labels):
    """Show up to 25 images of a batch in a 5x5 grid, titled with their class name.

    Parameters
    ----------
    img : array-like
        Batch of images, indexed along the first axis.
    labels : array-like
        One score / one-hot vector per image; the argmax of each vector is
        looked up in the module-level ``class_names`` list for the title.

    Notes
    -----
    ``imshow`` clips floating-point data outside [0, 1]. Batches that contain
    negative values are therefore mapped from [-1, 1] to [0, 1] for display
    only (assumes Xception-style preprocessing — the caller's data is not
    modified).
    """
    img = np.asarray(img)
    if img.size and np.issubdtype(img.dtype, np.floating) and img.min() < 0:
        img = np.clip((img + 1.0) / 2.0, 0.0, 1.0)

    # Never index past the end of a short batch (e.g. the last one of an epoch).
    n_show = min(25, len(img), len(labels))

    plt.figure(figsize=(15,10))
    for i in range(n_show):
        plt.subplot(5,5,i+1)
        plt.imshow(img[i])
        plt.title(class_names[np.argmax(labels[i])])
        plt.axis('off')

In [None]:
# Visual sanity check of the batch drawn above
plot_images(x,y)

In [None]:
# Plain CNN: four conv + max-pool stages, then two dropout-regularised dense
# layers and a softmax classifier.
model = Sequential([

    # Feature extractor — input size matches the iterators' target_size.
    Conv2D(32,(3,3),activation='relu',input_shape=(512,512,3)),
    MaxPooling2D((2,2)),

    Conv2D(64,(3,3),activation='relu'),
    MaxPooling2D((2,2)),

    Conv2D(64,(3,3),activation='relu'),
    MaxPooling2D((2,2)),

    Conv2D(128,(3,3),activation='relu'),
    MaxPooling2D((2,2)),

    Flatten(),

    # Classifier head
    Dense(128,activation='relu'),
    Dropout(0.5),

    Dense(64,activation='relu'),
    Dropout(0.5),

    # One output unit per class found by the training iterator, rather than a
    # hard-coded 12, so the model always matches the label encoding.
    Dense(len(class_names),activation='softmax')

])

model.summary()

In [None]:
# Render the layer graph, annotated with shapes, dtypes and layer names
keras.utils.plot_model(model,
                      show_shapes=True,
                      show_dtype=True,
                      show_layer_names=True)

In [None]:
# Categorical cross-entropy matches the softmax output and class_mode="categorical" labels;
# 'accuracy' is the only extra metric, so it is index 1 of model.evaluate()'s result.
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy']) 

In [None]:
# Both callbacks track the same quantity (validation accuracy) so "best" means
# the same thing for stopping and for checkpointing.
my_calls = [
    # Stop after 2 epochs without improvement and roll the in-memory model back
    # to the best epoch, so the later evaluate()/predict() calls use the best
    # weights rather than those of the last (worse) epoch.
    keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                  patience=2,
                                  restore_best_weights=True),
    # Keep only the best-scoring model on disk.
    keras.callbacks.ModelCheckpoint("Model_xcp.h5",
                                    monitor='val_accuracy',
                                    verbose=1,
                                    save_best_only=True),
]

In [None]:
# Train for at most 25 epochs (the callbacks may stop earlier); `hist` keeps the per-epoch metrics
hist = model.fit(train_ds,epochs=25,validation_data=val_ds,callbacks=my_calls)

In [None]:
# evaluate() returns [loss, accuracy] for the metrics the model was compiled with.
scores = model.evaluate(val_ds, verbose=0)
val_accuracy_pct = scores[1] * 100
print("Accuracy: %.2f%%" % val_accuracy_pct)

In [None]:
# Training vs. validation curves, one panel per metric.
fig, axes = plt.subplots(1, 2, figsize=(15,6))

# (panel title, training-history key, validation-history key)
panels = (
    ("Accuracy", 'accuracy', 'val_accuracy'),
    ("Loss", 'loss', 'val_loss'),
)

for ax, (title, train_key, val_key) in zip(axes, panels):
    ax.plot(hist.epoch, hist.history[train_key], label = 'Training')
    ax.plot(hist.epoch, hist.history[val_key], label = 'validation')
    ax.set_title(title)
    ax.legend()

plt.show()

In [None]:
# Reminder of the sample-submission layout before building the test iterator
validation.head()

In [None]:
# Test-time generator: same input preprocessing as training, no augmentation.
test_gen = Imgen(preprocessing_function=keras.applications.xception.preprocess_input)

# Unlabelled iterator over the test images listed in the sample submission.
test_ds = test_gen.flow_from_dataframe(
    validation,
    directory='../input/plant-pathology-2021-fgvc8/test_images',
    x_col='image',
    y_col=None,
    class_mode=None,
    target_size=(512,512),
    # flow_from_dataframe shuffles by default. Predictions are later assigned
    # back to `validation` row by row, so the iteration order must follow the
    # dataframe order.
    shuffle=False,
)

In [None]:
# One row of softmax outputs per test image
pred = model.predict(test_ds)
pred

In [None]:
# Turn each row of class scores into the name of its highest-scoring class.
# `pred` is deliberately NOT overwritten: rebinding it to the argmax indices
# made a second run of this cell map every image to class 0.
pred_idx = np.argmax(pred, axis=1)
label_final = [class_names[i] for i in pred_idx]
print(label_final)

In [None]:
# Write the predictions into the `labels` column (the same column name the
# training table uses). Adding a separate `final_label` column would leave
# any existing `labels` values in the file unchanged.
validation['labels'] = label_final

# Only the image name and its predicted label belong in the submission file.
validation[['image', 'labels']].to_csv('submission.csv', index=False)

In [None]:
# Final look at the frame that was saved as the submission
validation