<h1><center> 🍎 Classify foliar diseases in apple trees</center></h1>

# 1. Problem Statement

Apples are one of the most important temperate fruit crops in the world. Foliar (leaf) diseases pose a major threat to the overall productivity and quality of apple orchards. The current process for disease diagnosis in apple orchards is based on manual scouting by humans, which is time-consuming and expensive.

The main objective of the competition is to develop machine learning-based models to accurately classify a given leaf image from the test dataset to a particular disease category, and to identify an individual disease from multiple disease symptoms on a single leaf image.


## libraries 

In [None]:
# OpenCV is pinned to 3.4.2.17. The contrib build provides the cv2.xfeatures2d
# module that the SIFT cells of this notebook call.
# NOTE(review): the opencv-python README advises installing only one of the
# opencv-* wheels per environment — confirm that both installs are needed.
!pip install opencv-python==3.4.2.17
!pip install opencv-contrib-python==3.4.2.17

In [None]:
import numpy as np
import pandas as pd
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
import os
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import random
import albumentations as A
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense,Activation,Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping

import seaborn as sns
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

tqdm.pandas()
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

np.random.seed(0)
tf.random.set_seed(0)

In [None]:
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# 2. About Dataset

In [None]:
# Root folder of the competition dataset as mounted in the notebook environment.
_dataset_root = '../input/plant-pathology-2021-fgvc8'

# Image folders. Only the training folder ends with '/'.
train_image_path = f'{_dataset_root}/train_images/'
test_image_path = f'{_dataset_root}/test_images'

# CSV files: training labels, and the sample submission used as the test frame.
train_df_path = f'{_dataset_root}/train.csv'
test_df_path = f'{_dataset_root}/sample_submission.csv'

> 📌**Note**:
* `train.csv` contains information about the image files available in `train_images`. It has 18,632 rows (one per image) and 2 columns: `image` and `labels`.
* `test_images` contains the test set images. This competition has a hidden test set: only three images are provided here as samples, while the remaining 5,000 images will be available to your notebook once it is submitted.

In [None]:
# Training metadata: one row per image with its file name and label string.
df_train = pd.read_csv(train_df_path)
# The "test" frame is read from test_df_path, which points at sample_submission.csv.
df_test = pd.read_csv(test_df_path)

In [None]:
df_train.head()

In [None]:
df_test

In [None]:
df_train.labels.value_counts()

In [None]:
# Bar chart of how many training images carry each raw label string.
# Count once and reuse the result for both axes.
label_counts = df_train.labels.value_counts()

plt.figure(figsize=(15,12))
# Pass x/y by keyword: seaborn deprecated positional data arguments in 0.11
# and rejects them from 0.12 on.
# `labels` is the matplotlib Axes returned by seaborn (name kept from the original cell).
labels = sns.barplot(x=label_counts.index, y=label_counts)
# Tilt the tick labels so the long multi-disease label strings do not overlap.
for item in labels.get_xticklabels():
    item.set_rotation(45)

> 📌**Note**:
* An image can carry more than one label, e.g. the label can be **scab** alone or a combination such as **scab and rust**
* Main labels are - **scab** , **healthy** , **frog_eye_leaf_spot** , **rust** , **complex** and **powdery_mildew**

## Batch Visualisation of Images 

In [None]:
def batch_visualize(df,batch_size,path):
    """Show a random sample of images from ``df`` in a grid, titled with their labels.

    Args:
        df: DataFrame with an "image" column (file names) and a "labels" column.
        batch_size: Number of images to sample and display.
        path: Directory that contains the image files.
    """
    # Sample from the frame that was passed in (not the global df_train) and
    # never request more rows than the frame holds.
    sample_df = df.sample(min(batch_size, len(df)))
    image_names = sample_df["image"].values
    labels = sample_df["labels"].values

    # Three columns and as many rows as needed; batch_size=9 gives a 3x3 grid.
    n_cols = 3
    n_rows = (len(image_names) + n_cols - 1) // n_cols
    plt.figure(figsize=(16, 12))

    for image_ind, (image_name, label) in enumerate(zip(image_names, labels)):
        plt.subplot(n_rows, n_cols, image_ind + 1)
        image = cv2.imread(os.path.join(path, image_name))
        # OpenCV loads images as BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        plt.imshow(image)
        plt.title(f"{label}", fontsize=12)
        plt.axis("off")
    plt.show()
    
batch_visualize(df_train,9,train_image_path)

## Batch visualisation with labels

In [None]:
def batch_visualize_with_label(df,batch_size,path,label):
    """Show a random sample of images whose label string equals ``label``.

    Args:
        df: DataFrame with an "image" column (file names) and a "labels" column.
        batch_size: Number of images to sample and display.
        path: Directory that contains the image files.
        label: Exact value of the "labels" column to filter on.
    """
    # Filter the frame that was passed in (not the global df_train) and cap the
    # sample size at the number of matching rows.
    matching = df[df["labels"] == label]
    sample_df = matching.sample(min(batch_size, len(matching)))
    image_names = sample_df["image"].values

    # Three columns and as many rows as needed; batch_size=9 gives a 3x3 grid.
    n_cols = 3
    n_rows = (len(image_names) + n_cols - 1) // n_cols
    plt.figure(figsize=(16, 12))

    for image_ind, image_name in enumerate(image_names):
        plt.subplot(n_rows, n_cols, image_ind + 1)
        image = cv2.imread(os.path.join(path, image_name))
        # OpenCV loads images as BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        plt.imshow(image)
        plt.axis("off")
    plt.show()

### Visualise healthy leaves

In [None]:
batch_visualize_with_label(df_train,9,train_image_path,'healthy')

### Visualise scab leaves 

In [None]:
batch_visualize_with_label(df_train,9,train_image_path,'scab')

### Visualise frog_eye_leaf_spot  leaves

In [None]:
batch_visualize_with_label(df_train,9,train_image_path,'frog_eye_leaf_spot')

### Visualise rust leaves 

In [None]:
batch_visualize_with_label(df_train,9,train_image_path,'rust')

### Visualise complex leaves

In [None]:
batch_visualize_with_label(df_train,9,train_image_path,'complex')

### Visualise powdery_mildew leaves

In [None]:
batch_visualize_with_label(df_train,9,train_image_path,'powdery_mildew')

## Visualize with color histogram

In [None]:
# Number of training images loaded into memory for the colour statistics.
SAMPLE_LEN = 100

def load_image(file_path):
    """Read one training image from disk and return it in RGB channel order.

    Args:
        file_path: Image file name, relative to ``train_image_path``.

    Returns:
        The result of converting the loaded image from BGR to RGB.
    """
    # os.path.join works whether or not train_image_path ends with a separator,
    # unlike plain string concatenation.
    image = cv2.imread(os.path.join(train_image_path, file_path))
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Load the first SAMPLE_LEN images with a tqdm progress bar.
train_images = df_train["image"][:SAMPLE_LEN].progress_apply(load_image)

### All channel values

In [None]:
# Per-image mean intensity of channel 0, 1 and 2 (R, G, B — the images were
# converted to RGB on load), plus the mean over all channels at once.
red_values = [np.mean(rgb[:, :, 0]) for rgb in train_images]
green_values = [np.mean(rgb[:, :, 1]) for rgb in train_images]
blue_values = [np.mean(rgb[:, :, 2]) for rgb in train_images]
values = [np.mean(rgb) for rgb in train_images]

In [None]:
# Distribution of the per-image mean over all channels.
fig = ff.create_distplot([values], group_labels=["Channels"], colors=["purple"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of channel values",
)
# Thin black outline on the first trace's markers.
fig.data[0].marker.line.update(color='rgb(0, 0, 0)', width=0.5)
fig

### Red channel values

In [None]:
# Distribution of the per-image red-channel mean.
fig = ff.create_distplot([red_values], group_labels=["R"], colors=["red"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of red channel values",
)
# Thin black outline on the first trace's markers.
fig.data[0].marker.line.update(color='rgb(0, 0, 0)', width=0.5)
fig

### Green channel values

In [None]:
# Distribution of the per-image green-channel mean.
fig = ff.create_distplot([green_values], group_labels=["G"], colors=["green"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of green channel values",
)
# Thin black outline on the first trace's markers.
fig.data[0].marker.line.update(color='rgb(0, 0, 0)', width=0.5)
fig

### Blue channel values

In [None]:
# Distribution of the per-image blue-channel mean.
fig = ff.create_distplot([blue_values], group_labels=["B"], colors=["blue"])
fig.update_layout(
    showlegend=False,
    template="simple_white",
    title_text="Distribution of blue channel values",
)
# Thin black outline on the first trace's markers.
fig.data[0].marker.line.update(color='rgb(0, 0, 0)', width=0.5)
fig

### All channels together

In [None]:
# One box per colour channel, built from the per-image channel means.
fig = go.Figure()

# Use a dedicated loop name so the module-level `values` list (all-channel
# means) is not overwritten by the last channel's list.
channel_series = zip(["Red", "Green", "Blue"], [red_values, green_values, blue_values])
for color, channel_means in channel_series:
    fig.add_trace(go.Box(x=[color]*len(channel_means), y=channel_means, name=color,
                         marker=dict(color=color.lower())))

fig.update_layout(yaxis_title="Mean value", xaxis_title="Color channel",
                  title="Mean value vs. Color channel", template="plotly_white")

In [None]:
# Overlay the distributions of the three per-image channel means.
fig = ff.create_distplot([red_values, green_values, blue_values],
                         group_labels=["R", "G", "B"],
                         colors=["red", "green", "blue"])
# This figure shows all three channels, so the title must not single out red.
fig.update_layout(title_text="Distribution of all channel values", template="simple_white")
# Traces 0-2 (one per channel group) get a thin black marker outline.
for channel_trace in fig.data[:3]:
    channel_trace.marker.line.color = 'rgb(0, 0, 0)'
    channel_trace.marker.line.width = 0.5
fig

## Visualize targets

In [None]:
# Add one 0/1 indicator column per disease name: 1 when the name appears as a
# space-separated token of the row's "labels" string, otherwise 0.
for disease in ('healthy', 'rust', 'scab', 'frog_eye_leaf_spot', 'powdery_mildew', 'complex'):
    df_train[disease] = [int(disease in x.split(' ')) for x in df_train['labels']]

In [None]:
# Copy of the training frame without the raw "labels" string column; preview the first rows.
df_train_drop_labels = df_train.drop('labels', axis=1)
df_train_drop_labels.head()

In [None]:
# prepare data: turn the 0/1 indicator into 'True'/'False' strings
df_train["Healthy"] = df_train["healthy"].map(lambda flag: str(bool(flag)))

healthy_flags = df_train["Healthy"]
true = healthy_flags[healthy_flags == 'True']
false = healthy_flags[healthy_flags == 'False']

# One semi-transparent histogram per flag value, overlaid on the same axes.
trace1 = go.Histogram(x=true, opacity=0.75, name="Healthy",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=false, opacity=0.75, name="Unhealthy",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='Healthy distribution',
    xaxis=dict(title='Healthy'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# prepare data: turn the 0/1 indicator into 'True'/'False' strings
df_train["Scab"] = df_train["scab"].map(lambda flag: str(bool(flag)))

scab_flags = df_train["Scab"]
true = scab_flags[scab_flags == 'True']
false = scab_flags[scab_flags == 'False']

# One semi-transparent histogram per flag value, overlaid on the same axes.
trace1 = go.Histogram(x=true, opacity=0.75, name="True",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=false, opacity=0.75, name="False",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='Scab distribution',
    xaxis=dict(title='Scab'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# prepare data: turn the 0/1 indicator into 'True'/'False' strings
df_train["Rust"] = df_train["rust"].map(lambda flag: str(bool(flag)))

rust_flags = df_train["Rust"]
true = rust_flags[rust_flags == 'True']
false = rust_flags[rust_flags == 'False']

# One semi-transparent histogram per flag value, overlaid on the same axes.
trace1 = go.Histogram(x=true, opacity=0.75, name="True",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=false, opacity=0.75, name="False",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='Rust distribution',
    xaxis=dict(title='Rust'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# prepare data: turn the 0/1 indicator into 'True'/'False' strings
df_train["Frogeye"] = df_train["frog_eye_leaf_spot"].map(lambda flag: str(bool(flag)))

frogeye_flags = df_train["Frogeye"]
true = frogeye_flags[frogeye_flags == 'True']
false = frogeye_flags[frogeye_flags == 'False']

# One semi-transparent histogram per flag value, overlaid on the same axes.
trace1 = go.Histogram(x=true, opacity=0.75, name="True",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=false, opacity=0.75, name="False",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='Frogeye leaf spot distribution',
    xaxis=dict(title='Frogeye'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# prepare data: turn the 0/1 indicator into 'True'/'False' strings
df_train["Powdery"] = df_train["powdery_mildew"].map(lambda flag: str(bool(flag)))

powdery_flags = df_train["Powdery"]
true = powdery_flags[powdery_flags == 'True']
false = powdery_flags[powdery_flags == 'False']

# One semi-transparent histogram per flag value, overlaid on the same axes.
trace1 = go.Histogram(x=true, opacity=0.75, name="True",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=false, opacity=0.75, name="False",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='Powdery mildew distribution',
    xaxis=dict(title='Powdery mildew'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# prepare data: turn the 0/1 indicator into 'True'/'False' strings
df_train["Complex"] = df_train["complex"].map(lambda flag: str(bool(flag)))

complex_flags = df_train["Complex"]
true = complex_flags[complex_flags == 'True']
false = complex_flags[complex_flags == 'False']

# One semi-transparent histogram per flag value, overlaid on the same axes.
trace1 = go.Histogram(x=true, opacity=0.75, name="True",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=false, opacity=0.75, name="False",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='Complex distribution',
    xaxis=dict(title='Complex'),
    yaxis=dict(title='Count'),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Visualize frequency map

### A single sample image

In [None]:
# Load one training image as grayscale (IMREAD_GRAYSCALE is the named form of
# flag value 0) and display it.
sample_path = '../input/plant-pathology-2021-fgvc8/train_images/800113bb65efe69e.jpg'
img = cv2.imread(sample_path, cv2.IMREAD_GRAYSCALE)
plt.imshow(img, cmap='gray')
plt.axis("off")
plt.show()

In [None]:
# convert image to floats and do dft saving as complex output
# (the code below reads index 0 and 1 of the last axis as the two components)
dft = cv2.dft(np.float32(img), flags = cv2.DFT_COMPLEX_OUTPUT)

# apply shift of origin from upper left corner to center of image;
# shift only the two spatial axes so the component axis is left in place
dft_shift = np.fft.fftshift(dft, axes=(0, 1))

# log1p (log(1 + x)) compresses the dynamic range and stays finite where the
# magnitude is exactly 0, unlike a plain log
magnitude_spectrum = np.log1p(cv2.magnitude(dft_shift[:,:,0],dft_shift[:,:,1]))
fig = plt.figure(figsize=(8,8))
plt.imshow(magnitude_spectrum, cmap='gray')
plt.axis("off")
plt.show()

### Batch visualization by label

In [None]:
def frequency_visualize_by_label(df,batch_size,path,label):
    """Show log-magnitude DFT spectra for a random sample of images with ``label``.

    Args:
        df: DataFrame with an "image" column (file names) and a "labels" column.
        batch_size: Number of images to sample and display.
        path: Directory that contains the image files.
        label: Exact value of the "labels" column to filter on.
    """
    # Filter the frame that was passed in (not the global df_train) and cap the
    # sample size at the number of matching rows.
    matching = df[df["labels"] == label]
    sample_df = matching.sample(min(batch_size, len(matching)))
    image_names = sample_df["image"].values

    # Three columns and as many rows as needed; batch_size=9 gives a 3x3 grid.
    n_cols = 3
    n_rows = (len(image_names) + n_cols - 1) // n_cols
    plt.figure(figsize=(16, 12))

    for image_ind, image_name in enumerate(image_names):
        plt.subplot(n_rows, n_cols, image_ind + 1)
        img = cv2.imread(os.path.join(path, image_name), 0)
        # convert image to floats and do dft saving as complex output
        dft = cv2.dft(np.float32(img), flags = cv2.DFT_COMPLEX_OUTPUT)

        # apply shift of origin from upper left corner to center of image;
        # shift only the two spatial axes so the component axis is left in place
        dft_shift = np.fft.fftshift(dft, axes=(0, 1))

        # log1p stays finite where the magnitude is exactly 0
        magnitude_spectrum = np.log1p(cv2.magnitude(dft_shift[:,:,0],dft_shift[:,:,1]))
        plt.imshow(magnitude_spectrum, cmap = 'gray')
        plt.axis("off")
    plt.show()

In [None]:
frequency_visualize_by_label(df_train,9,train_image_path,'healthy')

In [None]:
frequency_visualize_by_label(df_train,9,train_image_path,'rust')

In [None]:
frequency_visualize_by_label(df_train,9,train_image_path,'powdery_mildew')

In [None]:
frequency_visualize_by_label(df_train,9,train_image_path,'scab')

In [None]:
frequency_visualize_by_label(df_train,9,train_image_path,'frog_eye_leaf_spot')

In [None]:
frequency_visualize_by_label(df_train,9,train_image_path,'complex')

## Local Features

### Histogram of Gradient (HoG)

#### A single sample image

In [None]:
import skimage
import copy
# Import the submodules explicitly: a bare `import skimage` is not guaranteed
# to expose skimage.feature / skimage.exposure on every scikit-image version.
import skimage.exposure
import skimage.feature

img = cv2.imread('../input/plant-pathology-2021-fgvc8/test_images/ad8770db05586b59.jpg')

# Shrink the image before computing HoG.
scale_percent = 40 # percent of original size
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
 
# resize image
resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# NOTE(review): newer scikit-image releases replace `multichannel` with
# `channel_axis` — confirm against the installed version.
fd, hog_image = skimage.feature.hog(resized, orientations=8, pixels_per_cell=(16, 16),
                                    cells_per_block=(1, 1), visualize=True, multichannel=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6), sharex=True, sharey=True)

ax1.axis('off')
# cv2.imread returns BGR; convert to RGB for display so red and blue are not swapped.
ax1.imshow(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
ax1.set_title('Input image')

# Rescale histogram for better display
hog_image_rescaled = skimage.exposure.rescale_intensity(hog_image, in_range=(0, 5))

ax2.axis('off')
ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax2.set_title('Histogram of Oriented Gradients')
plt.show()

#### HoG by label

In [None]:
def hog_visualize_by_label(df,batch_size,path,label):
    """Show HoG visualisations for a random sample of images with ``label``.

    Args:
        df: DataFrame with an "image" column (file names) and a "labels" column.
        batch_size: Number of images to sample and display.
        path: Directory that contains the image files.
        label: Exact value of the "labels" column to filter on.
    """
    # Local import so the function does not depend on another cell having
    # made the skimage submodules available.
    from skimage import exposure, feature

    # Filter the frame that was passed in (not the global df_train) and cap the
    # sample size at the number of matching rows.
    matching = df[df["labels"] == label]
    sample_df = matching.sample(min(batch_size, len(matching)))
    image_names = sample_df["image"].values

    # Three columns and as many rows as needed; batch_size=9 gives a 3x3 grid.
    n_cols = 3
    n_rows = (len(image_names) + n_cols - 1) // n_cols
    plt.figure(figsize=(16, 12))

    for image_ind, image_name in enumerate(image_names):
        plt.subplot(n_rows, n_cols, image_ind + 1)
        img = cv2.imread(os.path.join(path, image_name))

        scale_percent = 40 # percent of original size
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)

        # resize image
        resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

        # NOTE(review): newer scikit-image releases replace `multichannel`
        # with `channel_axis` — confirm against the installed version.
        fd, hog_image = feature.hog(resized, orientations=8, pixels_per_cell=(16, 16),
                                    cells_per_block=(1, 1), visualize=True, multichannel=True)

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

        plt.imshow(hog_image_rescaled, cmap=plt.cm.gray)
        plt.axis("off")
    plt.show()

In [None]:
hog_visualize_by_label(df_train,9,train_image_path,'healthy')

In [None]:
hog_visualize_by_label(df_train,9,train_image_path,'rust')

In [None]:
hog_visualize_by_label(df_train,9,train_image_path,'powdery_mildew')

In [None]:
hog_visualize_by_label(df_train,9,train_image_path,'scab')

In [None]:
hog_visualize_by_label(df_train,9,train_image_path,'frog_eye_leaf_spot')

In [None]:
hog_visualize_by_label(df_train,9,train_image_path,'complex')

### Scale-Invariant Feature Transform (SIFT)

#### A single sample image

In [None]:
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('../input/plant-pathology-2021-fgvc8/train_images/8002cb321f8bfcdf.jpg')

# Shrink the image before detecting keypoints.
scale_percent = 20 # percent of original size
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
 
# resize image
resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)

gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)

# Newer OpenCV builds expose SIFT as cv2.SIFT_create; the pinned contrib build
# used by this notebook provides it as cv2.xfeatures2d.SIFT_create. Support both.
sift = cv2.SIFT_create() if hasattr(cv2, "SIFT_create") else cv2.xfeatures2d.SIFT_create()

kp, des = sift.detectAndCompute(gray,None)

# Draw the detected keypoints on the grayscale image using the rich-keypoints flag.
img=cv2.drawKeypoints(gray,kp,img, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

plt.figure(figsize=(8, 4))
plt.imshow(img)
plt.axis('off')
# Call show(); the bare attribute reference `plt.show` did nothing.
plt.show()

#### Visualize by label

In [None]:
def sift_visualize_by_label(df,batch_size,path,label):
    """Show SIFT keypoints for a random sample of images with ``label``.

    Args:
        df: DataFrame with an "image" column (file names) and a "labels" column.
        batch_size: Number of images to sample and display.
        path: Directory that contains the image files.
        label: Exact value of the "labels" column to filter on.
    """
    # Filter the frame that was passed in (not the global df_train) and cap the
    # sample size at the number of matching rows.
    matching = df[df["labels"] == label]
    sample_df = matching.sample(min(batch_size, len(matching)))
    image_names = sample_df["image"].values

    # Build the detector once instead of once per image. Newer OpenCV builds
    # expose it as cv2.SIFT_create; the pinned contrib build used by this
    # notebook provides cv2.xfeatures2d.SIFT_create.
    sift = cv2.SIFT_create() if hasattr(cv2, "SIFT_create") else cv2.xfeatures2d.SIFT_create()

    # Three columns and as many rows as needed; batch_size=9 gives a 3x3 grid.
    n_cols = 3
    n_rows = (len(image_names) + n_cols - 1) // n_cols
    plt.figure(figsize=(16, 12))

    for image_ind, image_name in enumerate(image_names):
        plt.subplot(n_rows, n_cols, image_ind + 1)
        img = cv2.imread(os.path.join(path, image_name))

        scale_percent = 40 # percent of original size
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)

        # resize image
        resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)

        kp, des = sift.detectAndCompute(gray,None)

        # Draw the detected keypoints on the grayscale image using the rich-keypoints flag.
        img = cv2.drawKeypoints(gray, kp, img, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

        plt.imshow(img, cmap=plt.cm.gray)
        plt.axis("off")
    plt.show()

In [None]:
sift_visualize_by_label(df_train,9,train_image_path,'healthy')

In [None]:
sift_visualize_by_label(df_train,9,train_image_path,'rust')

In [None]:
sift_visualize_by_label(df_train,9,train_image_path,'powdery_mildew')

In [None]:
sift_visualize_by_label(df_train,9,train_image_path,'scab')

In [None]:
sift_visualize_by_label(df_train,9,train_image_path,'frog_eye_leaf_spot')

In [None]:
sift_visualize_by_label(df_train,9,train_image_path,'complex')