# Importing Libraries

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import os
import cv2 as cv
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Reading the Data

In [3]:
input_dir = 'rice_leaf_diseases'
output_dir_resized = 'rice_leaf_diseases_resized'
output_dir_downsampled = 'rice_leaf_diseases_downsampled'

In [4]:
# Create output directories if they don't exist
os.makedirs(output_dir_resized, exist_ok=True)
os.makedirs(output_dir_downsampled, exist_ok=True)

target_width = 224
target_height = 224
downsample_factor = 0.5  # Reducing image dimensions by 50%

In [5]:
# Iterate through each category
for category in os.listdir(input_dir):
    category_path = os.path.join(input_dir, category)
    resized_category_path = os.path.join(output_dir_resized, category)
    downsampled_category_path = os.path.join(output_dir_downsampled, category)
    os.makedirs(resized_category_path, exist_ok=True)
    os.makedirs(downsampled_category_path, exist_ok=True)

    # Iterate through images in the category
    for img_name in os.listdir(category_path):
        img_path = os.path.join(category_path, img_name)
        img = Image.open(img_path)

        # Resize image to the target width and height
        resized_img = img.resize((target_width, target_height))

        # Save resized image
        resized_img_path = os.path.join(resized_category_path, img_name)
        resized_img.save(resized_img_path)

        # Downsample image
        downsampled_img = img.resize((int(img.width * downsample_factor), int(img.height * downsample_factor)))

        # Save downsampled image
        downsampled_img_path = os.path.join(downsampled_category_path, img_name)
        downsampled_img.save(downsampled_img_path)

In [12]:
disease = os.listdir('rice_leaf_diseases_resized')
main_list = []
for i in range(len(disease)):
    sub_list=[]
    os.chdir('rice_leaf_diseases_resized/{}'.format(disease[i]))
    imgs = os.listdir()
    for j in imgs:
        img = cv.imread(j,-1).flatten()
        sub_list.append([img,disease[i]])
    main_list.append(sub_list)
    os.chdir('..')
    os.chdir('..')

In [13]:
df_main_list = []
for i in range(3):
    df = pd.DataFrame(main_list[i])
    df_main_list.append(df)
df = pd.concat(df_main_list,axis=0,ignore_index=True)
df = pd.concat([df[0].apply(pd.Series),df[1]],axis=1,ignore_index=True) # Explode the lists
# shuffle all the  rows
df = df.sample(frac=1).reset_index(drop=True)

In [14]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,150519,150520,150521,150522,150523,150524,150525,150526,150527,150528
0,255,255,255,255,255,255,255,255,255,255,...,255,255,255,255,255,255,255,255,255,Brown spot
1,221,224,238,221,224,238,221,224,238,221,...,216,225,239,216,225,239,215,224,238,Bacterial leaf blight
2,66,167,169,65,166,168,63,165,164,66,...,37,64,85,35,61,85,33,59,83,Leaf smut
3,36,117,84,36,117,84,34,118,84,35,...,16,125,99,15,123,100,14,122,99,Leaf smut
4,255,255,255,255,255,255,255,255,255,255,...,253,255,255,249,254,253,202,207,206,Bacterial leaf blight
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,224,226,237,224,226,237,224,226,237,224,...,210,213,227,210,213,227,210,213,227,Bacterial leaf blight
116,227,229,240,227,229,240,227,229,240,226,...,219,222,236,219,222,236,219,222,236,Leaf smut
117,219,228,238,219,228,238,219,228,238,219,...,217,226,239,217,226,239,217,226,239,Bacterial leaf blight
118,211,214,229,211,214,229,211,214,229,211,...,211,214,228,211,214,228,211,214,228,Bacterial leaf blight


# Data Preprocessing

In [16]:
x = df.iloc[:,:-1]

In [20]:
y = df.iloc[:,-1]

In [22]:
x_train, x_test, y_train, y_test =train_test_split(x,y,test_size=0.15)

# Model Training and Prediction

In [23]:
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(criterion = 'entropy', random_state =47)
tree.fit(x_train, y_train)

In [24]:
y_pred = tree.predict(x_test)

# Model Evaluation

In [25]:
from sklearn.metrics import confusion_matrix, accuracy_score

print("ACC :", accuracy_score(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

ACC : 0.7222222222222222
[[5 1 3]
 [0 4 0]
 [0 1 4]]
