In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2

In [None]:
from pathlib import Path

# Root folders of the three image variants shipped with the dataset.
data_color = '/kaggle/input/plantvillage-dataset/color'
data_grayscale = '/kaggle/input/plantvillage-dataset/grayscale'
data_segmented = '/kaggle/input/plantvillage-dataset/segmented'

# For every entry under the colour root whose name contains a dot, keep the
# last two path components: (parent folder, file name).
paths = [entry.parts[-2:] for entry in Path(data_color).rglob('*.*')]

# One row per file, ordered by class, with a fresh 0..n-1 index.
df = (
    pd.DataFrame(data=paths, columns=['Class', 'Images'])
    .sort_values('Class', ascending=True)
    .reset_index(drop=True)
)

In [None]:
class_names = df["Class"].unique()

# The species is taken as the text before the first underscore of each class
# label. dict.fromkeys drops repeats while keeping first-seen order.
species = list(dict.fromkeys(label.split("_")[0] for label in class_names))

print(f"Number of distinct species: {len(species)}")
print(species, '\n')

print(f"Number of distinct classes: {len(class_names)}")
print(class_names)

# Last expression of the cell: number of images per class.
df["Class"].value_counts()

In [None]:
# Bar chart of the number of images in each class.
plt.figure(figsize=(10,10))
# Name the frame and the column by keyword: a bare positional argument is
# interpreted as `data` (not as the x variable) by newer seaborn releases.
sns.countplot(data=df, x='Class', palette='muted')
plt.title('Distribution of Classes')
# Rotate the tick labels so class names stay legible when they are long or numerous.
plt.xticks(rotation=90)
plt.show()

In [None]:
def aspect_ratio(img):
    """Return the width-to-height ratio of an image array.

    ``img.shape[1]`` is taken as the width and ``img.shape[0]`` as the height.
    """
    width = img.shape[1]
    height = img.shape[0]
    return width / height

def resolution(img):
    """Format an image array's dimensions as a ``'<width>x<height>'`` string."""
    rows, cols = img.shape[0], img.shape[1]
    return '{}x{}'.format(cols, rows)

def blur_score(path):
    """Return the variance of the Laplacian of the image stored at ``path``.

    Parameters
    ----------
    path : str
        Location of the image file, passed to ``cv2.imread``.

    Returns
    -------
    float or None
        Variance of the Laplacian of the grayscale image, or ``None`` when
        ``cv2.imread`` could not load the file.
    """
    img = cv2.imread(path)
    # The guard must run before any conversion: cvtColor raises on a None
    # image, which would make the None return unreachable.
    if img is None:
        return None

    # Convert BGR straight to grayscale; the intermediate RGB copy is not needed.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.Laplacian(gray, cv2.CV_64F).var()

def brightness(path):
    """Return the mean of the HSV value channel of the image at ``path``.

    Returns ``None`` when ``cv2.imread`` could not load the file.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        return None
    # Index 2 of the last axis is the V channel after the BGR -> HSV conversion.
    value_channel = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)[..., 2]
    return value_channel.mean()

def has_black_background(path):
    """Return the fraction of near-black pixels in the image at ``path``.

    A pixel counts as near-black when its grayscale intensity is below 10.
    The fraction is rounded to two decimal places. ``None`` is returned when
    ``cv2.imread`` could not load the file.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        return None

    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    dark_mask = gray < 10
    return round(dark_mask.sum() / gray.size, 2)

In [None]:
def _sorted_score_frame(records, column):
    """Build a two-column frame from ``records`` sorted ascending by ``column``.

    Passing ``columns=`` keeps the expected columns even when ``records`` is
    empty, so the sort cannot fail with a KeyError.
    """
    frame = pd.DataFrame(records, columns=['Image', column])
    return frame.sort_values(by=column, ascending=True, ignore_index=True)


# Per-image records; each list is converted to a DataFrame after the scan.
data=[]
blur_scores=[]
brightness_scores=[]
black_score=[]

# NOTE: `dir` shadows the builtin of the same name; the name is kept in case
# later cells read it.
dir=r'/kaggle/input/plantvillage-dataset/color'

for class_name in os.listdir(dir):
    class_dir=os.path.join(dir,class_name)
    # Skip stray files next to the class folders; os.listdir would raise on them.
    if not os.path.isdir(class_dir):
        continue

    for image_name in os.listdir(class_dir):
        img_path=os.path.join(class_dir,image_name)
        img=cv2.imread(img_path)
        # imread returns None for files it cannot decode; skip those instead
        # of crashing on the first non-image file.
        if img is None:
            continue

        # Only the array shape is used here, so no colour conversion is needed.
        data.append({'Class':class_name,'Image':image_name,
                     'aspect_ratio':aspect_ratio(img),'resolution':resolution(img)})

        # Each metric helper loads the file itself from its path.
        blur_scores.append({'Image':img_path,'Blur Score':blur_score(img_path)})
        brightness_scores.append({'Image':img_path,'Brightness Score':brightness(img_path)})
        black_score.append({'Image':img_path,'Black Score':has_black_background(img_path)})

data=pd.DataFrame(data,columns=['Class','Image','aspect_ratio','resolution'])
print(data.head(),'\n')

blur_scores=_sorted_score_frame(blur_scores,'Blur Score')
print(blur_scores.head(),'\n')

brightness_scores=_sorted_score_frame(brightness_scores,'Brightness Score')
print(brightness_scores.head(),'\n')

black_score=_sorted_score_frame(black_score,'Black Score')
print(black_score.head(),'\n')

In [None]:
# One histogram per quality metric, each drawn on its own figure.
for scores, column in (
    (blur_scores, 'Blur Score'),
    (brightness_scores, 'Brightness Score'),
    (black_score, 'Black Score'),
):
    sns.histplot(scores[column])
    plt.show()

In [None]:
# Keep the images whose share of near-black pixels exceeds 0.09 and report how many there are.
black = black_score[black_score['Black Score'] > 0.09]
print(len(black))

# Display at most the first five of them; `i` ends up holding the number shown.
i = 0
for i, path in enumerate(black['Image'].head(5), start=1):
    # OpenCV loads BGR, matplotlib expects RGB.
    img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.axis("off")
    plt.show()