In [4]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt


# Root folder of the processed fruit-quality dataset.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# other cells may already refer to it.
dir = '/run/media/csr/New Volume/Python/Computer Vision/Project/Dataset/Processed Images_Fruits/'

# Maps each quality folder under `dir` to the class label given to its images.
# NOTE(review): 'Mixed Qualit_Fruits' is kept exactly as written; presumably it
# matches the folder name on disk -- verify before "fixing" the spelling.
categories = {
    'Bad Quality_Fruits': 'bad',
    'Good Quality_Fruits': 'good',
    'Mixed Qualit_Fruits': 'mixed'  
}


def collect_image_paths(root, folder_labels):
    """Collect file paths and labels from a ``root/<quality>/<fruit>/<file>`` tree.

    Args:
        root: Directory that contains one sub-folder per quality class.
        folder_labels: Mapping of quality-folder name to the label assigned
            to every file found beneath that folder.

    Returns:
        A ``(paths, labels)`` pair of equal-length lists, one entry per file.

    Quality folders that are missing are reported with a printed message and
    skipped. Entries that are not directories at the fruit level, and entries
    that are not regular files at the image level, are ignored.
    """
    paths, path_labels = [], []

    for folder, label in folder_labels.items():
        quality_path = os.path.join(root, folder)
        if not os.path.isdir(quality_path):
            print(f"Missing path: {quality_path}")
            continue

        for fruit_folder in os.listdir(quality_path):
            fruit_path = os.path.join(quality_path, fruit_folder)
            # A stray file next to the fruit folders would make the inner
            # os.listdir() raise NotADirectoryError.
            if not os.path.isdir(fruit_path):
                continue

            for img_name in os.listdir(fruit_path):
                img_path = os.path.join(fruit_path, img_name)
                # Nested directories are not images; do not record them.
                if not os.path.isfile(img_path):
                    continue
                paths.append(img_path)
                path_labels.append(label)

    return paths, path_labels


filepaths, labels = collect_image_paths(dir, categories)

# One row per file: its path and its quality label.
df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})


# Two-stage stratified split on the label column: 90% of `df` becomes the
# training set, and the held-out remainder is halved into test and validation.
# Both stages share the same shuffle setting and seed.
_split_opts = {'shuffle': True, 'random_state': 123}

train_df, dummy_df = train_test_split(
    df,
    train_size=0.9,
    stratify=df['labels'],
    **_split_opts,
)
test_df, valid_df = train_test_split(
    dummy_df,
    train_size=0.5,
    stratify=dummy_df['labels'],
    **_split_opts,
)


def extract_features(img_path, size=(64, 64), bins=(8, 8, 8)):
    """Return a flattened, normalized colour histogram for one image.

    The image is read with OpenCV, resized to ``size`` and summarised as a
    joint 3-D histogram over its three channels, using ``bins[i]`` bins on
    channel ``i`` over the value range [0, 256). The histogram is then
    normalized with ``cv2.normalize`` (default settings) and flattened.

    Args:
        img_path: Path of the image file to read.
        size: Target size handed to ``cv2.resize``.
        bins: Number of histogram bins for each of the three channels.

    Returns:
        A 1-D ``float32`` array with ``bins[0] * bins[1] * bins[2]`` entries
        (512 with the defaults). When the image cannot be read, an all-zero
        vector of the same length and dtype is returned and a
        ``RuntimeWarning`` is issued.
    """
    import warnings

    # Derive the feature length from the bin counts so the fallback vector
    # always has the same shape as a real histogram.
    n_features = int(np.prod(bins))

    img = cv2.imread(img_path)
    if img is None:
        # Keep the best-effort fallback, but make it visible: zero rows that
        # slip into the training data unnoticed degrade the model silently.
        warnings.warn(
            f"Could not read image, using zero features: {img_path}",
            RuntimeWarning,
            stacklevel=2,
        )
        # float32 matches the dtype of the histogram returned by calcHist.
        return np.zeros((n_features,), dtype=np.float32)

    img = cv2.resize(img, tuple(size))
    hist = cv2.calcHist([img], [0, 1, 2], None, list(bins), [0, 256] * 3)
    hist = cv2.normalize(hist, hist).flatten()
    return hist

def _features_and_labels(frame):
    """Return the feature matrix and the raw label array for one split."""
    feats = np.array([extract_features(path) for path in frame['filepaths']])
    return feats, frame['labels'].values


# Histogram features and string labels for every split. The validation
# arrays are built here but are not used by the rest of this block.
X_train, y_train = _features_and_labels(train_df)
X_valid, y_valid = _features_and_labels(valid_df)
X_test, y_test = _features_and_labels(test_df)

# Integer-encode the string labels; the encoder is fitted on the training
# labels only and then applied to the other two splits.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_valid_enc, y_test_enc = (le.transform(y) for y in (y_valid, y_test))

# Fit a random forest on the training features.
_forest_params = {'n_estimators': 50, 'max_depth': 5, 'random_state': 42}
clf = RandomForestClassifier(**_forest_params)
clf.fit(X_train, y_train_enc)

# Score the held-out test split.
y_pred = clf.predict(X_test)
acc = accuracy_score(y_test_enc, y_pred)
print(f"\n🌳 Random Forest Accuracy: {acc:.2f}")

print("\n📊 Classification Report:")
_report = classification_report(y_test_enc, y_pred, target_names=le.classes_)
print(_report)



🌳 Random Forest Accuracy: 0.87

📊 Classification Report:
              precision    recall  f1-score   support

         bad       0.89      0.85      0.87       339
        good       0.87      0.95      0.91       583
       mixed       0.69      0.17      0.27        54

    accuracy                           0.87       976
   macro avg       0.82      0.66      0.68       976
weighted avg       0.87      0.87      0.86       976

