In [None]:
from google.colab import files
import zipfile
import os

# Open Colab's upload prompt. The result is keyed by uploaded filename; the
# next cell takes the first key as the dataset zip archive to extract.
uploaded = files.upload()

In [None]:
# Dataset root on the Colab VM and the class sub-folders read by later cells.
data_dir = '/content/grayscale_defungi_224'
class_names = ['H1', 'H2', 'H3', 'H5', 'H6']

# The first uploaded file is treated as the dataset archive and unpacked
# into data_dir.
filename = list(uploaded)[0]
with zipfile.ZipFile(filename, mode='r') as archive:
    archive.extractall(data_dir)

In [None]:
!pip install scikit-image xgboost tqdm



In [None]:
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.feature import hog
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, label_binarize
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from tqdm import tqdm

In [None]:
# --- LBP feature extraction --------------------------------------------------
# Every image is reduced to the density-normalised histogram of its "uniform"
# local-binary-pattern codes.
lbp_radius = 1
lbp_n_points = 8 * lbp_radius
# np.arange(0, P + 3) gives P + 2 unit-width bins; presumably one per code the
# 'uniform' method can emit -- confirm against the skimage documentation.
lbp_bin_edges = np.arange(0, lbp_n_points + 3)
features = []
labels = []

for cls in tqdm(class_names):
    cls_path = os.path.join(data_dir, cls)
    # os.listdir returns entries in arbitrary order. Sorting keeps the row
    # order of X / y stable between runs, which the seeded train/test split
    # in the next cell needs in order to be reproducible.
    for img_file in sorted(os.listdir(cls_path)):
        img_path = os.path.join(cls_path, img_file)
        try:
            # NOTE(review): with as_gray=True the image may come back as float;
            # confirm the dtype is what local_binary_pattern should receive.
            img = imread(img_path, as_gray=True)
            lbp = local_binary_pattern(img, lbp_n_points, lbp_radius, method='uniform')
            lbp_hist, _ = np.histogram(lbp.ravel(), bins=lbp_bin_edges, density=True)
            features.append(lbp_hist)
            labels.append(cls)
        except Exception as e:
            # Best effort: an unreadable file is reported and left out of the
            # dataset instead of aborting the whole extraction.
            print(f"Skipping {img_file}: {e}")

# One row per successfully processed image, with labels aligned to X.
X = np.array(features)
y = np.array(labels)

100%|██████████| 5/5 [01:54<00:00, 22.85s/it]


In [None]:
# Turn the class-name strings into integer ids, then hold out 20% of the
# samples for testing. The split is stratified on the encoded labels and
# seeded so it can be repeated.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Three classifiers are compared on the same features.
svm = SVC(kernel='rbf')
rf = RandomForestClassifier(n_estimators=100, random_state=42)
# `use_label_encoder` was dropped: the installed XGBoost ignores it and only
# prints a 'Parameters: { "use_label_encoder" } are not used.' warning.
xgb = XGBClassifier(eval_metric='mlogloss')

svm.fit(X_train_scaled, y_train)
rf.fit(X_train_scaled, y_train)
xgb.fit(X_train_scaled, y_train)

Parameters: { "use_label_encoder" } are not used.



In [None]:
# Report each fitted model on the held-out split: overall accuracy first,
# then the per-class precision / recall / F1 table labelled with class names.
models = {
    'SVM': svm,
    'Random Forest': rf,
    'XGBoost': xgb,
}
for name in models:
    model = models[name]
    preds = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, preds)
    print(f"\n{name} Accuracy: {acc:.4f}")
    report = classification_report(y_test, preds, target_names=le.classes_)
    print(report)


SVM Accuracy: 0.5500
              precision    recall  f1-score   support

          H1       0.57      0.67      0.61       500
          H2       0.40      0.42      0.41       500
          H3       0.54      0.61      0.57       500
          H5       0.64      0.62      0.63       500
          H6       0.66      0.43      0.52       500

    accuracy                           0.55      2500
   macro avg       0.56      0.55      0.55      2500
weighted avg       0.56      0.55      0.55      2500


Random Forest Accuracy: 0.5988
              precision    recall  f1-score   support

          H1       0.57      0.61      0.59       500
          H2       0.47      0.48      0.47       500
          H3       0.59      0.64      0.62       500
          H5       0.70      0.67      0.69       500
          H6       0.67      0.59      0.63       500

    accuracy                           0.60      2500
   macro avg       0.60      0.60      0.60      2500
weighted avg       0.60

In [None]:
# --- HOG feature extraction --------------------------------------------------
# Every image is described by its flattened histogram-of-oriented-gradients
# vector, computed with the cell / block geometry below.
hog_pixels_per_cell = (16, 16)
hog_cells_per_block = (2, 2)
features = []
labels = []

for cls in tqdm(class_names):
    cls_path = os.path.join(data_dir, cls)
    # os.listdir returns entries in arbitrary order. Sorting keeps the row
    # order of X / y stable between runs, which the seeded train/test split
    # in the next cell needs in order to be reproducible.
    for img_file in sorted(os.listdir(cls_path)):
        img_path = os.path.join(cls_path, img_file)
        try:
            img = imread(img_path, as_gray=True)
            hog_feat = hog(img, pixels_per_cell=hog_pixels_per_cell,
                           cells_per_block=hog_cells_per_block,
                           feature_vector=True)
            features.append(hog_feat)
            labels.append(cls)
        except Exception as e:
            # Best effort: an unreadable file is reported and left out of the
            # dataset instead of aborting the whole extraction.
            print(f"Skipping {img_file}: {e}")

# One row per successfully processed image, with labels aligned to X.
X = np.array(features)
y = np.array(labels)

100%|██████████| 5/5 [01:50<00:00, 22.15s/it]


In [None]:
# Encode the class names as integers and carve out a stratified, seeded
# 80/20 train/test split of the HOG feature matrix.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# NOTE(review): probability=True is requested for the SVM, but no cell visible
# here uses class probabilities -- confirm it is needed further down.
svm = SVC(kernel='rbf', probability=True)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
# `use_label_encoder` was dropped: the installed XGBoost ignores it and only
# prints a 'Parameters: { "use_label_encoder" } are not used.' warning.
xgb = XGBClassifier(eval_metric='mlogloss')

svm.fit(X_train_scaled, y_train)
rf.fit(X_train_scaled, y_train)
xgb.fit(X_train_scaled, y_train)

Parameters: { "use_label_encoder" } are not used.



In [None]:
# Report each fitted model on the held-out split: overall accuracy first,
# then the per-class precision / recall / F1 table labelled with class names.
models = {
    'SVM': svm,
    'Random Forest': rf,
    'XGBoost': xgb,
}
for name in models:
    model = models[name]
    preds = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, preds)
    print(f"\n{name} Accuracy: {acc:.4f}")
    report = classification_report(y_test, preds, target_names=le.classes_)
    print(report)


SVM Accuracy: 0.5300
              precision    recall  f1-score   support

          H1       0.56      0.65      0.61       500
          H2       0.40      0.46      0.43       500
          H3       0.53      0.50      0.51       500
          H5       0.58      0.55      0.56       500
          H6       0.61      0.49      0.54       500

    accuracy                           0.53      2500
   macro avg       0.54      0.53      0.53      2500
weighted avg       0.54      0.53      0.53      2500


Random Forest Accuracy: 0.4412
              precision    recall  f1-score   support

          H1       0.49      0.65      0.56       500
          H2       0.32      0.40      0.36       500
          H3       0.39      0.27      0.32       500
          H5       0.53      0.43      0.47       500
          H6       0.50      0.46      0.48       500

    accuracy                           0.44      2500
   macro avg       0.45      0.44      0.44      2500
weighted avg       0.45

In [None]:
from google.colab import files
import zipfile
import os

# Open Colab's upload prompt. The result is keyed by uploaded filename; the
# next cell takes the first key as the dataset zip archive to extract.
uploaded = files.upload()

Saving grayscale_defungi_224.zip to grayscale_defungi_224.zip


In [None]:
# Dataset root on the Colab VM and the class sub-folders read by later cells.
data_dir = '/content/grayscale_defungi_224'
class_names = ['H1', 'H2', 'H3', 'H5', 'H6']

# The first uploaded file is treated as the dataset archive and unpacked
# into data_dir.
filename = list(uploaded)[0]
with zipfile.ZipFile(filename, mode='r') as archive:
    archive.extractall(data_dir)

In [None]:
!pip install scikit-image xgboost tqdm
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.feature import hog
from skimage.feature import local_binary_pattern
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, label_binarize
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.multiclass import OneVsRestClassifier
from tqdm import tqdm



In [None]:
# LBP neighbourhood: sampling radius, and 8 sampling points per unit of radius.
lbp_radius = 1
lbp_n_points = lbp_radius * 8

# Accumulators filled by the feature-extraction loop in the next cell.
features, labels = [], []

In [None]:
# --- Combined LBP + HOG feature extraction -----------------------------------
# Reset the accumulators here, not only in the config cell, so that re-running
# this cell rebuilds the dataset instead of appending a second copy of every
# image to the existing lists.
features = []
labels = []

for cls in tqdm(class_names):
    cls_path = os.path.join(data_dir, cls)
    # os.listdir returns entries in arbitrary order. Sorting keeps the sample
    # order stable between runs so the seeded train/test split is reproducible.
    for img_file in sorted(os.listdir(cls_path)):
        img_path = os.path.join(cls_path, img_file)
        try:
            img = imread(img_path, as_gray=True)
            # Texture descriptor: normalised histogram of uniform LBP codes.
            lbp = local_binary_pattern(img, lbp_n_points, lbp_radius, method='uniform')
            lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, lbp_n_points + 3), density=True)
            # Shape descriptor: flattened HOG vector.
            hog_feat = hog(img, pixels_per_cell=(16, 16), cells_per_block=(2, 2), orientations=9, block_norm='L2-Hys')
            # The final feature vector is the LBP histogram followed by HOG.
            combined_feat = np.concatenate((lbp_hist, hog_feat))
            features.append(combined_feat)
            labels.append(cls)
        except Exception as e:
            # Best effort: an unreadable file is reported and left out of the
            # dataset instead of aborting the whole extraction.
            print(f"Skipping {img_file}: {e}")

100%|██████████| 5/5 [03:28<00:00, 41.65s/it]


In [None]:
# One row per image (LBP histogram + HOG vector), with labels aligned to X.
X = np.array(features)
y = np.array(labels)

# Encode class names as integers and make a stratified, seeded 80/20 split.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

# Standardise with statistics taken from the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm = SVC(kernel='rbf', probability=True)
# random_state=42 matches the LBP-only and HOG-only experiments above, so the
# forest is reproducible and the three feature sets are compared on equal terms.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
xgb = XGBClassifier(eval_metric='mlogloss')

svm.fit(X_train_scaled, y_train)
rf.fit(X_train_scaled, y_train)
xgb.fit(X_train_scaled, y_train)

# Report accuracy and the per-class metrics of every model on the test split.
models = {'SVM': svm, 'Random Forest': rf, 'XGBoost': xgb}
for name, model in models.items():
    preds = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, preds)
    print(f"\n {name} Accuracy: {acc:.4f}")
    print(classification_report(y_test, preds, target_names=le.classes_))



 SVM Accuracy: 0.5268
              precision    recall  f1-score   support

          H1       0.54      0.64      0.59       500
          H2       0.39      0.43      0.41       500
          H3       0.52      0.54      0.53       500
          H5       0.59      0.52      0.55       500
          H6       0.63      0.50      0.56       500

    accuracy                           0.53      2500
   macro avg       0.53      0.53      0.53      2500
weighted avg       0.53      0.53      0.53      2500


 Random Forest Accuracy: 0.5232
              precision    recall  f1-score   support

          H1       0.54      0.65      0.59       500
          H2       0.40      0.44      0.42       500
          H3       0.51      0.51      0.51       500
          H5       0.61      0.54      0.57       500
          H6       0.60      0.47      0.53       500

    accuracy                           0.52      2500
   macro avg       0.53      0.52      0.52      2500
weighted avg       0.

In [None]:
# Build balanced subsets with 500 and 1500 images per class for comparison.
import os
import shutil
import random
import zipfile
from google.colab import files

# Open Colab's upload prompt. The result is keyed by uploaded filename; the
# next cell takes the first key as the dataset zip archive to extract.
uploaded = files.upload()

Saving grayscale_defungi_224.zip to grayscale_defungi_224.zip


In [None]:
# Full dataset location, plus the local folders that receive the two subsets.
source_dir = '/content/grayscale_defungi_224'
target_dir_500 = '/content/grayscale_defungi_500'
target_dir_1500 = '/content/grayscale_defungi_1500'

# Google Drive destinations for the subsets (each path is now assigned once;
# the original cell repeated both assignments).
# NOTE(review): no drive.mount(...) call is visible in this part of the
# notebook -- confirm Drive is mounted before these paths are written to.
drive_save_500 = '/content/drive/MyDrive/grayscale_defungi_500'
drive_save_1500 = '/content/drive/MyDrive/grayscale_defungi_1500'

class_names = ['H1', 'H2', 'H3', 'H5', 'H6']

# The first uploaded file is treated as the dataset archive and unpacked
# into source_dir.
filename = list(uploaded.keys())[0]
with zipfile.ZipFile(filename, 'r') as ref:
    ref.extractall(source_dir)

In [None]:
def create_balanced_subset(src_root, dest_root, images_per_class, classes=None, seed=None):
    """Copy a random sample of ``images_per_class`` files for every class.

    For each class, files are drawn without replacement from
    ``src_root/<class>`` and copied (with metadata, via ``shutil.copy2``)
    to ``dest_root/<class>``. Directories are created as needed. Files
    already present in the destination are left in place, so point
    ``dest_root`` at a fresh folder to get exactly ``images_per_class``
    files per class.

    Args:
        src_root: Directory containing one sub-directory per class.
        dest_root: Directory that receives the sampled copy.
        images_per_class: Number of files to sample from each class.
        classes: Class sub-directory names to process. Defaults to the
            notebook-level ``class_names`` list.
        seed: Optional seed for a private random generator. When ``None``
            the shared ``random`` module state is used, as before.

    Raises:
        ValueError: If ``images_per_class`` is negative or larger than the
            number of files available for some class. Every class is
            checked before anything is written, so a failure does not
            leave a partially built subset behind.
    """
    if classes is None:
        classes = class_names
    rng = random if seed is None else random.Random(seed)

    # Gather and validate every class listing first, so that an undersized
    # class is reported before any file has been copied.
    listings = {}
    for cls in classes:
        src_cls_path = os.path.join(src_root, cls)
        # Regular files only (a sampled sub-directory would make copy2 fail),
        # sorted because os.listdir order is arbitrary and a seeded sample
        # must start from a stable population.
        images = sorted(
            entry for entry in os.listdir(src_cls_path)
            if os.path.isfile(os.path.join(src_cls_path, entry))
        )
        if not 0 <= images_per_class <= len(images):
            raise ValueError(
                f"Cannot sample {images_per_class} images for class {cls!r}: "
                f"{len(images)} file(s) available in {src_cls_path}"
            )
        listings[cls] = images

    os.makedirs(dest_root, exist_ok=True)

    for cls, images in listings.items():
        src_cls_path = os.path.join(src_root, cls)
        dest_cls_path = os.path.join(dest_root, cls)
        os.makedirs(dest_cls_path, exist_ok=True)

        for img in rng.sample(images, images_per_class):
            shutil.copy2(os.path.join(src_cls_path, img),
                         os.path.join(dest_cls_path, img))


In [None]:
# Seed the shared random generator so the sampled subsets can be regenerated.
random.seed(42)

# Build each balanced subset locally, then mirror it to its Drive folder.
# dirs_exist_ok=True lets the cell be re-run: without it copytree raises
# FileExistsError as soon as the destination folder already exists.
# NOTE(review): no drive.mount(...) call is visible in this part of the
# notebook -- confirm Drive is mounted, otherwise the copies presumably end
# up on the local VM disk only.
create_balanced_subset(source_dir, target_dir_500, 500)
shutil.copytree(target_dir_500, drive_save_500, dirs_exist_ok=True)

create_balanced_subset(source_dir, target_dir_1500, 1500)
shutil.copytree(target_dir_1500, drive_save_1500, dirs_exist_ok=True)

# Zip each subset next to its folder. The absolute target_dir_* paths are used
# both as archive base name and as root directory, so this no longer depends
# on the current working directory being /content.
zip_path_500 = shutil.make_archive(target_dir_500, 'zip', target_dir_500)
zip_path_1500 = shutil.make_archive(target_dir_1500, 'zip', target_dir_1500)

# make_archive returns the path of the archive it wrote; offer both archives
# as browser downloads.
files.download(zip_path_500)
files.download(zip_path_1500)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>