In [8]:
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

def extract_shape_features(image):
    """Measure the largest external contour of a thresholded image.

    The image is converted to grayscale and binarised with a fixed threshold
    of 127 (``cv2.THRESH_BINARY``). Among the external contours of the binary
    mask, the one with the greatest area is measured.

    Args:
        image: BGR image accepted by ``cv2.cvtColor``.

    Returns:
        list: ``[area, perimeter, aspect_ratio, circularity]``.
        ``aspect_ratio`` is bounding-box width / height and ``circularity``
        is ``4 * pi * area / perimeter**2``. When no contour is found the
        result is ``[0.0, 0.0, 0.0, 0.0]`` so the feature vector keeps a
        constant length; a zero-perimeter contour gets circularity ``0.0``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in both.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing above the threshold: max() below would raise ValueError.
        return [0.0, 0.0, 0.0, 0.0]

    largest_contour = max(contours, key=cv2.contourArea)

    area = cv2.contourArea(largest_contour)
    perimeter = cv2.arcLength(largest_contour, True)
    _, _, w, h = cv2.boundingRect(largest_contour)

    aspect_ratio = float(w) / h if h else 0.0
    # A single-point contour has zero arc length; avoid ZeroDivisionError.
    if perimeter > 0:
        circularity = 4 * np.pi * area / (perimeter * perimeter)
    else:
        circularity = 0.0

    return [area, perimeter, aspect_ratio, circularity]

def extract_color_features(image):
    """Compute three colour moments per channel in BGR, HSV and LAB.

    For every channel of the input image and of its HSV and LAB conversions,
    the mean, the standard deviation and the mean cubed deviation from the
    mean (un-normalised third central moment) are appended, in that order.

    Args:
        image: BGR image accepted by ``cv2.cvtColor``.

    Returns:
        list: flat list of moments, ordered by colour space (input, HSV, LAB),
        then by channel, then by moment.
    """
    colour_spaces = (
        image,
        cv2.cvtColor(image, cv2.COLOR_BGR2HSV),
        cv2.cvtColor(image, cv2.COLOR_BGR2LAB),
    )

    moments = []
    for space in colour_spaces:
        for plane in cv2.split(space):
            centre = np.mean(plane)
            spread = np.std(plane)
            third_moment = np.mean((plane - centre) ** 3)
            moments += [centre, spread, third_moment]

    return moments

def extract_texture_features(image):
    """Build a texture descriptor from GLCM statistics and an LBP histogram.

    The grayscale image feeds a grey-level co-occurrence matrix at distance 1
    and four angles (0, pi/4, pi/2, 3*pi/4); contrast, correlation, energy and
    homogeneity are read from it. A 'uniform' local binary pattern with
    radius 3 and 24 sampling points is then histogrammed over the bin edges
    ``0 .. 26`` and normalised to sum to (almost exactly) one.

    Args:
        image: BGR image accepted by ``cv2.cvtColor``.

    Returns:
        numpy.ndarray: 1-D array made of the flattened contrast, correlation,
        energy and homogeneity values followed by the normalised LBP histogram.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Co-occurrence statistics, one value per (distance, angle) pair.
    directions = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    cooccurrence = graycomatrix(grey, [1], directions, levels=256, symmetric=True, normed=True)
    glcm_parts = [
        graycoprops(cooccurrence, prop_name).ravel()
        for prop_name in ('contrast', 'correlation', 'energy', 'homogeneity')
    ]

    # Local binary pattern histogram.
    lbp_radius = 3
    lbp_points = 8 * lbp_radius
    codes = local_binary_pattern(grey, lbp_points, lbp_radius, method='uniform')
    edges = np.arange(0, lbp_points + 3)
    counts, _ = np.histogram(codes.ravel(), bins=edges, range=(0, lbp_points + 2))
    counts = counts.astype("float")
    # The small epsilon keeps the division defined if the histogram is empty.
    counts /= (counts.sum() + 1e-7)

    return np.concatenate(glcm_parts + [counts])

def extract_all_features(image):
    """Run the shape, colour and texture extractors on one image.

    Args:
        image: image passed unchanged to each of the three extractors.

    Returns:
        dict: keys ``'shape'``, ``'color'`` and ``'texture'`` hold the
        individual extractor outputs; ``'all'`` holds their concatenation
        (shape, then colour, then texture) as a single NumPy array.
    """
    features = {
        'shape': extract_shape_features(image),
        'color': extract_color_features(image),
        'texture': extract_texture_features(image),
    }
    # Combined vector in the same order as the individual entries above.
    features['all'] = np.concatenate(
        [features['shape'], features['color'], features['texture']]
    )
    return features

def evaluate_model(X_train, X_test, y_train, y_test, model, model_name):
    """Fit a model, print its hold-out report and its 5-fold CV score.

    Args:
        X_train: training feature matrix.
        X_test: hold-out feature matrix.
        y_train: training labels.
        y_test: hold-out labels.
        model: estimator exposing ``fit`` and ``predict``; it is fitted in place.
        model_name: label printed in the results header.

    Returns:
        None. The classification report and the mean cross-validation score
        (with twice its standard deviation) are written to stdout.
    """
    # Hold-out evaluation on the supplied test split.
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    print(f"\nResults for {model_name}")
    print(classification_report(y_test, predictions))

    # Cross-validation uses the training split only.
    fold_scores = cross_val_score(model, X_train, y_train, cv=5)
    mean_score = fold_scores.mean()
    spread = fold_scores.std() * 2
    print(f"Cross-validation scores: {mean_score:.3f} (+/- {spread:.3f})")

def _stack_feature_sets(per_image_features):
    """Stack a list of per-image feature dicts into one array per feature-set name."""
    return {
        name: np.array([feats[name] for feats in per_image_features])
        for name in ('shape', 'color', 'texture', 'all')
    }


def main(train_images, train_labels, val_images, val_labels, random_state=42):
    """Compare several classifiers on each hand-crafted feature set.

    Features are extracted from every training and validation image, then
    each feature set ('shape', 'color', 'texture', 'all') is standardised
    with statistics of the training split and handed, together with each
    classifier, to ``evaluate_model``, which prints the results.

    Args:
        train_images: iterable of training images.
        train_labels: labels aligned with ``train_images``.
        val_images: iterable of validation images.
        val_labels: labels aligned with ``val_images``; every value must also
            occur in ``train_labels`` because the encoder is fitted on the
            training labels only.
        random_state: seed given to the random forest so that repeated runs
            print the same numbers. Pass ``None`` for unseeded behaviour.

    Returns:
        None. All results are printed.
    """
    # Extract features for all images
    print("Extracting train features...")
    train_images_features = [extract_all_features(img) for img in train_images]
    print("Extracting val features...")
    val_images_features = [extract_all_features(img) for img in val_images]

    train_feature_sets = _stack_feature_sets(train_images_features)
    val_feature_sets = _stack_feature_sets(val_images_features)

    # The labels do not depend on the feature set, so encode them once
    # instead of refitting the encoder on every loop iteration.
    le = LabelEncoder()
    y_train = le.fit_transform(train_labels)
    y_test = le.transform(val_labels)

    # The same estimator instances are reused for every feature set;
    # evaluate_model refits them on each call.
    # NOTE: the XGBoost and CatBoost objectives below are binary-only.
    models = {
        'SVM': SVC(kernel='rbf'),
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=random_state),
        'KNN': KNeighborsClassifier(n_neighbors=5),
        'XGBoost': XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic'),
        'CatBoost' : CatBoostClassifier(iterations=50, depth=2, learning_rate=1, loss_function='Logloss', verbose=False)
    }

    # Test each combination of feature set and model
    for feature_name, feature in train_feature_sets.items():
        print(f"\nTesting feature set: {feature_name}")

        # Fit the scaler on the training split only, then apply it to both
        # splits so no validation statistics leak into training.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(feature)
        X_test = scaler.transform(val_feature_sets[feature_name])

        for model_name, model in models.items():
            evaluate_model(X_train, X_test, y_train, y_test, model, model_name)

def load_dataset(path_dir):
    """Load every readable image under ``path_dir`` with its class label.

    ``path_dir`` is expected to contain one sub-directory per class; the
    sub-directory name becomes the label of each image inside it. Entries
    that are not directories at the top level, entries that are not regular
    files inside a class folder, and files OpenCV cannot decode are skipped.

    Args:
        path_dir: dataset root as ``str`` or ``pathlib.Path``.

    Returns:
        tuple: ``(images, classes)`` — two lists of equal length holding the
        decoded images (as returned by ``cv2.imread``) and their labels,
        ordered by class folder name and then by file name.
    """
    # Local import: the notebook only imports Path in a later cell.
    from pathlib import Path

    images = []
    classes = []
    # iterdir() yields entries in arbitrary order; sorting keeps the sample
    # order reproducible across machines and runs.
    for label_dir in sorted(Path(path_dir).iterdir()):
        if not label_dir.is_dir():
            # Stray files next to the class folders would make iterdir() raise.
            continue
        for image_path in sorted(label_dir.iterdir()):
            if not image_path.is_file():
                continue
            # str(): older OpenCV builds reject path-like objects.
            img = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
            if img is None:
                # imread reports an unreadable file by returning None, which
                # would otherwise crash later inside cv2.cvtColor.
                print(f"Skipping unreadable image: {image_path}")
                continue
            images.append(img)
            classes.append(label_dir.name)
    return images, classes

In [2]:
# Locations of the train / validation splits, relative to the working directory.
from pathlib import Path
path_train = Path("./dataset/split/train")
path_val = Path("./dataset/split/val")

# Requires load_dataset to be defined already (run the definitions cell first).
# Each call returns (images, labels); labels are the names of the class sub-folders.
train_images, train_labels = load_dataset(path_train)
val_images, val_labels = load_dataset(path_val)

In [9]:
# Extract features and print the evaluation of every model on every feature set.
main(train_images, train_labels, val_images, val_labels)

Extracting train features...
Extracting val features...

Testing feature set: shape

Results for SVM
              precision    recall  f1-score   support

           0       0.55      0.89      0.68        88
           1       0.58      0.18      0.27        78

    accuracy                           0.55       166
   macro avg       0.57      0.53      0.48       166
weighted avg       0.57      0.55      0.49       166

Cross-validation scores: 0.483 (+/- 0.078)

Results for Random Forest
              precision    recall  f1-score   support

           0       0.63      0.57      0.60        88
           1       0.56      0.63      0.59        78

    accuracy                           0.60       166
   macro avg       0.60      0.60      0.60       166
weighted avg       0.60      0.60      0.60       166

Cross-validation scores: 0.535 (+/- 0.130)

Results for KNN
              precision    recall  f1-score   support

           0       0.57      0.76      0.65        88
      