In [2]:
import itertools
import pandas as pd
import numpy as np
from tqdm import tqdm  # Progress tracking
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from itertools import combinations
from sklearn.ensemble import RandomForestClassifier

In [3]:
datasets = ['BC-15','Huongthom', 'Nep87','Q5','Thien_uu','Xi23']

In [4]:
df = pd.read_csv('/content/drive/MyDrive/ColabNotebooks/all_with_zernike_pluscolor_enahnced_edge_BC-15.csv')

In [5]:
basic_features = ["area", "length", "width", "ratio", "major_axis_length", "minor_axis_length", "convex_hull_area",
                  "convex_hull_perimeter","mean_r","mean_g","mean_b","red_sqr","green_sqr","blue_sqr","texture_mean",
                  "texture_std","texture_uniformity","texture_third_moment"]
enhanced_color_features = ['mean_h','mean_s','mean_v','std_h','std_s','std_v','hue_sqr','sat_sqr',
'val_sqr','mean_y','mean_cr','mean_cb','std_y','std_cr','std_cb','y_sqr','cr_sqr','cb_sqr',
'mean_l','mean_a','mean_b','std_l','std_a','std_b_lab','l_sqr','a_sqr','b_lab_sqr']

zernike_features = [col for col in df.columns if col.startswith("zernike_")]
color_features = [col for col in df.columns if col.startswith("csd_")]
lbp_features = [col for col in df.columns if col.startswith("LBP_")]
glcm_features = [col for col in df.columns if col.startswith("GLCM_")]
gist_features = [col for col in df.columns if col.startswith("GIST_")]
edge_features = [col for col in df.columns if col.startswith("edge_energy")]

In [6]:

# Feature group dictionary
feature_groups = {
    "Basic": basic_features,
    "Enhanced Color": enhanced_color_features,
    "Zernike moments": zernike_features,
    "LBP": lbp_features,
    "GLCM": glcm_features,
    # "GIST": gist_features,
    "Edge":edge_features
}

# Generate all feature combinations
feature_combinations = []
for r in range(2, len(feature_groups) + 1):  # Start from 2 instead of 1
    for combo in itertools.combinations(feature_groups.keys(), r):
        feature_combinations.append(combo)


In [7]:
# Define models and their hyperparameter search spaces
param_distributions = {
    "K-Nearest Neighbors": {
        "n_neighbors": np.arange(1, 70, 2),
        "weights": ["uniform", "distance"],
        "metric": ["euclidean", "manhattan"]
    },
    "Support Vector Machine": {
        "C": np.logspace(-3, 2, 10),
        "kernel": ["linear", "rbf", "poly", "sigmoid"],
        "gamma": ["scale", "auto"]
    },
    "Random Forest": {
        "n_estimators": [100, 150, 200, 250,300],
        "max_depth": [10, 20, 30, None],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "bootstrap": [True, False]
    }
}

# Define models
models = {
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "Random Forest": RandomForestClassifier()
}



In [8]:
all_results = []

In [9]:
# Store results


# Loop through each dataset
for dataset in tqdm(datasets, desc="Processing Datasets"):
    # Load dataset
    df = pd.read_csv(f'/content/drive/MyDrive/ColabNotebooks/all_with_zernike_pluscolor_enahnced_edge_{dataset}.csv')

    # Loop through each feature combination
    for feature_combo in tqdm(feature_combinations,desc=f"Feature Combos for {dataset}", leave=False):
        selected_features = []

        # Combine selected feature groups
        for group in feature_combo:
            selected_features.extend(feature_groups[group])
        df_selected = df[selected_features + ["Label"]]
        # Split dataset
        X = df_selected.drop(columns=['Label'])
        y = df_selected['Label']

        # Normalize features
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42, stratify=y)

        # Loop through models with progress tracking
        for name, model in tqdm(models.items(), desc=f"Training Models for {dataset} - {feature_combo}", leave=False):

            # Hyperparameter tuning with RandomizedSearchCV
            random_search = RandomizedSearchCV(
                model,
                param_distributions[name],
                n_iter=20,
                scoring='accuracy',
                cv=5,
                verbose=0,
                random_state=42,
                n_jobs=-1
            )

            # Perform search
            random_search.fit(X_train, y_train)

            # Get best model and parameters
            best_model = random_search.best_estimator_
            best_params = random_search.best_params_

            # Make predictions
            y_pred = best_model.predict(X_test)

            # Compute evaluation metrics
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='weighted')
            recall = recall_score(y_test, y_pred, average='weighted')
            f1 = f1_score(y_test, y_pred, average='weighted')
            cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='accuracy')

            # Save results
            all_results.append({
                "Model": name,
                "Dataset": dataset,
                "Feature Combination": "+".join(feature_combo),
                "Best Params": best_params,
                "Accuracy": accuracy,
                "Precision": precision,
                "Recall": recall,
                "F1 Score": f1,
                "CV_scores(5fold)": np.mean(cv_scores)
            })



Processing Datasets:   0%|          | 0/5 [00:00<?, ?it/s]
Feature Combos for Huongthom:   0%|          | 0/57 [00:00<?, ?it/s][A

Training Models for Huongthom - ('Basic', 'Enhanced Color'):   0%|          | 0/3 [00:00<?, ?it/s][A[A

Training Models for Huongthom - ('Basic', 'Enhanced Color'):  33%|███▎      | 1/3 [00:08<00:16,  8.39s/it][A[A

Training Models for Huongthom - ('Basic', 'Enhanced Color'):  67%|██████▋   | 2/3 [00:28<00:15, 15.28s/it][A[A

Training Models for Huongthom - ('Basic', 'Enhanced Color'): 100%|██████████| 3/3 [04:42<00:00, 124.16s/it][A[A

                                                                                                           [A[A
Feature Combos for Huongthom:   2%|▏         | 1/57 [04:42<4:23:26, 282.25s/it][A

Training Models for Huongthom - ('Basic', 'Zernike moments'):   0%|          | 0/3 [00:00<?, ?it/s][A[A

Training Models for Huongthom - ('Basic', 'Zernike moments'):  33%|███▎      | 1/3 [00:04<00:09,  4.89s/it][A[A


In [10]:
all_results_pd = pd.DataFrame(all_results)

In [13]:
all_results_pd

Unnamed: 0,Model,Dataset,Feature Combination,Best Params,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,BC-15,Basic+Enhanced Color,"{'weights': 'distance', 'n_neighbors': 11, 'me...",0.885502,0.892514,0.885502,0.885003,0.879820
1,Support Vector Machine,BC-15,Basic+Enhanced Color,"{'kernel': 'rbf', 'gamma': 'auto', 'C': 7.7426...",0.928336,0.929771,0.928336,0.928280,0.927725
2,Random Forest,BC-15,Basic+Enhanced Color,"{'n_estimators': 200, 'min_samples_split': 2, ...",0.913509,0.914896,0.913509,0.913441,0.899307
3,K-Nearest Neighbors,BC-15,Basic+Zernike moments,"{'weights': 'distance', 'n_neighbors': 11, 'me...",0.878913,0.889295,0.878913,0.878119,0.876574
4,Support Vector Machine,BC-15,Basic+Zernike moments,"{'kernel': 'rbf', 'gamma': 'scale', 'C': 2.154...",0.922570,0.924069,0.922570,0.922506,0.935038
...,...,...,...,...,...,...,...,...,...
198,K-Nearest Neighbors,Huongthom,Zernike moments+LBP,"{'weights': 'distance', 'n_neighbors': 11, 'me...",0.862044,0.865409,0.862044,0.861644,0.868705
199,Support Vector Machine,Huongthom,Zernike moments+LBP,"{'kernel': 'rbf', 'gamma': 'auto', 'C': 7.7426...",0.897080,0.897572,0.897080,0.897027,0.900360
200,Random Forest,Huongthom,Zernike moments+LBP,"{'n_estimators': 200, 'min_samples_split': 2, ...",0.878102,0.879262,0.878102,0.877968,0.876259
201,K-Nearest Neighbors,Huongthom,Zernike moments+GLCM,"{'weights': 'uniform', 'n_neighbors': 13, 'met...",0.885401,0.892186,0.885401,0.884815,0.889568
