In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations
from sklearn.ensemble import RandomForestClassifier
import tqdm


In [6]:
datasets = ['BC-15','Huongthom', 'Nep87','Q5','Thien_uu','Xi23']

In [7]:
df = pd.read_csv('/home/duyle/Rice_photos/features_extracted/moments/all_with_zernike_pluscolor_enahnced_edgeenhanced_BC-15.csv')

In [8]:
basic_features = ["area", "length", "width", "ratio", "major_axis_length", "minor_axis_length", "convex_hull_area",
                  "convex_hull_perimeter","mean_r","mean_g","mean_b","red_sqr","green_sqr","blue_sqr","texture_mean",
                  "texture_std","texture_uniformity","texture_third_moment"]
enhanced_color_features = ['mean_h','mean_s','mean_v','std_h','std_s','std_v','hue_sqr','sat_sqr',
'val_sqr','mean_y','mean_cr','mean_cb','std_y','std_cr','std_cb','y_sqr','cr_sqr','cb_sqr',
'mean_l','mean_a','mean_b','std_l','std_a','std_b_lab','l_sqr','a_sqr','b_lab_sqr']

zernike_features = [col for col in df.columns if col.startswith("zernike_")]
color_features = [col for col in df.columns if col.startswith("csd_")]
lbp_features = [col for col in df.columns if col.startswith("LBP_")]
glcm_features = [col for col in df.columns if col.startswith("GLCM_")]
gist_features = [col for col in df.columns if col.startswith("GIST_")]
edge_features = [col for col in df.columns if col.startswith("edge_energy")]

In [9]:
feature_groups = {
    "Basic": basic_features,
    "Enhanced Color": enhanced_color_features,
    "Zernike moments": zernike_features,
    "LBP": lbp_features,
    "GLCM": glcm_features,
    # "GIST": gist_features,
    "Edge":edge_features
}

In [13]:
feature_combinations = []
for r in range(3, len(feature_groups) + 1):  # From 1 feature group up to all
    for combo in combinations(feature_groups.keys(), r):
        feature_combinations.append(combo)

In [14]:
all_results = []

In [15]:
models = {
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=20),
    "Support Vector Machine": SVC(kernel='rbf', gamma = "auto", C=7.74263),
    "Random Forest": RandomForestClassifier(n_estimators=150, max_depth=20)
    
}

In [44]:
param_distributions = {
    "K-Nearest Neighbors": {
        "n_neighbors": np.arange(1, 70, 2),
        "weights": ["uniform", "distance"],
        "metric": ["euclidean", "manhattan"]
    },
    "Support Vector Machine": {
        "C": np.logspace(-3, 2, 10),
        "kernel": ["linear", "rbf", "poly", "sigmoid"],
        "gamma": ["scale", "auto"]
    },
    "Random Forest": {
        "n_estimators": [100, 120, 150, 200, 250],
        "max_depth": [20, 30, 50, 40, None],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "bootstrap": [True, False]
    }
}

# Define base models
models = {
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "Random Forest": RandomForestClassifier()
}

In [21]:
df=pd.read_csv(f'/home/duyle/Rice_photos/features_extracted/moments/all_with_zernike_pluscolor_enahnced_edgeenhanced_BC-15.csv')


# Ensure 'Label' column is included
for feature_combo in tqdm.tqdm(feature_combinations):
    selected_features = []
    
    # Combine selected feature groups
    for group in tqdm.tqdm(feature_combo):
        selected_features.extend(feature_groups[group])
        
    df_selected = df[selected_features + ["Label"]]
    X = df_selected.drop(columns=['Label'])
    y = df_selected['Label']

    # Normalize the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42, stratify=y)



    for name, model in tqdm.tqdm(models.items()):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')

        all_results.append({
            "Model": name,
            "Dataset": 'BC-15',
            "Feature Combination": "+".join(feature_combo),
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1,
            "CV_scores(5fold)": np.mean(cv_scores)
        })
        
        # print(f"🔍 Tuning hyperparameters for {name}...")

        # random_search = RandomizedSearchCV(
        #     model,
        #     param_distributions[name],  # Get corresponding param grid
        #     n_iter=20,  # Number of parameter settings sampled
        #     scoring='accuracy',
        #     cv=5,  # 5-fold cross-validation
        #     verbose=1,
        #     random_state=42,
        #     n_jobs=-1  # Use all available CPUs
        # )

        # random_search.fit(X_train, y_train)  # Perform search

        # Get the best model from the search
        # best_model = random_search.best_estimator_
        # best_params = random_search.best_params_

        # # Make predictions using the best model
        # y_pred = best_model.predict(X_test)

        # # Compute evaluation metrics
        # accuracy = accuracy_score(y_test, y_pred)
        # precision = precision_score(y_test, y_pred, average='weighted')
        # recall = recall_score(y_test, y_pred, average='weighted')
        # f1 = f1_score(y_test, y_pred, average='weighted')
        # cv_scores = cross_val_score(best_model, X_train, y_train, cv=5, scoring='accuracy')

        # # Save results
        # all_results.append({
        #     "Model": name,
        #     "Best Params": best_params,
        #     "COMBINATION": feature_combo,
        #     "Accuracy": accuracy,
        #     "Precision": precision,
        #     "Recall": recall,
        #     "F1 Score": f1,
        #     "CV_scores(5fold)": np.mean(cv_scores)
        # })




100%|██████████| 3/3 [00:00<00:00, 42224.54it/s]
100%|██████████| 3/3 [00:11<00:00,  3.69s/it]
100%|██████████| 3/3 [00:00<00:00, 50131.12it/s]
100%|██████████| 3/3 [00:09<00:00,  3.02s/it]
100%|██████████| 3/3 [00:00<00:00, 58254.22it/s]
100%|██████████| 3/3 [00:10<00:00,  3.62s/it]
100%|██████████| 3/3 [00:00<00:00, 74898.29it/s]
100%|██████████| 3/3 [00:10<00:00,  3.47s/it]
100%|██████████| 3/3 [00:00<00:00, 45923.04it/s]
100%|██████████| 3/3 [00:10<00:00,  3.42s/it]
100%|██████████| 3/3 [00:00<00:00, 48770.98it/s]
100%|██████████| 3/3 [00:10<00:00,  3.46s/it]
100%|██████████| 3/3 [00:00<00:00, 46951.16it/s]
100%|██████████| 3/3 [00:10<00:00,  3.57s/it]
100%|██████████| 3/3 [00:00<00:00, 73584.28it/s]
100%|██████████| 3/3 [00:08<00:00,  2.95s/it]
100%|██████████| 3/3 [00:00<00:00, 12336.19it/s]
100%|██████████| 3/3 [00:09<00:00,  3.13s/it]
100%|██████████| 3/3 [00:00<00:00, 48026.38it/s]
100%|██████████| 3/3 [00:10<00:00,  3.55s/it]
100%|██████████| 3/3 [00:00<00:00, 68385.39it/s]
1

In [23]:
results_df

Unnamed: 0,Model,Dataset,Feature Combination,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,BC-15,Basic+Enhanced Color+Zernike moments,0.891269,0.900086,0.891269,0.890681,0.897686
1,Support Vector Machine,BC-15,Basic+Enhanced Color+Zernike moments,0.943987,0.944298,0.943987,0.943978,0.945999
2,Random Forest,BC-15,Basic+Enhanced Color+Zernike moments,0.911038,0.912497,0.911038,0.910964,0.922036
3,K-Nearest Neighbors,BC-15,Basic+Enhanced Color+LBP,0.869028,0.881828,0.869028,0.867944,0.858299
4,Support Vector Machine,BC-15,Basic+Enhanced Color+LBP,0.923394,0.924980,0.923394,0.923326,0.924073
...,...,...,...,...,...,...,...,...
204,Support Vector Machine,BC-15,Enhanced Color+Zernike moments+LBP+GLCM+Edge,0.944811,0.944958,0.944811,0.944807,0.946004
205,Random Forest,BC-15,Enhanced Color+Zernike moments+LBP+GLCM+Edge,0.898682,0.900704,0.898682,0.898561,0.913106
206,K-Nearest Neighbors,BC-15,Basic+Enhanced Color+Zernike moments+LBP+GLCM+...,0.881384,0.894855,0.881384,0.880384,0.884683
207,Support Vector Machine,BC-15,Basic+Enhanced Color+Zernike moments+LBP+GLCM+...,0.950577,0.950892,0.950577,0.950569,0.943568


In [None]:
all_results

[{'Model': 'K-Nearest Neighbors',
  'Best Params': {'weights': 'distance',
   'n_neighbors': np.int64(15),
   'metric': 'euclidean'},
  'COMBINATION': 'basic + zernike + enhancedcolor+edge+lbp',
  'Accuracy': 0.8813838550247117,
  'Precision': 0.894854753369316,
  'Recall': 0.8813838550247117,
  'F1 Score': 0.8803840911161456,
  'CV_scores(5fold)': np.float64(0.8944326258678409)},
 {'Model': 'Support Vector Machine',
  'Best Params': {'kernel': 'rbf',
   'gamma': 'auto',
   'C': np.float64(7.742636826811277)},
  'COMBINATION': 'basic + zernike + enhancedcolor+edge+lbp',
  'Accuracy': 0.9365733113673805,
  'Precision': 0.9371000368598402,
  'Recall': 0.9365733113673805,
  'F1 Score': 0.9365561356869031,
  'CV_scores(5fold)': np.float64(0.9431611669057867)},
 {'Model': 'Random Forest',
  'Best Params': {'n_estimators': 150,
   'min_samples_split': 10,
   'min_samples_leaf': 1,
   'max_depth': 40,
   'bootstrap': True},
  'COMBINATION': 'basic + zernike + enhancedcolor+edge+lbp',
  'Accur

In [12]:
for data in datasets:
    df=pd.read_csv(f'/home/duyle/Rice_photos/features_extracted/moments/all_with_zernike_pluscolor_enahnced_{data}.csv')
    for feature_combo in feature_combinations:
        selected_features = []
        
        # Combine selected feature groups
        for group in feature_combo:
            selected_features.extend(feature_groups[group])

        # Ensure 'Label' column is included
        df_selected = df[selected_features + ["Label"]]

        X = df_selected.drop(columns=['Label'])
        y = df_selected['Label']

        # Normalize the features
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42, stratify=y)


        for name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='weighted')
            recall = recall_score(y_test, y_pred, average='weighted')
            f1 = f1_score(y_test, y_pred, average='weighted')
            cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')

            all_results.append({
                "Model": name,
                "Dataset": data,
                "Feature Combination": "+".join(feature_combo),
                "Accuracy": accuracy,
                "Precision": precision,
                "Recall": recall,
                "F1 Score": f1,
                "CV_scores(5fold)": np.mean(cv_scores)
            })





In [13]:
res_df = pd.DataFrame(all_results)
res_df

Unnamed: 0,Model,Dataset,Feature Combination,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,BC-15,Basic,0.851730,0.864166,0.851730,0.850480,0.845304
1,Support Vector Machine,BC-15,Basic,0.902801,0.908246,0.902801,0.902486,0.895244
2,Random Forest,BC-15,Basic,0.876442,0.880430,0.876442,0.876129,0.862767
3,K-Nearest Neighbors,BC-15,Enhanced Color,0.742998,0.743057,0.742998,0.742977,0.744211
4,Support Vector Machine,BC-15,Enhanced Color,0.784185,0.784445,0.784185,0.784142,0.792536
...,...,...,...,...,...,...,...,...
1129,Support Vector Machine,Xi23,Enhanced Color+Zernike moments+LBP+GLCM+GIST,0.924708,0.925240,0.924708,0.924553,0.924746
1130,Random Forest,Xi23,Enhanced Color+Zernike moments+LBP+GLCM+GIST,0.919591,0.919725,0.919591,0.919497,0.912134
1131,K-Nearest Neighbors,Xi23,Basic+Enhanced Color+Zernike moments+LBP+GLCM+...,0.845760,0.859580,0.845760,0.842777,0.847668
1132,Support Vector Machine,Xi23,Basic+Enhanced Color+Zernike moments+LBP+GLCM+...,0.929825,0.930104,0.929825,0.929721,0.929424


In [14]:
res_df.to_csv('tested_all_2.csv',index=False)

In [45]:
res_df.to_csv('tested_all.csv',index=False)

In [31]:
res_df = pd.DataFrame(results)
res_df

Unnamed: 0,Model,Dataset,Features,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,BC-15,zernike+basic,0.891269,0.900086,0.891269,0.890681,0.897686
1,Support Vector Machine,BC-15,zernike+basic,0.937397,0.938473,0.937397,0.937361,0.937879
2,Random Forest,BC-15,zernike+basic,0.910214,0.911438,0.910214,0.910152,0.917978
3,K-Nearest Neighbors,Huongthom,zernike+basic,0.948175,0.948379,0.948175,0.948163,0.95036
4,Support Vector Machine,Huongthom,zernike+basic,0.975912,0.975936,0.975912,0.975911,0.977338
5,Random Forest,Huongthom,zernike+basic,0.957664,0.957992,0.957664,0.957663,0.965468
6,K-Nearest Neighbors,Nep87,zernike+basic,0.98314,0.983346,0.98314,0.983134,0.976614
7,Support Vector Machine,Nep87,zernike+basic,0.986301,0.986658,0.986301,0.986294,0.984924
8,Random Forest,Nep87,zernike+basic,0.984194,0.984361,0.984194,0.984189,0.97765
9,K-Nearest Neighbors,Q5,zernike+basic,0.945674,0.949507,0.945674,0.945547,0.941971


In [32]:
res_df.to_csv('testwithcolorenhanced.csv',index=False)

In [11]:
res_df = pd.DataFrame(results)
res_df

Unnamed: 0,Model,Dataset,Features,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,BC-15,zernike+basic,0.878089,0.887406,0.878089,0.877369,0.874544
1,Support Vector Machine,BC-15,zernike+basic,0.924217,0.925406,0.924217,0.924168,0.928538
2,Random Forest,BC-15,zernike+basic,0.889621,0.892516,0.889621,0.889426,0.882663
3,K-Nearest Neighbors,Huongthom,zernike+basic,0.912409,0.913634,0.912409,0.912316,0.909712
4,Support Vector Machine,Huongthom,zernike+basic,0.946715,0.9468,0.946715,0.946717,0.955036
5,Random Forest,Huongthom,zernike+basic,0.916058,0.916557,0.916058,0.916049,0.923381
6,K-Nearest Neighbors,Nep87,zernike+basic,0.977871,0.978033,0.977871,0.977864,0.974014
7,Support Vector Machine,Nep87,zernike+basic,0.987355,0.987565,0.987355,0.98735,0.981807
8,Random Forest,Nep87,zernike+basic,0.98314,0.983346,0.98314,0.983134,0.975054
9,K-Nearest Neighbors,Q5,zernike+basic,0.93159,0.93563,0.93159,0.931416,0.935023


In [17]:
res_df.to_csv('testwithoutcolor.csv',index=False)

In [9]:
res_df = pd.DataFrame(results)
res_df

Unnamed: 0,Model,Features,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,zernike+basic,0.860791,0.872848,0.860791,0.859681,0.875346
1,Support Vector Machine,zernike+basic,0.88715,0.893173,0.88715,0.886729,0.898084
2,Random Forest,zernike+basic,0.910214,0.912916,0.910214,0.910074,0.909044


In [68]:
df

Unnamed: 0,Model,Dataset,Features,Accuracy,Precision,Recall,F1 Score
0,KNN,BC15,basic,0.8402,0.8466,0.8402,0.8395
1,SVM,BC15,basic,0.8987,0.9039,0.8987,0.8984
2,Random Forest,BC15,basic,0.8526,0.8543,0.8526,0.8524
3,KNN,BC15,gist,0.6367,0.6368,0.6367,0.6367
4,SVM,BC15,gist,0.6960,0.6963,0.6960,0.6959
...,...,...,...,...,...,...,...
67,SVM,Xi23,glcm,0.7727,0.7786,0.7727,0.7693
68,Random Forest,Xi23,glcm,0.7580,0.7579,0.7580,0.7580
69,KNN,Xi23,lbp,0.6681,0.6676,0.6681,0.6653
70,SVM,Xi23,lbp,0.6886,0.6896,0.6886,0.6848


In [53]:
df = pd.DataFrame(results)


In [57]:
df = df.round(4)

In [72]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)


In [74]:
df

Unnamed: 0,Model,Dataset,Features,Accuracy,Precision,Recall,F1 Score
0,KNN,BC15,basic,0.8402,0.8466,0.8402,0.8395
1,SVM,BC15,basic,0.8987,0.9039,0.8987,0.8984
2,Random Forest,BC15,basic,0.8526,0.8543,0.8526,0.8524
3,KNN,BC15,gist,0.6367,0.6368,0.6367,0.6367
4,SVM,BC15,gist,0.696,0.6963,0.696,0.6959
5,Random Forest,BC15,gist,0.7702,0.7707,0.7702,0.7701
6,KNN,BC15,glcm,0.6738,0.6766,0.6738,0.6726
7,SVM,BC15,glcm,0.7512,0.7512,0.7512,0.7512
8,Random Forest,BC15,glcm,0.7166,0.7167,0.7166,0.7166
9,KNN,BC15,lbp,0.5568,0.557,0.5568,0.5567


In [76]:
df[df[Features == 'basic']]

NameError: name 'Features' is not defined

In [None]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  
    print(df)

ValueError: Value must be a nonnegative integer or None

In [59]:
df.to_csv('results.csv',index=False)