In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tqdm import tqdm
from itertools import combinations

In [2]:
# Hand-measured descriptors: 39 columns. Judging by the names these cover
# shape, texture and per-channel colour mean/std values -- confirm against the
# extraction code.  Note that "mean_B"/"std_B" are upper-case and are distinct
# columns from the lower-case "mean_b"/"std_b" further down.
basic_cols = [
    "area", "length", "width", "ratio",
    "major_axis_length", "minor_axis_length",
    "convex_hull_area", "convex_hull_perimeter",
    "compactness", "eccentricity", "perimeter_per_init_area",
    "texture_mean", "texture_std", "texture_uniformity", "texture_third_moment",
    "mean_r", "mean_g", "mean_B",
    "std_r", "std_g", "std_B",
    "mean_h", "mean_s", "mean_v",
    "std_h", "std_s", "std_v",
    "mean_l", "mean_a", "mean_b",
    "std_l", "std_a", "std_b",
    "mean_y", "mean_cb", "mean_cr",
    "std_y", "std_cb", "std_cr",
]

# Index-numbered descriptor families: "<PREFIX>_0" .. "<PREFIX>_{n-1}".
lbp_cols = ["LBP_" + str(n) for n in range(10)]              # 10 columns
gist_cols = ["GIST_" + str(n) for n in range(512)]           # 512 columns
glcm_cols = ["GLCM_" + str(n) for n in range(16)]            # 16 columns
central_cols = ["Central_" + str(n) for n in range(16)]      # 16 columns
sift_cols = ["SIFT_" + str(n) for n in range(500)]           # 500 columns

# Group name -> column names.  The insertion order matters: it determines the
# order (and therefore the names) of the combinations built from this dict.
feature = dict(
    Basic=basic_cols,
    LBP=lbp_cols,
    GIST=gist_cols,
    Central=central_cols,
    SIFT=sift_cols,
    GLCM=glcm_cols,
)


def combination_features(groups, k):
    combos = {}
    for combo in combinations(groups.keys(), k):
        name = " ".join(combo)
        group_feature = [groups[i] for i in combo]
        group_feature = np.hstack(group_feature, dtype=np.object_)
        combos[name] = group_feature
    return combos


# Candidate feature sets: every single group, then every pair of groups,
# keyed by the space-joined group names.
set_of_features = {}
for i in (1, 2):
    for combo_name, combo_cols in combination_features(feature, i).items():
        set_of_features[combo_name] = combo_cols


In [3]:
def modeling(data, features, label, fea_name, ric_name):
    """Tune and evaluate KNN, SVM and random-forest classifiers on one feature set.

    67 % of the rows go to the training split (``random_state=42``), the
    features are standardised with statistics fitted on the training split
    only, and every model is tuned with a 5-fold ``GridSearchCV`` scored by
    accuracy.  The refitted best estimator is then scored on the held-out rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both the predictor columns and the label column.
    features : sequence of str
        Names of the predictor columns.
    label : str
        Name of the target column.  Precision, recall and F1 are computed with
        scikit-learn's default averaging, so a two-class target is expected.
    fea_name : str
        Display name of the feature set; copied into the ``"Feature"`` column.
    ric_name : str
        Display name of the data set; copied into the ``"Rice Seed"`` column.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns ``Rice Seed``, ``Model``,
        ``Feature``, ``Precision``, ``Recall``, ``F1-score``, ``Accuracy`` and
        ``Best Param`` (the winning parameter dict of the grid search).
    """
    x = data[features]
    y = data[label]

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, train_size=0.67, random_state=42
    )

    # NOTE(review): the scaler is fitted on the whole training split before the
    # cross-validation, so the CV folds share scaling statistics.  Wrapping the
    # scaler and the estimator in a Pipeline would remove that -- left as is to
    # keep results comparable with earlier runs.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    param_grid_rf = {"n_estimators": [10, 30, 50, 70, 80, 100, 110, 120, 140, 150, 180, 200],
                     "max_depth": [None, 10, 20, 30, 40, 50, 60, 70],
                     "criterion": ["gini", "entropy"]}

    param_grid_svc = {"kernel": ["linear", "poly", "rbf", "sigmoid"],
                      "C": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]}

    param_grid_knn = {"n_neighbors": [5, 10, 15, 20]}

    # Model name -> (unfitted estimator, parameter grid to search).
    models = {"KNN": (KNeighborsClassifier(), param_grid_knn),
              "SVM": (SVC(), param_grid_svc),
              "RF": (RandomForestClassifier(random_state=42), param_grid_rf)}

    columns = ["Rice Seed", "Model", "Feature", "Precision", "Recall",
               "F1-score", "Accuracy", "Best Param"]

    # One record per model: the constant columns are filled per row, so their
    # length always matches the number of models actually evaluated.
    rows = []
    for name, (estimator, param_grid) in models.items():
        grid = GridSearchCV(estimator=estimator,
                            param_grid=param_grid,
                            cv=5,
                            scoring="accuracy",
                            n_jobs=6)
        grid.fit(x_train, y_train)
        y_pre = grid.best_estimator_.predict(x_test)

        rows.append({"Rice Seed": ric_name,
                     "Model": name,
                     "Feature": fea_name,
                     "Precision": precision_score(y_test, y_pre),
                     "Recall": recall_score(y_test, y_pre),
                     "F1-score": f1_score(y_test, y_pre),
                     "Accuracy": accuracy_score(y_test, y_pre),
                     "Best Param": grid.best_params_})

    return pd.DataFrame(rows, columns=columns)

In [4]:
# Load the extracted-feature table of each rice variety (one CSV per variety).
df_bc15 = pd.read_csv(r"D:\python\rice_data\Feature_Extraction\bc15.csv")
df_huongthom = pd.read_csv(r"D:\python\rice_data\Feature_Extraction\huongthom.csv")
df_nep87 = pd.read_csv(r"D:\python\rice_data\Feature_Extraction\nep87.csv")
df_q5 = pd.read_csv(r"D:\python\rice_data\Feature_Extraction\Q5.csv")
df_thien_uu = pd.read_csv(r"D:\python\rice_data\Feature_Extraction\thien_uu.csv")
df_xi23 = pd.read_csv(r"D:\python\rice_data\Feature_Extraction\xi23.csv")

# Display name -> table; this is the mapping the experiment loop iterates over.
df = dict(zip(
    ("BC-15", "HuongThom", "Nep87", "Q5", "ThienUu", "Xi-23"),
    (df_bc15, df_huongthom, df_nep87, df_q5, df_thien_uu, df_xi23),
))

In [5]:
# Run the model comparison for every (rice variety, feature set) pair.
# The per-run frames are collected in their own list so that `df_result` is
# only ever a DataFrame, and the inner loop variable is `feature_cols` so the
# module-level `feature` dict is not rebound by this cell.
result_frames = []

try:
    for rice_name, rice in tqdm(df.items(), desc="Rice Seed"):
        for feature_name, feature_cols in tqdm(set_of_features.items(), desc="Processing"):
            result_frames.append(
                modeling(data=rice, features=feature_cols, label="Label",
                         fea_name=feature_name, ric_name=rice_name)
            )
finally:
    # Also runs when the (hours-long) loop is interrupted or fails, so the runs
    # finished so far stay available.  pd.concat rejects an empty list, hence
    # the fallback to an empty frame.
    df_result = (pd.concat(result_frames, ignore_index=True)
                 if result_frames else pd.DataFrame())

df_result

Rice Seed:   0%|          | 0/6 [00:00<?, ?it/s]
Processing:   0%|          | 0/63 [00:00<?, ?it/s][A
Processing:   2%|▏         | 1/63 [02:20<2:24:57, 140.27s/it][A
Processing:   3%|▎         | 2/63 [08:42<4:25:31, 261.18s/it][A
Rice Seed:   0%|          | 0/6 [08:42<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Persist the collected metrics table as CSV, without the row index.
df_result.to_csv(
    path_or_buf=r"D:\python\rice_data\Feature_Extraction\rice_seed_result.csv",
    index=False,
)