In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.svm import SVC

In [2]:
# Load the rice-seed results table; the path is relative to the working directory.
rice = pd.read_csv(filepath_or_buffer='rice_seed_result.csv')

In [10]:
# Lift pandas' display limits so long cell values and all columns are rendered.
for _display_option in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [11]:
# Show only the rows whose 'Feature' value is exactly 'Basic'.
rice.loc[rice['Feature'].eq('Basic')]

Unnamed: 0,Rice Seed,Model,Feature,Precision,Recall,F1-score,Accuracy,Best Param
0,BC-15,KNN,Basic,0.806676,0.938511,0.867614,0.854201,{'n_neighbors': 20}
1,BC-15,SVM,Basic,0.86756,0.943366,0.903876,0.897858,"{'C': 1, 'kernel': 'rbf'}"
2,BC-15,RF,Basic,0.841867,0.904531,0.872075,0.864909,"{'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 150}"
45,HuongThom,KNN,Basic,0.871369,0.897436,0.884211,0.879562,{'n_neighbors': 10}
46,HuongThom,SVM,Basic,0.929972,0.945869,0.937853,0.935766,"{'C': 1, 'kernel': 'rbf'}"
47,HuongThom,RF,Basic,0.908832,0.908832,0.908832,0.906569,"{'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 200}"
90,Nep87,KNN,Basic,0.967816,0.963387,0.965596,0.968388,{'n_neighbors': 10}
91,Nep87,SVM,Basic,0.961538,0.97254,0.967008,0.969442,"{'C': 0.5, 'kernel': 'linear'}"
92,Nep87,RF,Basic,0.96789,0.965675,0.966781,0.969442,"{'criterion': 'entropy', 'max_depth': 10, 'n_estimators': 140}"
135,Q5,KNN,Basic,0.899083,0.964567,0.930674,0.926559,{'n_neighbors': 10}


In [4]:
# Mean of every numeric column, computed per distinct "Rice Seed" value.
df_grouped_seed = (
    rice
    .groupby(by="Rice Seed")
    .mean(numeric_only=True)
)
df_grouped_seed

Unnamed: 0_level_0,Precision,Recall,F1-score,Accuracy
Rice Seed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BC-15,0.782588,0.805322,0.79258,0.788962
HuongThom,0.842167,0.885755,0.862545,0.85335
Nep87,0.896323,0.929519,0.911597,0.914998
Q5,0.790394,0.844663,0.815503,0.804762
ThienUu,0.897088,0.920875,0.908514,0.907721
Xi-23,0.897088,0.920875,0.908514,0.907721


In [5]:
# Mean of every numeric column, computed per distinct "Model" value.
df_grouped_model = (
    rice
    .groupby(by="Model")
    .mean(numeric_only=True)
)
df_grouped_model

Unnamed: 0_level_0,Precision,Recall,F1-score,Accuracy
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KNN,0.800671,0.850701,0.823065,0.817672
RF,0.87142,0.901283,0.885767,0.882725
SVM,0.880732,0.901521,0.890794,0.88836


In [12]:
# Mean of every numeric column, computed per distinct "Feature" value.
df_grouped_feature = (
    rice
    .groupby(by="Feature")
    .mean(numeric_only=True)
)
df_grouped_feature

Unnamed: 0_level_0,Precision,Recall,F1-score,Accuracy
Feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
All,0.884514,0.929302,0.905807,0.901966
Basic,0.918543,0.950156,0.933749,0.931556
Basic GLCM,0.923251,0.953064,0.937686,0.935728
Basic GLCM GIST,0.883735,0.92756,0.904234,0.900066
Basic Gist,0.878403,0.920815,0.898362,0.894177
Basic LBP,0.917578,0.947201,0.931861,0.929554
Basic LBP GIST,0.87819,0.926379,0.900815,0.896307
Basic LBP GLCM,0.923391,0.95136,0.93688,0.934867
GLCM,0.805477,0.83721,0.820316,0.814174
Gist,0.788809,0.813082,0.798106,0.797008


In [8]:
# Location of the second results table. The default is the original author's
# absolute path, which only exists on that machine; set the RICE_RESULTS_CSV
# environment variable to load the file from elsewhere without editing this cell.
ALL_RESULTS_CSV = os.environ.get(
    'RICE_RESULTS_CSV',
    '/home/duyle/Rice_photos/features_extracted/moments/tested_all.csv',
)
all_df = pd.read_csv(ALL_RESULTS_CSV)

In [10]:
# Remove the column-width and column-count display limits (None = unlimited).
pd.options.display.max_colwidth = None
pd.options.display.max_columns = None

In [11]:
# Render the loaded results table as this cell's output.
all_df

Unnamed: 0,Model,Dataset,Feature Combination,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
0,K-Nearest Neighbors,BC-15,Basic,0.857496,0.864337,0.857496,0.856843,0.848552
1,Support Vector Machine,BC-15,Basic,0.902801,0.908246,0.902801,0.902486,0.895244
2,Random Forest,BC-15,Basic,0.878089,0.882915,0.878089,0.877717,0.864792
3,K-Nearest Neighbors,BC-15,Enhanced Color,0.737232,0.738754,0.737232,0.736786,0.740152
4,Support Vector Machine,BC-15,Enhanced Color,0.784185,0.784445,0.784185,0.784142,0.792536
...,...,...,...,...,...,...,...,...
2281,Support Vector Machine,Xi23,Enhanced Color+Zernike moments+Color+LBP+GLCM+GIST,0.921053,0.921886,0.921053,0.921136,0.917531
2282,Random Forest,Xi23,Enhanced Color+Zernike moments+Color+LBP+GLCM+GIST,0.920322,0.920304,0.920322,0.920308,0.915022
2283,K-Nearest Neighbors,Xi23,Basic+Enhanced Color+Zernike moments+Color+LBP+GLCM+GIST,0.850877,0.854733,0.850877,0.849653,0.859559
2284,Support Vector Machine,Xi23,Basic+Enhanced Color+Zernike moments+Color+LBP+GLCM+GIST,0.923246,0.924132,0.923246,0.923329,0.923294


In [20]:

# Disable every pandas display limit used here; with max_rows unset,
# DataFrames are rendered in full rather than truncated.
_unlimited_display = {
    'display.max_colwidth': None,
    'display.max_columns': None,
    'display.max_rows': None,
}
for _option_name, _option_value in _unlimited_display.items():
    pd.set_option(_option_name, _option_value)

In [22]:
# Subset of the results whose 'Model' value is exactly 'Random Forest'.
x = all_df.loc[all_df['Model'].eq('Random Forest')]

In [50]:
# Mean of every numeric column, computed per distinct "Dataset" value.
all_df_group = (
    all_df
    .groupby(by="Dataset")
    .mean(numeric_only=True)
)
all_df_group

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BC-15,0.853373,0.855998,0.853373,0.853075,0.854848
Huongthom,0.927224,0.928952,0.927224,0.927093,0.927391
Nep87,0.93833,0.942135,0.93833,0.938,0.931829
Q5,0.896542,0.898303,0.896542,0.896326,0.896562
Thien_uu,0.945964,0.947776,0.945964,0.945884,0.94502
Xi23,0.873655,0.875437,0.873655,0.87294,0.872223


In [23]:
# Order the rows of `x` by the 'CV_scores(5fold)' column, highest value first.
x.sort_values(
    by='CV_scores(5fold)',
    ascending=False,
)

Unnamed: 0,Model,Dataset,Feature Combination,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
1643,Random Forest,Thien_uu,Basic+Color+GIST,0.984894,0.984968,0.984894,0.984895,0.991813
1769,Random Forest,Thien_uu,Basic+Color+GLCM+GIST,0.989426,0.989541,0.989426,0.989427,0.987349
1844,Random Forest,Thien_uu,Basic+Enhanced Color+Color+GLCM+GIST,0.990937,0.99101,0.990937,0.990937,0.987349
1733,Random Forest,Thien_uu,Basic+Enhanced Color+Color+GIST,0.989426,0.989468,0.989426,0.989427,0.986603
1841,Random Forest,Thien_uu,Basic+Enhanced Color+Color+LBP+GIST,0.986405,0.986447,0.986405,0.986406,0.986603
1766,Random Forest,Thien_uu,Basic+Color+LBP+GIST,0.981873,0.982037,0.981873,0.981875,0.985116
1730,Random Forest,Thien_uu,Basic+Enhanced Color+Color+GLCM,0.989426,0.989431,0.989426,0.989426,0.985113
1580,Random Forest,Thien_uu,Zernike moments+Color,0.983384,0.983749,0.983384,0.983385,0.983632
1553,Random Forest,Thien_uu,Basic+Color,0.983384,0.983499,0.983384,0.983385,0.983629
1691,Random Forest,Thien_uu,Zernike moments+Color+GIST,0.981873,0.982163,0.981873,0.981874,0.982889


In [None]:
# NOTE: feature combination of interest: Basic+Enhanced Color+Zernike moments+GLCM/LBP

In [46]:
# Rows for the 'Nep87' dataset, ordered from highest to lowest 'Accuracy'.
(
    all_df
    .loc[all_df['Dataset'].eq('Nep87')]
    .sort_values(by='Accuracy', ascending=False)
)

Unnamed: 0,Model,Dataset,Feature Combination,Accuracy,Precision,Recall,F1 Score,CV_scores(5fold)
955,Support Vector Machine,Nep87,Basic+Enhanced Color+Zernike moments+LBP,0.988409,0.988665,0.988409,0.988404,0.984924
865,Support Vector Machine,Nep87,Basic+Zernike moments+LBP,0.987355,0.987565,0.987355,0.98735,0.980246
895,Support Vector Machine,Nep87,Enhanced Color+Zernike moments+LBP,0.987355,0.987565,0.987355,0.98735,0.987004
784,Support Vector Machine,Nep87,Basic+Enhanced Color,0.987355,0.987387,0.987355,0.987353,0.977649
787,Support Vector Machine,Nep87,Basic+Zernike moments,0.987355,0.987565,0.987355,0.98735,0.981807
820,Support Vector Machine,Nep87,Zernike moments+LBP,0.986301,0.986402,0.986301,0.986298,0.976092
1066,Support Vector Machine,Nep87,Basic+Enhanced Color+Zernike moments+LBP+GLCM,0.986301,0.98647,0.986301,0.986297,0.98648
847,Support Vector Machine,Nep87,Basic+Enhanced Color+Zernike moments,0.986301,0.986658,0.986301,0.986294,0.984924
931,Support Vector Machine,Nep87,Zernike moments+LBP+GLCM,0.986301,0.986402,0.986301,0.986298,0.981284
1021,Support Vector Machine,Nep87,Enhanced Color+Zernike moments+LBP+GLCM,0.986301,0.986555,0.986301,0.986295,0.986999
