### 1. Trích đặc trưng Hu's moment và đặc trưng HOG. Lưu lại dưới dạng bảng Excel và đặt trong thư mục "Hu_nhom_08" và thư mục "HOG_nhom_08". Gán nhãn cho từng vector đặc trưng.


In [4]:
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog
import os
from skimage import color
from sklearn.preprocessing import LabelEncoder


In [None]:
def get_hu_moments(image_path):
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    moments = cv2.moments(thresh)
    huMoments = cv2.HuMoments(moments)

    for i in range(0, 7):
        huMoments[i] = -1 * np.sign(huMoments[i]) * np.log10(np.abs(huMoments[i]))

    return huMoments


In [None]:
def get_hog_features(image_path):
    image = cv2.imread(image_path)
    gray = color.rgb2gray(image)

    gray = cv2.resize(gray, (200, 100))

    fd = hog(gray, orientations=9, pixels_per_cell=(10, 10), cells_per_block=(2, 2), transform_sqrt=True, block_norm="L1")

    return fd


In [None]:
def save_features_to_excel(features, labels, folder, file_name):
    df = pd.DataFrame(features)
    df['label'] = labels
    if not os.path.exists(folder):
        os.makedirs(folder)
    df.to_excel(f'{folder}/{file_name}.xlsx')

In [None]:
folders = ['Leaf_Nhom_8/Ligustrum lucidum Ait. f', 'Leaf_Nhom_8/Phyllostachys edulis (Carr.) Houz','Leaf_Nhom_8/Prunus serrulata Lindl. var. lannesiana auct' ]

all_hu_moments = []
all_hog_features = []
labels = []

In [None]:
for folder in folders:
    for file_name in os.listdir(folder):
        file_path = os.path.join(folder, file_name)
        hu_moments = get_hu_moments(file_path)
        hog_features = get_hog_features(file_path)
        label = folder.split('/')[-1] 
        all_hu_moments.append(hu_moments)
        all_hog_features.append(hog_features)
        labels.append(label)

In [None]:
all_hu_moments = np.squeeze(all_hu_moments)
all_hog_features = np.squeeze(all_hog_features)

save_features_to_excel(all_hu_moments, labels, 'Hu_nhom_8', 'hu_moments')
save_features_to_excel(all_hog_features, labels, 'HOG_nhom_8', 'hog_features')

### 2. Thực hiện phân loại lá cây dùng phương pháp KNN. Đánh giá dùng phương pháp cross validation.

#### 3.1 KNN + Hu's moment

In [44]:
import pandas as pd

data_hu = pd.read_excel('Hu_nhom_8/hu_moments.xlsx')
data_hu.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,label
0,0,3.106052,6.661818,11.63466,12.955794,-25.262271,-16.293102,-25.899424,Ligustrum lucidum Ait. f
1,1,3.14598,6.939851,11.427347,12.40898,24.327604,15.884834,25.664164,Ligustrum lucidum Ait. f
2,2,3.149945,6.964705,11.743074,12.748875,24.998285,16.236095,-25.896948,Ligustrum lucidum Ait. f
3,3,3.088872,6.571084,11.884503,12.967159,25.884861,-17.420964,25.416791,Ligustrum lucidum Ait. f
4,4,3.152165,6.981056,11.92808,13.038729,25.623877,16.681436,-25.735647,Ligustrum lucidum Ait. f


In [45]:
X = data_hu.drop(columns=['Unnamed: 0', 'label'])
y = data_hu['label']

# Encode the labels into integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [46]:
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV


param_grid_knn = {
    'n_neighbors': range(2, 15),
    'metric': ['euclidean', 'manhattan']
}

cv = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
grid_knn = GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=cv, scoring='accuracy')
grid_knn.fit(X, y)

best_index = grid_knn.best_index_
cv_results = grid_knn.cv_results_

print(f"Optimal parameters for KNN: {grid_knn.best_params_}")
print(f"Best cross-validation accuracy: {grid_knn.best_score_:.2f}%")
print(f"Cross-validation scores for the best parameters: {cv_results['split0_test_score'][best_index]:.2f}, "
      f"{cv_results['split1_test_score'][best_index]:.2f}, "
      f"{cv_results['split2_test_score'][best_index]:.2f}, "
      f"{cv_results['split3_test_score'][best_index]:.2f}, "
      f"{cv_results['split4_test_score'][best_index]:.2f}")

Optimal parameters for KNN: {'metric': 'manhattan', 'n_neighbors': 2}
Best cross-validation accuracy: 0.84%
Cross-validation scores for the best parameters: 0.82, 0.85, 0.85, 0.85, 0.79


#### 3.2 KNN + HOG

In [47]:
import pandas as pd

data_hog = pd.read_excel('HOG_nhom_8/hog_features.xlsx')
data_hog.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,6147,6148,6149,6150,6151,6152,6153,6154,6155,label
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
3,3,0.030831,0.01801,0.0,0.005129,3.1e-05,0.0,0.022859,0.002515,0.007366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f


In [48]:
X = data_hog.drop(columns=['Unnamed: 0', 'label'])
y = data_hog['label']

# Encode the labels into integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [49]:
grid_knn.fit(X, y)

best_index = grid_knn.best_index_
cv_results = grid_knn.cv_results_


print(f"Optimal parameters for KNN: {grid_knn.best_params_}")
print(f"Best cross-validation accuracy: {grid_knn.best_score_:.2f}%")
print(f"Cross-validation scores for the best parameters: {cv_results['split0_test_score'][best_index]:.2f}, "
      f"{cv_results['split1_test_score'][best_index]:.2f}, "
      f"{cv_results['split2_test_score'][best_index]:.2f}, "
      f"{cv_results['split3_test_score'][best_index]:.2f}, "
      f"{cv_results['split4_test_score'][best_index]:.2f}")

Optimal parameters for KNN: {'metric': 'manhattan', 'n_neighbors': 11}
Best cross-validation accuracy: 0.99%
Cross-validation scores for the best parameters: 1.00, 1.00, 0.97, 1.00, 1.00


#### 3.3 SVM + Hu

In [50]:
import pandas as pd

data_hu = pd.read_excel('Hu_nhom_8/hu_moments.xlsx')
data_hu.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,label
0,0,3.106052,6.661818,11.63466,12.955794,-25.262271,-16.293102,-25.899424,Ligustrum lucidum Ait. f
1,1,3.14598,6.939851,11.427347,12.40898,24.327604,15.884834,25.664164,Ligustrum lucidum Ait. f
2,2,3.149945,6.964705,11.743074,12.748875,24.998285,16.236095,-25.896948,Ligustrum lucidum Ait. f
3,3,3.088872,6.571084,11.884503,12.967159,25.884861,-17.420964,25.416791,Ligustrum lucidum Ait. f
4,4,3.152165,6.981056,11.92808,13.038729,25.623877,16.681436,-25.735647,Ligustrum lucidum Ait. f


In [51]:
X = data_hu.drop(columns=['Unnamed: 0', 'label'])
y = data_hu['label']

# Encode the labels into integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [52]:
from sklearn.svm import SVC

param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
    'gamma': ['scale', 'auto']
}

grid_svm = GridSearchCV(SVC(), param_grid_svm, cv=cv, scoring='accuracy')
grid_svm.fit(X, y)

best_index = grid_knn.best_index_
cv_results = grid_knn.cv_results_


print(f"Optimal parameters for SVM: {grid_svm.best_params_}")
print(f"Best cross-validation accuracy: {grid_svm.best_score_:.2f}%")

print(f"Cross-validation scores for the best parameters: {cv_results['split0_test_score'][best_index]:.2f}, "
      f"{cv_results['split1_test_score'][best_index]:.2f}, "
      f"{cv_results['split2_test_score'][best_index]:.2f}, "
      f"{cv_results['split3_test_score'][best_index]:.2f}, "
      f"{cv_results['split4_test_score'][best_index]:.2f}")

Optimal parameters for SVM: {'C': 10, 'gamma': 'scale', 'kernel': 'linear'}
Best cross-validation accuracy: 0.91%
Cross-validation scores for the best parameters: 1.00, 1.00, 0.97, 1.00, 1.00


#### 3.4 SVM + HOG

In [53]:
import pandas as pd

data_hog = pd.read_excel('HOG_nhom_8/hog_features.xlsx')
data_hog.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,6147,6148,6149,6150,6151,6152,6153,6154,6155,label
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
3,3,0.030831,0.01801,0.0,0.005129,3.1e-05,0.0,0.022859,0.002515,0.007366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Ligustrum lucidum Ait. f


In [54]:
X = data_hog.drop(columns=['Unnamed: 0', 'label'])
y = data_hog['label']

# Encode the labels into integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [55]:
grid_svm.fit(X, y)

best_index = grid_knn.best_index_
cv_results = grid_knn.cv_results_

print(f"Optimal parameters for SVM: {grid_svm.best_params_}")
print(f"Best cross-validation accuracy: {grid_svm.best_score_:.2f}%")

print(f"Cross-validation scores for the best parameters: {cv_results['split0_test_score'][best_index]:.2f}, "
      f"{cv_results['split1_test_score'][best_index]:.2f}, "
      f"{cv_results['split2_test_score'][best_index]:.2f}, "
      f"{cv_results['split3_test_score'][best_index]:.2f}, "
      f"{cv_results['split4_test_score'][best_index]:.2f}")

Optimal parameters for SVM: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Best cross-validation accuracy: 0.99%
Cross-validation scores for the best parameters: 1.00, 1.00, 0.97, 1.00, 1.00
