So sánh độ chính xác giữa phương pháp kNN và LogisticRegression cho tập dữ liệu dogscats_small

In [1]:
import cv2                
from skimage import feature  
import os
import numpy as np
from skimage.feature import hog
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression


In [2]:
root_dir = 'Dataset/dogscats_small'

train_images_path, train_labels = [], []
test_images_path, test_labels = [], []

for dataset_type in ['train', 'test']:
    dataset_path = os.path.join(root_dir, dataset_type) 
    if not os.path.isdir(dataset_path):
        continue
    for image_filename in os.listdir(dataset_path):
        image_path = os.path.join(dataset_path, image_filename)
        if not os.path.isfile(image_path):
            continue
        label = image_filename.split('.')[0] 
        if dataset_type == 'train':
            train_images_path.append(image_path)
            train_labels.append(label)
        else:
            test_images_path.append(image_path)
            test_labels.append(label)


In [3]:
def compute_hog_features(img_paths):
    features = []  
    for img_path in img_paths:
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, (128, 256))
        
        hog_features, hog_image = hog(
            img, 
            orientations=9, 
            pixels_per_cell=(8, 8), 
            cells_per_block=(2, 2), 
            block_norm='L2-Hys', 
            visualize=True, 
            transform_sqrt=True
        )
        
        features.append(hog_features)  
    return np.array(features)  

train_features = compute_hog_features(train_images_path)
test_features = compute_hog_features(test_images_path)

scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
test_features = scaler.transform(test_features)


In [4]:
for k in range(1, 10):
    knn = KNeighborsClassifier(n_neighbors=k)  
    knn.fit(train_features, train_labels)                 
    y_pred = knn.predict(test_features)               
    accuracy = accuracy_score(test_labels, y_pred)
    print(f"Accuracy for k={k}: {accuracy}")

Accuracy for k=1: 0.6
Accuracy for k=2: 0.6
Accuracy for k=3: 0.6
Accuracy for k=4: 0.6
Accuracy for k=5: 0.5666666666666667
Accuracy for k=6: 0.5333333333333333
Accuracy for k=7: 0.6
Accuracy for k=8: 0.5666666666666667
Accuracy for k=9: 0.5666666666666667


In [5]:
log_reg = LogisticRegression(max_iter=20)
log_reg.fit(train_features, train_labels)
y_pred = log_reg.predict(test_features)
accuracy = accuracy_score(test_labels, y_pred)
print(accuracy)

0.4666666666666667


Nhận xét: KNN(k từ 1 tới 9) trong trường hợp này sẽ cho kết quả chính xác hơn LogisticRegression.