In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.preprocessing import StandardScaler

In [4]:
def manhattan_distance(x, y):
    """Tính khoảng cách Manhattan (slide trang 19)"""
    return sum(abs(a - b) for a, b in zip(x, y))

def knn_predict(X_train, y_train, X_test, k=9):
    """Dự đoán nhãn bằng KNN (slide trang 14)"""
    predictions = []
    for test_point in X_test:
        distances = [(manhattan_distance(test_point, train_point), label)
                     for train_point, label in zip(X_train, y_train)]
        distances.sort(key=lambda x: x[0])
        k_nearest_labels = [label for _, label in distances[:k]]
        most_common = Counter(k_nearest_labels).most_common(1)[0][0]
        predictions.append(most_common)
    return predictions

def classify_iris_knn(train_file, test_file, k=9):
    """Phân loại hoa Iris bằng KNN với tệp train/test"""
    try:
        # Đọc dữ liệu train
        train_data = pd.read_csv(train_file, header=None)
        X_train = train_data.iloc[:, :-1].values  # 4 đặc trưng
        y_train = train_data.iloc[:, -1].values   # Nhãn
        
        # Đọc dữ liệu test
        test_data = pd.read_csv(test_file, header=None)
        # Chỉ lấy 4 đặc trưng, bỏ cột nhãn nếu có
        X_test = test_data.iloc[:, :4].values
        
    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
        return None
    except Exception as e:
        print(f"Error reading files: {e}")
        return None
    
    # Chuẩn hóa dữ liệu (slide trang 23)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Dự đoán
    predictions = knn_predict(X_train, y_train, X_test, k=k)
    
    # In kết quả
    print(f"KNN Predictions for {test_file}:")
    for i, pred in enumerate(predictions, 1):
        print(f"Sample {i}: {pred}")
    
    return predictions

In [6]:
train_file = "input_2.csv"
test_file = "output_2.csv"
classify_iris_knn(train_file, test_file)

KNN Predictions for output_2.csv:
Sample 1: Iris-setosa
Sample 2: Iris-setosa
Sample 3: Iris-setosa
Sample 4: Iris-setosa
Sample 5: Iris-setosa
Sample 6: Iris-setosa
Sample 7: Iris-setosa
Sample 8: Iris-setosa
Sample 9: Iris-setosa
Sample 10: Iris-setosa
Sample 11: Iris-versicolor
Sample 12: Iris-versicolor
Sample 13: Iris-versicolor
Sample 14: Iris-versicolor
Sample 15: Iris-versicolor
Sample 16: Iris-versicolor
Sample 17: Iris-versicolor
Sample 18: Iris-versicolor
Sample 19: Iris-versicolor
Sample 20: Iris-versicolor
Sample 21: Iris-versicolor
Sample 22: Iris-versicolor
Sample 23: Iris-versicolor
Sample 24: Iris-virginica
Sample 25: Iris-virginica
Sample 26: Iris-virginica
Sample 27: Iris-virginica
Sample 28: Iris-virginica
Sample 29: Iris-versicolor
Sample 30: Iris-virginica


['Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-setosa',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-versicolor',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-virginica',
 'Iris-versicolor',
 'Iris-virginica']