# Bài thực hành Lab 01

## Yêu cầu
1. Sử dung tập dữ liệu train và test từ trang web:

https://github.com/metinmertakcay/Color_Classification_using_Color_Descriptors

2. Cho biết kết quả khi áp dụng phương pháp KNN (với k=1, k=5) 

3. Sử dụng ít nhất 4 loại đặc trưng về màu sắc: `Histogram`; `Color moments`; `CDC`;   `CCV`

4. Với 4 độ đo: `Euclidean`, `Correlation`, `Chi-square`, `Intersection`, `Bhattacharyya`



Lưu ý: Nhớ chuyển các vector đặc trưng thành vector đơn vị

In [81]:
# Import lib
import numpy as np
import cv2
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import MinMaxScaler


### Load data

In [82]:
def load_dataset(data_dir, class_colors):
    X = [];
    y = []

    for label_dir in os.listdir(data_dir):

        for img_file in os.listdir(os.path.join(data_dir, label_dir)):
            img_path = os.path.join(data_dir, label_dir, img_file)
            image = cv2.imread(img_path, cv2.COLOR_GRAY2BGR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2LUV)
            X.append(image)
            y.append(class_colors.index(label_dir));

    return np.array(X), np.array(y)


In [83]:
BIN_SIZE = 8
train_path = 'train'
test_path = 'test'

class_colors = ["black", "blue", "green","orange",
                "red", "violet", "white", "yellow"]

X_train, y_train = load_dataset(train_path, class_colors)
X_test, y_test = load_dataset(test_path, class_colors)

### Định nghĩa các hàm rút trích đặc trưng
* Histogram
* Color Moments
* CDC
* CCV

In [84]:
# Histogram
def extract_histogram(image):
    row, column, channel = image.shape[:3]
    size = row * column
    
    feature = []
    for k in range(channel):
        histogram = np.squeeze(cv2.calcHist([image], [k], None, [BIN_SIZE], [0, 256]))
        histogram = histogram / size
        feature.extend(histogram)
    return feature

In [85]:
# Color_momemts
epsilon = 1e-10
def moment(channel):
    feature = []
    mean_value = np.mean(channel)
    std_value = np.std(channel)
    
    if std_value == 0:
        feature.append(mean_value)
        feature.append(std_value)
        feature.append(0)
        feature.append(0)
    else:
        feature.append(mean_value)
        feature.append(std_value)
        feature.append(skew(channel))
        feature.append(kurtosis(channel))
    
    return feature


def extract_color_moment(image):
    row, column, channel = image.shape[:3]
    
    channel_list = []
    for i in range(channel):
        channel_list.append([])
    
    for i in range(row):
        for j in range(column):
            for k in range(channel):
                channel_list[k].append(image[i][j][k])
    
    feature = []
    for i in range(channel):
        feature.extend(moment(channel_list[i]))
    
    return feature

In [86]:
# cdc
def extract_cdc(image):
    # Lấy vùng trung tâm của ảnh
    center = image[image.shape[0]//4:3*image.shape[0]//4, image.shape[1]//4:3*image.shape[1]//4]
    return np.mean(center, axis=(0, 1))


In [87]:
# ccv
def extract_ccv(image, t=50):
    row, col, ch = image.shape
    ccv_feature = []
    for i in range(ch):
        channel = image[:, :, i]
        hist, _ = np.histogram(channel, bins=256, range=(0, 256))
        coherent = np.sum(hist[hist >= t])
        incoherent = np.sum(hist[hist < t])
        ccv_feature.append(coherent)
        ccv_feature.append(incoherent)
    return ccv_feature

## Rút trích đặc trưng

In [88]:
def extract_image(image):
    hist = extract_histogram(image)
    moments = extract_color_moment(image)
    cdc = extract_cdc(image)
    ccv = extract_ccv(image)
    
    features = np.concatenate([hist, moments, cdc, ccv])
    
    return features

In [89]:
def extract_features(X):
    X_features = []
    for image in X:       
        features = extract_image(image)
        X_features.append(features)
    
    return np.array(X_features)

In [90]:
X_train_features = extract_features(X_train)
X_test_features = extract_features(X_test)

## Normalize

In [91]:
scaler = MinMaxScaler()
X_train_features =scaler.fit_transform(X_train_features)
X_test_features = scaler.transform(X_test_features)

## Train model

In [92]:
# Định nghĩa metric
def chi_square(p, q, eps=1e-10):
    return 0.5 * np.sum(((p - q) ** 2) / (p + q + eps))

# def intersection(histA, histB):
#     return np.sum(np.minimum(histA, histB))
def intersection(x, y):
    return 1 - np.sum(np.minimum(x, y))

def bhattacharyya(x, y):
    return -np.log(np.sum(np.sqrt(x * y)) + 1e-10)


In [93]:
metrics = {
    'euclidean': 'euclidean',
    'correlation': 'correlation',
    'chi-square': chi_square,
    'intersection': intersection,
    'bhattacharyya': bhattacharyya
}

In [94]:
for k in [1, 5]:
    print("\n")
    print(f"K = {k}:")
   
    for metric_name, metric in metrics.items():
        
        # Define model
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
        knn.fit(X_train_features, y_train)
        
        y_pred = knn.predict(X_test_features)
        
        accuracy = accuracy_score(y_test, y_pred)
        
        print(f"Metric = {metric_name}, Accuracy = {accuracy*100:.2f}%")



K = 1:
Metric = euclidean, Accuracy = 86.25%
Metric = correlation, Accuracy = 87.50%
Metric = chi-square, Accuracy = 87.50%
Metric = intersection, Accuracy = 83.75%
Metric = bhattacharyya, Accuracy = 60.00%


K = 5:
Metric = euclidean, Accuracy = 86.25%
Metric = correlation, Accuracy = 93.75%
Metric = chi-square, Accuracy = 82.50%
Metric = intersection, Accuracy = 77.50%
Metric = bhattacharyya, Accuracy = 53.75%
