In [1]:
import cv2 as cv
import numpy as np
import time
from skimage.feature import local_binary_pattern
from scipy.stats import mode

In [2]:
image_train=np.load("E:/1-Collage/3-Senior-2/CV/Project/data_set/image_train.npy")
label_train=np.load("E:/1-Collage/3-Senior-2/CV/Project/data_set/label_train.npy")
image_test=np.load("E:/1-Collage/3-Senior-2/CV/Project/data_set/image_test.npy")
label_test=np.load("E:/1-Collage/3-Senior-2/CV/Project/data_set/label_test.npy")

In [3]:
radius = 1
n_points = 8 * radius
method = 'uniform'

# Shuffle the training data and labels
indices = np.arange(len(image_train))
np.random.shuffle(indices)
image_train = image_train[indices]
label_train = label_train[indices]

# Shuffle the test data and labels
indices = np.arange(len(image_test))
np.random.shuffle(indices)
image_test = image_test[indices]
label_test = label_test[indices]


LBP_train = [
    local_binary_pattern(cv.cvtColor(img, cv.COLOR_RGB2GRAY), n_points, radius, method)
    for img in image_train
]
LBP_test = [
    local_binary_pattern(cv.cvtColor(img, cv.COLOR_RGB2GRAY), n_points, radius, method)
    for img in image_test
]

In [5]:
n_bins = n_points + 2
LBP_train_histogram = np.array([
    cv.calcHist([lbp.astype('float32')], [0], None, [n_bins], [0, n_bins]).flatten()
    for lbp in LBP_train
])
LBP_test_histogram = np.array([
    cv.calcHist([lbp.astype('float32')], [0], None, [n_bins], [0, n_bins]).flatten()
    for lbp in LBP_test
])

LBP_all=np.concatenate((LBP_train_histogram,LBP_test_histogram),axis=0)
mean = np.mean(LBP_all, axis=0)
std = np.std(LBP_all, axis=0)

LBP_train_histogram = (LBP_train_histogram - mean) / std
LBP_test_histogram = (LBP_test_histogram - mean) / std


y_train = label_train.flatten()
y_test = label_test.flatten()

In [8]:
def knn_predict(X_train, y_train, X_test, k=3):
    predictions = []
    for x_test in X_test:
        # Compute distances to all training points
        distances = np.linalg.norm(X_train - x_test, axis=1)

        # Find the k nearest neighbors
        k_indices = np.argsort(distances)[:k]
        k_labels = y_train[k_indices]

        # Determine the most common label
        unique_labels, counts = np.unique(k_labels, return_counts=True)
        most_common = unique_labels[np.argmax(counts)]
        
        predictions.append(most_common)

    return np.array(predictions)





In [9]:
y_pred = knn_predict(LBP_train_histogram, y_train, LBP_test_histogram, k=3)  # Use a subset of test data for speed

accuracy = np.mean(y_pred == y_test)  # Match the subset size
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 60.34%


In [None]:
point=12250
print(f"Prediction: {y_pred[point]}")
print(f"Prediction: {y_test[point]}")

In [6]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from skimage.feature import local_binary_pattern

# Train k-NN
knn = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
knn.fit(LBP_train_histogram, y_train)

# Predict and evaluate
y_pred = knn.predict(LBP_test_histogram)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 60.34%


In [7]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(LBP_train_histogram, label_train)


y_test_pred = svm_model.predict(LBP_test_histogram)
print("Test Accuracy:", accuracy_score(label_test, y_test_pred))

Test Accuracy: 0.6006181464075533


In [14]:
def k_means(data, k, max_iters=10000, tol=1e-6):
    
    n_samples, _ = data.shape
    centroids = data[np.random.choice(n_samples, k, replace=False)]
    
    for i in range(max_iters):
        distances = np.linalg.norm(data[:, None, :] - centroids[None, :, :], axis=2)
        labels = np.argmin(distances, axis=1)
        
        new_centroids = np.array([data[labels == i].mean(axis=0) for i in range(k)])
        error=np.linalg.norm(new_centroids - centroids)

        if error < tol:
            print(error)
            break
        
        centroids = new_centroids

    return labels, centroids


def calculate_cluster_labels(labels_train, label_train, k):
    cluster_labels = np.empty(k, dtype=object)  # Use dtype=object for non-numeric labels

    for i in range(k):
        cluster_indices = np.where(labels_train == i)[0]
        true_labels = label_train[cluster_indices].flatten()

        if len(true_labels) > 0: 
            unique_labels, counts = np.unique(true_labels, return_counts=True)
            cluster_labels[i] = unique_labels[np.argmax(counts)]

    # Map each data point's cluster to its corresponding label
    predicted_labels = np.array([cluster_labels[label] for label in labels_train])

    # Calculate accuracy
    accuracy = np.mean(predicted_labels.flatten() == label_train.flatten())
    
    return cluster_labels, predicted_labels, accuracy

def predict(test_data, centroids):
    distances = np.linalg.norm(test_data[:, None, :] - centroids[None, :, :], axis=2)
    return np.argmin(distances, axis=1)


In [15]:
# Run K-Means clustering on the training histograms
k = 141  # Number of clusters
labels_train, centroids = k_means(LBP_train_histogram, k)

0.0


In [13]:
cluster_labels, predicted_labels, accuracy = calculate_cluster_labels(labels_train, label_train, k)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 42.89%
