# Loading the dataset

In [None]:
# Colab-only step: this import is only available inside a Google Colab runtime.
from google.colab import files

# Prompts for the four EMNIST "letters" IDX files (see the "Saving ..." lines in
# the cell output). The loading cell later reads them from '/content/'.
# NOTE(review): `uploaded` is not referenced again in the visible notebook;
# presumably it is the filename -> contents mapping returned by Colab — confirm.
uploaded = files.upload()


Saving emnist-letters-test-images-idx3-ubyte to emnist-letters-test-images-idx3-ubyte
Saving emnist-letters-test-labels-idx1-ubyte to emnist-letters-test-labels-idx1-ubyte
Saving emnist-letters-train-images-idx3-ubyte to emnist-letters-train-images-idx3-ubyte
Saving emnist-letters-train-labels-idx1-ubyte to emnist-letters-train-labels-idx1-ubyte


# Reading the data

In [None]:
import numpy as np
import os

def load_emnist_images(filename):
    """Read an image file and return its pixels as rows of 784 bytes.

    The first 16 bytes of the file are skipped (presumably the IDX3 header —
    it is not validated here) and the remainder is interpreted as unsigned
    8-bit values.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read in binary mode.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(num_images, 784)``; one flattened image
        per row.

    Raises
    ------
    ValueError
        If the payload length is not a multiple of 784 (raised by ``reshape``).
    """
    with open(filename, 'rb') as fh:
        raw_bytes = fh.read()
    # Decode everything after the 16-byte prefix as raw pixel intensities.
    pixels = np.frombuffer(raw_bytes, dtype=np.uint8, offset=16)
    return pixels.reshape(-1, 784)

def load_emnist_labels(filename):
    """Read a label file and return its labels as a flat byte array.

    The first 8 bytes of the file are skipped (presumably the IDX1 header —
    it is not validated here); every remaining byte is one label.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read in binary mode.

    Returns
    -------
    numpy.ndarray
        One-dimensional ``uint8`` array with one entry per label.
    """
    with open(filename, 'rb') as fh:
        raw_bytes = fh.read()
    # Everything past the 8-byte prefix is label data.
    return np.frombuffer(raw_bytes, dtype=np.uint8, offset=8)

# Directory the four EMNIST files are read from.
base_path = '/content/'

# Training split: flattened images and their labels.
train_images = load_emnist_images(
    os.path.join(base_path, 'emnist-letters-train-images-idx3-ubyte')
)
train_labels = load_emnist_labels(
    os.path.join(base_path, 'emnist-letters-train-labels-idx1-ubyte')
)

# Test split: flattened images and their labels.
test_images = load_emnist_images(
    os.path.join(base_path, 'emnist-letters-test-images-idx3-ubyte')
)
test_labels = load_emnist_labels(
    os.path.join(base_path, 'emnist-letters-test-labels-idx1-ubyte')
)

# Preparing training and test data for each class

In [None]:
# Scale pixel intensities from [0, 255] to [0, 1].
# The dtype guard keeps this cell idempotent: the loaders return uint8 arrays,
# so once the division has produced floats a re-run must not divide again.
if train_images.dtype == np.uint8:
    train_images = train_images / 255.0
if test_images.dtype == np.uint8:
    test_images = test_images / 255.0

print(f"Train images shape: {train_images.shape}")
print(f"Train labels shape: {train_labels.shape}")
print(f"Test images shape: {test_images.shape}")
print(f"Test labels shape: {test_labels.shape}")

num_classes = 26
train_per_class = 200  # at most this many training samples are kept per class
test_per_class = 20    # at most this many test samples are kept per class

# For every class build a (784, n_samples) matrix whose columns are images.
# The label files are matched against 1..num_classes, while the dictionaries
# are keyed 0..num_classes-1.
train_data = {}
test_data = {}
for cls in range(1, num_classes + 1):
    train_idx = np.where(train_labels == cls)[0][:train_per_class]
    test_idx = np.where(test_labels == cls)[0][:test_per_class]
    train_data[cls - 1] = train_images[train_idx].T
    test_data[cls - 1] = test_images[test_idx].T

# Ground-truth labels in the same class-by-class order as `test_data`.
# Built from the actual column counts so it stays aligned even if a class has
# fewer than `test_per_class` samples.
test_labels_filtered = np.concatenate(
    [np.full(test_data[c].shape[1], c) for c in range(num_classes)]
)

Train images shape: (124800, 784)
Train labels shape: (124800,)
Test images shape: (20800, 784)
Test labels shape: (20800,)


# Calculating QR decomposition for each class using the Householder matrix

In [None]:
def householder_qr(A):
    """Full QR decomposition of ``A`` via Householder reflections.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Matrix to factorise. It is copied and converted to float; the input
        is never modified.

    Returns
    -------
    Q : numpy.ndarray, shape (m, m)
        Orthogonal matrix (product of the applied reflectors).
    R : numpy.ndarray, shape (m, n)
        Upper-triangular factor (up to rounding) such that ``Q @ R`` equals ``A``.

    Notes
    -----
    * The reflector sign is chosen with ``copysign`` rather than ``np.sign``:
      ``np.sign(0)`` is 0, which would turn the reflector into ``x -> -x`` and
      leave the sub-diagonal entries of that column untouched whenever the
      pivot is exactly zero.
    * Columns that are already zero on and below the diagonal are skipped
      instead of dividing by a zero norm.
    * Each reflector is applied directly to the affected rows/columns instead
      of forming a dense ``m x m`` matrix per step.
    """
    R = np.array(A, dtype=float)  # float copy; also accepts integer input
    m, n = R.shape
    Q = np.eye(m)

    # There is nothing left to eliminate once we run out of rows or columns.
    for j in range(min(m, n)):
        x = R[j:, j]
        norm_x = np.linalg.norm(x)
        if norm_x == 0.0:
            # Column is already zero from the diagonal down: no reflection needed.
            continue

        v = x.copy()
        # Add the norm with the sign of the pivot (treating 0 as positive) to
        # avoid cancellation and to guarantee v is never the zero vector.
        v[0] += np.copysign(norm_x, x[0])
        v /= np.linalg.norm(v)

        # R <- H R and Q <- Q H with H = I - 2 v v^T acting on rows/cols j..m-1.
        R[j:, :] -= 2.0 * np.outer(v, v @ R[j:, :])
        Q[:, j:] -= 2.0 * np.outer(Q[:, j:] @ v, v)

    return Q, R

# Factorise each class matrix once; the (Q, R) pairs are looked up by class
# index when classifying test samples.
qr_cache = {
    class_idx: householder_qr(class_matrix)
    for class_idx, class_matrix in train_data.items()
}

# Solving the least squares problem to predict the labels of test data

In [None]:
def predict_label(test_sample, train_matrices, qr_cache):
    """Classify a sample by the class whose training matrix fits it best.

    For every class the least-squares problem ``min_c ||A c - test_sample||``
    is solved through the cached QR factors of ``A``, and the class with the
    smallest residual norm is returned.

    Parameters
    ----------
    test_sample : numpy.ndarray, shape (m,)
        Sample to classify.
    train_matrices : dict
        Maps class index to its training matrix ``A`` of shape ``(m, n)``
        (one training sample per column).
    qr_cache : dict
        Maps class index to the ``(Q, R)`` pair of the full QR decomposition
        of the corresponding matrix (``Q`` is ``(m, m)``, ``R`` is ``(m, n)``).

    Returns
    -------
    The key of ``train_matrices`` with the smallest residual; ties go to the
    class encountered first.

    Raises
    ------
    ValueError
        If ``train_matrices`` is empty.
    """
    errors = []
    for class_idx, A in train_matrices.items():
        Q, R = qr_cache[class_idx]
        # Size of the triangular system comes from R itself rather than a
        # hard-coded sample count, so any number of training columns works.
        n = R.shape[1]
        b = Q.T @ test_sample
        # Only the leading n rows of R are non-zero; lstsq is used instead of
        # a plain triangular solve so rank-deficient classes are still handled.
        c, _, _, _ = np.linalg.lstsq(R[:n, :n], b[:n], rcond=None)
        error = np.linalg.norm(A @ c - test_sample)
        errors.append((error, class_idx))
    return min(errors, key=lambda x: x[0])[1]

# Evaluating the model's performance

In [None]:
# Classify every held-out sample and count how many predictions match the
# class the sample was drawn from.
correct = 0
total = 0
for i in range(num_classes):
    class_samples = test_data[i]
    # Walk over the columns actually present instead of assuming a fixed
    # number of samples per class, so a short class cannot cause an IndexError.
    for j in range(class_samples.shape[1]):
        test_sample = class_samples[:, j]
        pred_label = predict_label(test_sample, train_data, qr_cache)
        if pred_label == i:
            correct += 1
        total += 1

# Denominator is the number of samples really evaluated.
accuracy = correct / total
print(f"Model accuracy: {accuracy * 100:.2f}%")

Model accuracy: 68.65%
