In [None]:
import numpy as np

def load_emnist_images(filename):
    with open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
        return data.reshape(-1, 784)

def load_emnist_labels(filename):
    with open(filename, 'rb') as f:
        data = np.frombuffer(f.read(), np.uint8, offset=8)
        return data

def householder_qr(X):
    m, n = X.shape
    R = X.copy().astype(np.float64)
    Q = np.eye(m, dtype=np.float64)
    for k in range(n):
        x = R[k:, k].copy()
        if np.linalg.norm(x) == 0:
            continue
        sigma = -np.sign(x[0]) * np.linalg.norm(x)
        v = x.copy()
        v[0] -= sigma
        v = v / np.linalg.norm(v)
        R[k:, k:] -= 2 * np.outer(v, v @ R[k:, k:])
        Q[k:, :] -= 2 * np.outer(v, v @ Q[k:, :])
    return Q[:, :n], R[:n, :]

def givens_rotation(a, b):
    if b == 0:
        return 1, 0
    r = np.hypot(a, b)
    c = a / r
    s = -b / r
    return c, s

def update_qr(Q, R, a):
    m, n = Q.shape
    r = Q.T @ a
    a_ortho = a - Q @ r
    norm_a_ortho = np.linalg.norm(a_ortho)
    if norm_a_ortho < 1e-10:
        R_new = np.hstack((R, r.reshape(-1, 1)))
        Q_new = Q
    else:
        q = a_ortho / norm_a_ortho
        Q_new = np.hstack((Q, q.reshape(-1, 1)))
        R_top = np.hstack((R, r.reshape(-1, 1)))
        R_new = np.vstack((R_top, np.zeros((1, n+1))))
        R_new[-1, -1] = norm_a_ortho
        for i in range(n, m):
            if i+1 >= R_new.shape[0]:
                break
            c, s = givens_rotation(R_new[i, -1], R_new[i+1, -1])
            G = np.array([[c, s], [-s, c]])
            R_new[i:i+2, :] = G @ R_new[i:i+2, :]
            Q_new[:, i:i+2] = Q_new[:, i:i+2] @ G.T
    return Q_new, R_new

def update_qr_multiple_columns(Q, R, X_new):
    Q_current, R_current = Q, R
    for i in range(X_new.shape[1]):
        Q_current, R_current = update_qr(Q_current, R_current, X_new[:, i])
    return Q_current, R_current

def extract_data(images, labels, num_samples_per_class):
    images_out, labels_out = [], []
    class_counts = {i: 0 for i in range(26)}
    for i in range(len(labels)):
        label_idx = labels[i] - 1
        if class_counts[label_idx] < num_samples_per_class:
            images_out.append(images[i])
            labels_out.append(label_idx)
            class_counts[label_idx] += 1
        if all(v >= num_samples_per_class for v in class_counts.values()):
            break
    return np.array(images_out).T, np.array(labels_out)

def predict_labels(Q_list, X_test):
    predictions = []
    for x in X_test.T:
        min_dist = float('inf')
        pred_class = -1
        for cls, Q in enumerate(Q_list):
            proj = Q @ (Q.T @ x)
            dist = np.linalg.norm(x - proj)
            if dist < min_dist:
                min_dist = dist
                pred_class = cls
        predictions.append(pred_class)
    return np.array(predictions)


base_path = 'C:/Users/Asus/Desktop/code/'
train_images = load_emnist_images(base_path + 'emnist-letters-train-images-idx3-ubyte')
train_labels = load_emnist_labels(base_path + 'emnist-letters-train-labels-idx1-ubyte')
test_images = load_emnist_images(base_path + 'emnist-letters-test-images-idx3-ubyte')
test_labels = load_emnist_labels(base_path + 'emnist-letters-test-labels-idx1-ubyte')

train_images = train_images / 255.0
test_images = test_images / 255.0

print(f"Train images shape: {train_images.shape}")
print(f"Train labels shape: {train_labels.shape}")
print(f"Test images shape: {test_images.shape}")
print(f"Test labels shape: {test_labels.shape}")


train_images_extracted, train_labels_extracted = extract_data(train_images, train_labels, 220)

test_images_extracted, test_labels_extracted = extract_data(test_images, test_labels, 20)

# Incremental Update
Q_list_updated = []
for cls in range(26):
    mask = (train_labels_extracted == cls)
    X_initial = train_images_extracted[:, mask][:, :200]
    Q, R = householder_qr(X_initial)
    X_new = train_images_extracted[:, mask][:, 200:220]
    Q_updated, R_updated = update_qr_multiple_columns(Q, R, X_new)
    Q_list_updated.append(Q_updated)

# Evaluate updated model
updated_preds = predict_labels(Q_list_updated, test_images_extracted)
updated_acc = np.mean(updated_preds == test_labels_extracted)
print(f"Model accuracy: {updated_acc*100:.2f}%")

Train images shape: (124800, 784)
Train labels shape: (124800,)
Test images shape: (20800, 784)
Test labels shape: (20800,)
Model accuracy: 57.31%
