In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
# ml_elm_mnist_cupy.py


import gc, time, cupy as cp
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

cp.random.seed(41)
sigmoid = lambda z: 1. / (1. + cp.exp(-z))

def orth_rows(m: int, n: int) -> cp.ndarray:
    if m <= n:
        q, _ = cp.linalg.qr(cp.random.randn(n, n))
        return q[:m]
    else:
        q, _ = cp.linalg.qr(cp.random.randn(m, n))
        return q

def train_layer(X_prev, n_hid, C, rho=0.05):
    d_prev, N = X_prev.shape
    W_in = orth_rows(n_hid, d_prev).astype(cp.float64)
    b_in = cp.random.randn(n_hid, 1).astype(cp.float64)

    Z = W_in @ X_prev + b_in
    H = Z if n_hid == d_prev else sigmoid(Z)

    # Ridge-regularised least squares (float64 for accuracy)
    HHT = H @ H.T
    I = cp.eye(H.shape[0], dtype=cp.float64)
    W_out = cp.linalg.solve(I / C + HHT, H @ X_prev.T)

    del HHT; gc.collect()
    return H, (W_in, b_in, n_hid == d_prev)

def ml_elm_mnist(struct=(784, 700, 700, 5000, 10),
                 Cs=(1e-1, 1e3, 1e8, 1e8),
                 rho=0.05):

    # Load and normalize data, move to GPU
    (x_tr, y_tr), (x_te, y_te) = mnist.load_data()
    x_tr = cp.asarray(x_tr.reshape(-1, 28*28).T, dtype=cp.float64) / 255.
    x_te = cp.asarray(x_te.reshape(-1, 28*28).T, dtype=cp.float64) / 255.

    Y_train = cp.asarray((cp.eye(10)[y_tr] * 2 - 1).T, dtype=cp.float64)
    Y_test  = cp.asarray((cp.eye(10)[y_te] * 2 - 1).T, dtype=cp.float64)

    layers = []
    H_prev = x_tr
    start = time.time()

    for idx, (d_in, d_out, C) in enumerate(zip(struct[:-1], struct[1:], Cs[:-1])):
        H_prev, params = train_layer(H_prev, d_out, C, rho)
        layers.append(params)
        print(f"Layer {idx+1}: {d_in}->{d_out} (C={C:g}) done.")

    # Output layer (ridge regression)
    HHT = H_prev @ H_prev.T
    I = cp.eye(H_prev.shape[0], dtype=cp.float64)
    W_out = cp.linalg.solve(I / Cs[-1] + HHT, H_prev @ Y_train.T)

    train_time = time.time() - start
    print(f"Training finished in {train_time:.1f} seconds")

    def forward(X):
        H = X
        for W_in, b_in, is_linear in layers:
            Z = W_in @ H + b_in
            H = Z if is_linear else sigmoid(Z)
        return W_out.T @ H

    tr_pred = cp.asnumpy(forward(x_tr)).argmax(0)
    te_pred = cp.asnumpy(forward(x_te)).argmax(0)
    tr_acc = accuracy_score(cp.asnumpy(y_tr), tr_pred)
    te_acc = accuracy_score(cp.asnumpy(y_te), te_pred)

    return train_time, tr_acc, te_acc

if __name__ == "__main__":
    t, acc_tr, acc_te = ml_elm_mnist()
    print(f"\nTraining Accuracy: {acc_tr * 100:.2f} %")
    print(f"Testing  Accuracy: {acc_te * 100:.2f} %")


2025-06-26 15:55:35.784065: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750953335.808219     210 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750953335.815675     210 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Layer 1: 784->700 (C=0.1) done.
Layer 2: 700->700 (C=1000) done.
Layer 3: 700->5000 (C=1e+08) done.
Training finished in 18.9 seconds

Training Accuracy: 98.17 %
Testing  Accuracy: 96.99 %


In [5]:
#base melm on fashion mnist
# ml_elm_fashion_mnist.py

import gc, time, numpy as np
from scipy.linalg import qr
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import fashion_mnist

np.random.seed(41)
sigmoid = lambda z: 1. / (1. + np.exp(-z))

def orth_rows(m: int, n: int) -> np.ndarray:
    if m <= n:
        q, _ = np.linalg.qr(np.random.randn(n, n))
        return q[:m]
    else:
        q, _ = np.linalg.qr(np.random.randn(m, n))
        return q

def train_layer(X_prev, n_hid, C, rho=0.05):
    d_prev, N = X_prev.shape
    W_in = orth_rows(n_hid, d_prev).astype(np.float64)
    b_in = np.random.randn(n_hid, 1).astype(np.float64)

    Z = W_in @ X_prev + b_in
    H = Z if n_hid == d_prev else sigmoid(Z)

    HHT = H @ H.T
    I = np.eye(H.shape[0], dtype=np.float64)
    W_out = np.linalg.solve(I / C + HHT, H @ X_prev.T)

    del HHT; gc.collect()
    return H, (W_in, b_in, n_hid == d_prev)

def ml_elm_fashion_mnist(struct=(784, 700, 700, 5000, 10),
                         Cs=(1e-1, 1e3, 1e8, 1e8),
                         rho=0.05):

    (x_tr, y_tr), (x_te, y_te) = fashion_mnist.load_data()
    x_tr = x_tr.reshape(-1, 28*28).T.astype(np.float64) / 255.
    x_te = x_te.reshape(-1, 28*28).T.astype(np.float64) / 255.

    Y_train = np.eye(10)[y_tr].T.astype(np.float64) * 2 - 1
    Y_test  = np.eye(10)[y_te].T.astype(np.float64) * 2 - 1

    layers = []
    H_prev = x_tr
    start = time.time()

    for idx, (d_in, d_out, C) in enumerate(zip(struct[:-1], struct[1:], Cs[:-1])):
        H_prev, params = train_layer(H_prev, d_out, C, rho)
        layers.append(params)
        print(f"Layer {idx+1}: {d_in}->{d_out} (C={C:g}) done.")

    HHT = H_prev @ H_prev.T
    I = np.eye(H_prev.shape[0], dtype=np.float64)
    W_out = np.linalg.solve(I / Cs[-1] + HHT, H_prev @ Y_train.T)

    train_time = time.time() - start
    print(f"Training finished in {train_time:.1f} seconds")

    def forward(X):
        H = X
        for W_in, b_in, is_linear in layers:
            Z = W_in @ H + b_in
            H = Z if is_linear else sigmoid(Z)
        return W_out.T @ H

    tr_pred = forward(x_tr).argmax(0)
    te_pred = forward(x_te).argmax(0)
    tr_acc = accuracy_score(y_tr, tr_pred)
    te_acc = accuracy_score(y_te, te_pred)

    return train_time, tr_acc, te_acc

if __name__ == "__main__":
    t, acc_tr, acc_te = ml_elm_fashion_mnist()
    print(f"\nTraining Accuracy: {acc_tr * 100:.2f} %")
    print(f"Testing  Accuracy: {acc_te * 100:.2f} %")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Layer 1: 784->700 (C=0.1) done.
Layer 2: 700->700 (C=1000) done.
Layer 3: 700->5000 (C=1e+08) done.
Training finished in 43.6 seconds

Training Accuracy: 91.70 %
Testing  Accuracy: 87.52 %


In [6]:
#base melm on k-mnist
# ml_elm_kmnist.py

import gc, time, numpy as np
from scipy.linalg import qr
from sklearn.metrics import accuracy_score
import tensorflow_datasets as tfds

np.random.seed(41)
sigmoid = lambda z: 1. / (1. + np.exp(-z))

def orth_rows(m: int, n: int) -> np.ndarray:
    if m <= n:
        q, _ = np.linalg.qr(np.random.randn(n, n))
        return q[:m]
    else:
        q, _ = np.linalg.qr(np.random.randn(m, n))
        return q

def train_layer(X_prev, n_hid, C, rho=0.05):
    d_prev, N = X_prev.shape
    W_in = orth_rows(n_hid, d_prev).astype(np.float64)
    b_in = np.random.randn(n_hid, 1).astype(np.float64)

    Z = W_in @ X_prev + b_in
    H = Z if n_hid == d_prev else sigmoid(Z)

    HHT = H @ H.T
    I = np.eye(H.shape[0], dtype=np.float64)
    W_out = np.linalg.solve(I / C + HHT, H @ X_prev.T)

    del HHT; gc.collect()
    return H, (W_in, b_in, n_hid == d_prev)

def load_kmnist():
    ds_train, ds_test = tfds.load('kmnist', split=['train', 'test'], batch_size=-1, as_supervised=True)
    x_tr, y_tr = tfds.as_numpy(ds_train)
    x_te, y_te = tfds.as_numpy(ds_test)
    return (x_tr, y_tr), (x_te, y_te)

def ml_elm_kmnist(struct=(784, 700, 700, 5000, 10),
                  Cs=(1e-1, 1e3, 1e8, 1e8),
                  rho=0.05):

    (x_tr, y_tr), (x_te, y_te) = load_kmnist()
    x_tr = x_tr.reshape(-1, 28*28).T.astype(np.float64) / 255.
    x_te = x_te.reshape(-1, 28*28).T.astype(np.float64) / 255.

    Y_train = np.eye(10)[y_tr].T.astype(np.float64) * 2 - 1
    Y_test  = np.eye(10)[y_te].T.astype(np.float64) * 2 - 1

    layers = []
    H_prev = x_tr
    start = time.time()

    for idx, (d_in, d_out, C) in enumerate(zip(struct[:-1], struct[1:], Cs[:-1])):
        H_prev, params = train_layer(H_prev, d_out, C, rho)
        layers.append(params)
        print(f"Layer {idx+1}: {d_in}->{d_out} (C={C:g}) done.")

    HHT = H_prev @ H_prev.T
    I = np.eye(H_prev.shape[0], dtype=np.float64)
    W_out = np.linalg.solve(I / Cs[-1] + HHT, H_prev @ Y_train.T)

    train_time = time.time() - start
    print(f"Training finished in {train_time:.1f} seconds")

    def forward(X):
        H = X
        for W_in, b_in, is_linear in layers:
            Z = W_in @ H + b_in
            H = Z if is_linear else sigmoid(Z)
        return W_out.T @ H

    tr_pred = forward(x_tr).argmax(0)
    te_pred = forward(x_te).argmax(0)
    tr_acc = accuracy_score(y_tr, tr_pred)
    te_acc = accuracy_score(y_te, te_pred)

    return train_time, tr_acc, te_acc

if __name__ == "__main__":
    t, acc_tr, acc_te = ml_elm_kmnist()
    print(f"\nTraining Accuracy: {acc_tr * 100:.2f} %")
    print(f"Testing  Accuracy: {acc_te * 100:.2f} %")


Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/kmnist/3.0.1...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

I0000 00:00:1750953440.651781     210 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1583 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1750953440.652478     210 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


Shuffling /root/tensorflow_datasets/kmnist/incomplete.928PMY_3.0.1/kmnist-train.tfrecord*...:   0%|          |…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/kmnist/incomplete.928PMY_3.0.1/kmnist-test.tfrecord*...:   0%|          | …

Dataset kmnist downloaded and prepared to /root/tensorflow_datasets/kmnist/3.0.1. Subsequent calls will reuse this data.
Layer 1: 784->700 (C=0.1) done.
Layer 2: 700->700 (C=1000) done.
Layer 3: 700->5000 (C=1e+08) done.
Training finished in 41.8 seconds

Training Accuracy: 96.17 %
Testing  Accuracy: 84.92 %
