In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
#distant connection sparse with local receptive fields (FIXED)
import numpy as np
import cupy as cp
import time
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score
from itertools import product

cp.random.seed(41)
sigmoid = lambda z: 1. / (1. + cp.exp(-z))

def orth_rows(m, n):
    if m <= n:
        q, _ = cp.linalg.qr(cp.random.randn(n, n))
        return q[:m]
    else:
        q, _ = cp.linalg.qr(cp.random.randn(m, n))
        return q

def random_patch_mask(n_hidden, input_dim, patch_size=(10, 10)):
    """Create mask with local receptive fields for first hidden layer"""
    mask = cp.zeros((n_hidden, input_dim))
    img_size = int(np.sqrt(input_dim))  # Convert to standard Python int
    
    for i in range(n_hidden):
        # Get Python integers for coordinates
        h_start = cp.random.randint(0, img_size - patch_size[0] + 1).item()
        w_start = cp.random.randint(0, img_size - patch_size[1] + 1).item()
        
        # Create patch indices
        patch_indices = []
        for h in range(h_start, h_start + patch_size[0]):
            for w in range(w_start, w_start + patch_size[1]):
                idx = h * img_size + w
                patch_indices.append(int(idx))  # Ensure integer index
                
        mask[i, patch_indices] = 1
    return mask

def small_world_mask(n_input, n_hidden, k, r, p):
    mask = cp.zeros((n_hidden, n_input))
    for i in range(n_hidden):
        center = cp.random.randint(0, n_input).item()  # Convert to Python int
        neighbors = [(center + offset) % n_input for offset in range(-r, r+1)]
        selected = neighbors if k > len(neighbors) else cp.random.choice(neighbors, size=k, replace=False)
        mask[i, selected] = 1
        for j, idx in enumerate(selected):
            if cp.random.rand() < p:
                mask[i, idx] = 0
                new_idx = cp.random.randint(0, n_input).item()  # Convert to Python int
                mask[i, new_idx] = 1
    return mask

def train_layer(X_prev, n_hid, C, mask):
    d_prev, N = X_prev.shape
    W_raw = cp.random.randn(n_hid, d_prev)
    W_in = W_raw * mask
    b_in = cp.random.randn(n_hid, 1)

    Z = W_in @ X_prev + b_in
    H = sigmoid(Z)

    HHT = H @ H.T
    I = cp.eye(H.shape[0])
    W_out = cp.linalg.solve(I / C + HHT, H @ X_prev.T)
    return H, (W_in, b_in)

def ml_elm_mnist(k, r, p, p_dist):
    struct = (784, 700, 700, 5000, 10)
    Cs = (1e-1, 1e3, 1e8, 1e8)

    (x_tr, y_tr), (x_te, y_te) = mnist.load_data()
    x_tr = cp.asarray(x_tr.reshape(-1, 28*28).T / 255.)
    x_te = cp.asarray(x_te.reshape(-1, 28*28).T / 255.)
    Y_train = cp.asarray((np.eye(10)[y_tr].T) * 2 - 1)

    # H0 = input layer
    H0 = x_tr

    # Hidden Layer 1 with random patch sampling
    mask1 = random_patch_mask(struct[1], struct[0])  # 10x10 patches
    H1, params1 = train_layer(H0, struct[1], Cs[0], mask1)

    # Hidden Layer 2
    mask2 = small_world_mask(struct[1], struct[2], k, r, p)
    H2, params2 = train_layer(H1, struct[2], Cs[1], mask2)

    # Hidden Layer 3 with sparse H1-to-H3 distant connections
    H_concat = cp.concatenate((H2, H1), axis=0)
    mask3 = small_world_mask(H_concat.shape[0], struct[3], k, r, p)

    # Apply sparsity to H1 part of H_concat using p_dist
    h1_start = H2.shape[0]
    mask3[:, h1_start:] *= (cp.random.rand(struct[3], H1.shape[0]) > p_dist)

    H3, params3 = train_layer(H_concat, struct[3], Cs[2], mask3)

    # Output layer
    HHT = H3 @ H3.T
    I = cp.eye(H3.shape[0])
    W_out = cp.linalg.solve(I / Cs[-1] + HHT, H3 @ Y_train.T)

    def forward(X):
        H0 = X
        Z1 = params1[0] @ H0 + params1[1]
        H1 = sigmoid(Z1)

        Z2 = params2[0] @ H1 + params2[1]
        H2 = sigmoid(Z2)

        H_concat = cp.concatenate((H2, H1), axis=0)
        Z3 = params3[0] @ H_concat + params3[1]
        H3 = sigmoid(Z3)

        return W_out.T @ H3

    tr_pred = cp.asnumpy(forward(x_tr)).argmax(0)
    te_pred = cp.asnumpy(forward(x_te)).argmax(0)
    tr_acc = accuracy_score(y_tr, tr_pred)
    te_acc = accuracy_score(y_te, te_pred)
    return tr_acc, te_acc

# Hyperparameters
k_list = [4]
r_list = [4]
p_list = [0.02]
p_dist_list = [0.05]  # probability of dropping H1→H3 connections

param_grid = list(product(k_list, r_list, p_list, p_dist_list))

print(f"Total Runs: {len(param_grid)}")
best_acc = 0
best_params = None
start_total = time.time()

for i, (k, r, p, p_dist) in enumerate(param_grid):
    print(f"Trial {i+1}/{len(param_grid)}: k={k}, r={r}, p={p:.2f}, p_dist={p_dist:.2f}")
    tr_acc, te_acc = ml_elm_mnist(k, r, p, p_dist)
    print(f"Train Acc: {tr_acc*100:.2f}%, Test Acc: {te_acc*100:.2f}%")
    if te_acc > best_acc:
        best_acc = te_acc
        best_params = (k, r, p, p_dist)
    print(f"Best So Far: {best_acc*100:.2f}% with k={best_params[0]}, r={best_params[1]}, p={best_params[2]}, p_dist={best_params[3]}")

print("\n==== Grid Search Completed ====")
print(f"Best Parameters: k={best_params[0]}, r={best_params[1]}, p={best_params[2]}, p_dist={best_params[3]}")
print(f"Best Test Accuracy: {best_acc*100:.2f}%")
print(f"Total Time: {(time.time()-start_total)/60:.2f} min")

Total Runs: 1
Trial 1/1: k=4, r=4, p=0.02, p_dist=0.05
Train Acc: 98.43%, Test Acc: 97.53%
Best So Far: 97.53% with k=4, r=4, p=0.02, p_dist=0.05

==== Grid Search Completed ====
Best Parameters: k=4, r=4, p=0.02, p_dist=0.05
Best Test Accuracy: 97.53%
Total Time: 0.85 min


In [1]:
# === Full MELM + SWOA (Single Cell) ===
import numpy as np
import cupy as cp
import time
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score

cp.random.seed(41)
sigmoid = lambda z: 1. / (1. + cp.exp(-z))

def orth_rows(m, n):
    if m <= n:
        q, _ = cp.linalg.qr(cp.random.randn(n, n))
        return q[:m]
    else:
        q, _ = cp.linalg.qr(cp.random.randn(m, n))
        return q

def random_patch_mask(n_hidden, input_dim, patch_size=(10, 10)):
    mask = cp.zeros((n_hidden, input_dim))
    img_size = int(np.sqrt(input_dim))
    for i in range(n_hidden):
        h_start = cp.random.randint(0, img_size - patch_size[0] + 1).item()
        w_start = cp.random.randint(0, img_size - patch_size[1] + 1).item()
        patch_indices = []
        for h in range(h_start, h_start + patch_size[0]):
            for w in range(w_start, w_start + patch_size[1]):
                idx = h * img_size + w
                patch_indices.append(int(idx))
        mask[i, patch_indices] = 1
    return mask

def small_world_mask(n_input, n_hidden, k, r, p):
    mask = cp.zeros((n_hidden, n_input))
    for i in range(n_hidden):
        center = cp.random.randint(0, n_input).item()
        neighbors = [(center + offset) % n_input for offset in range(-r, r+1)]
        selected = neighbors if k > len(neighbors) else cp.random.choice(neighbors, size=k, replace=False)
        mask[i, selected] = 1
        for j, idx in enumerate(selected):
            if cp.random.rand() < p:
                mask[i, idx] = 0
                new_idx = cp.random.randint(0, n_input).item()
                mask[i, new_idx] = 1
    return mask

def train_layer(X_prev, n_hid, C, mask):
    d_prev, N = X_prev.shape
    W_raw = cp.random.randn(n_hid, d_prev)
    W_in = W_raw * mask
    b_in = cp.random.randn(n_hid, 1)
    Z = W_in @ X_prev + b_in
    H = sigmoid(Z)
    HHT = H @ H.T
    I = cp.eye(H.shape[0])
    W_out = cp.linalg.solve(I / C + HHT, H @ X_prev.T)
    return H, (W_in, b_in)

def ml_elm_mnist(k, r, p, p_dist):
    struct = (784, 700, 700, 5000, 10)
    Cs = (1e-1, 1e3, 1e8, 1e8)
    (x_tr, y_tr), (x_te, y_te) = mnist.load_data()
    x_tr = cp.asarray(x_tr.reshape(-1, 28*28).T / 255.)
    x_te = cp.asarray(x_te.reshape(-1, 28*28).T / 255.)
    Y_train = cp.asarray((np.eye(10)[y_tr].T) * 2 - 1)
    H0 = x_tr
    mask1 = random_patch_mask(struct[1], struct[0])
    H1, params1 = train_layer(H0, struct[1], Cs[0], mask1)
    mask2 = small_world_mask(struct[1], struct[2], k, r, p)
    H2, params2 = train_layer(H1, struct[2], Cs[1], mask2)
    H_concat = cp.concatenate((H2, H1), axis=0)
    mask3 = small_world_mask(H_concat.shape[0], struct[3], k, r, p)
    h1_start = H2.shape[0]
    mask3[:, h1_start:] *= (cp.random.rand(struct[3], H1.shape[0]) > p_dist)
    H3, params3 = train_layer(H_concat, struct[3], Cs[2], mask3)
    HHT = H3 @ H3.T
    I = cp.eye(H3.shape[0])
    W_out = cp.linalg.solve(I / Cs[-1] + HHT, H3 @ Y_train.T)

    def forward(X):
        H0 = X
        Z1 = params1[0] @ H0 + params1[1]
        H1 = sigmoid(Z1)
        Z2 = params2[0] @ H1 + params2[1]
        H2 = sigmoid(Z2)
        H_concat = cp.concatenate((H2, H1), axis=0)
        Z3 = params3[0] @ H_concat + params3[1]
        H3 = sigmoid(Z3)
        return W_out.T @ H3

    tr_pred = cp.asnumpy(forward(x_tr)).argmax(0)
    te_pred = cp.asnumpy(forward(x_te)).argmax(0)
    tr_acc = accuracy_score(y_tr, tr_pred)
    te_acc = accuracy_score(y_te, te_pred)
    return tr_acc, te_acc

# === SWOA ===
POP_SIZE = 6
MAX_ITERS = 10
mutation_prob = 0.7
k_bounds = (2, 8)
r_bounds = (1, 6)
p_bounds = (0.01, 0.1)
p_dist_bounds = (0.01, 0.1)

def random_candidate():
    return [
        np.random.randint(*k_bounds),
        np.random.randint(*r_bounds),
        np.random.uniform(*p_bounds),
        np.random.uniform(*p_dist_bounds)
    ]

def clip_params(s):
    s[0] = int(np.clip(s[0], *k_bounds))
    s[1] = int(np.clip(s[1], *r_bounds))
    s[2] = float(np.clip(s[2], *p_bounds))
    s[3] = float(np.clip(s[3], *p_dist_bounds))
    return s

def fitness(s):
    try:
        return ml_elm_mnist(*s)[1]
    except Exception as e:
        print(f"Error for {s}: {e}")
        return 0.0

def local_shortcut(s):
    s_new = s.copy()
    idx = np.random.randint(4)
    if idx in [0, 1]:
        s_new[idx] += np.random.choice([-1, 1])
    else:
        s_new[idx] += np.random.uniform(-0.01, 0.01)
    return clip_params(s_new)

def random_long_jump():
    return random_candidate()

population = [random_candidate() for _ in range(POP_SIZE)]
fitness_vals = [fitness(s) for s in population]
best_idx = np.argmax(fitness_vals)
best_sol = population[best_idx]
best_fit = fitness_vals[best_idx]

start_time = time.time()

for it in range(MAX_ITERS):
    new_population = []
    for i in range(POP_SIZE):
        s_base = population[i]
        s_prime = random_long_jump() if np.random.rand() > mutation_prob else s_base
        s_new = local_shortcut(s_prime)
        f_old = fitness_vals[i]
        f_new = fitness(s_new)
        if f_new > f_old:
            new_population.append(s_new)
            fitness_vals[i] = f_new
        else:
            new_population.append(s_base)
        if f_new > best_fit:
            best_fit = f_new
            best_sol = s_new
            print(f"[Iter {it+1}] New Best: {best_fit*100:.2f}% with k={best_sol[0]}, r={best_sol[1]}, p={best_sol[2]:.3f}, p_dist={best_sol[3]:.3f}")
    population = new_population
    if best_fit > 0.98:
        break

end_time = time.time()
print("\n==== SWOA Completed ====")
print(f"Best Params: k={best_sol[0]}, r={best_sol[1]}, p={best_sol[2]:.3f}, p_dist={best_sol[3]:.3f}")
print(f"Best Test Accuracy: {best_fit*100:.2f}%")
print(f"Total Time: {(end_time - start_time)/60:.2f} min")


2025-07-04 14:24:00.045134: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751639040.232660      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751639040.288981      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[Iter 5] New Best: 97.59% with k=6, r=2, p=0.098, p_dist=0.066
[Iter 8] New Best: 97.60% with k=2, r=4, p=0.081, p_dist=0.024

==== SWOA Completed ====
Best Params: k=2, r=4, p=0.081, p_dist=0.024
Best Test Accuracy: 97.60%
Total Time: 49.91 min


In [None]:
import numpy as np
import cupy as cp
import time
from tensorflow.keras.datasets import mnist
from sklearn.metrics import accuracy_score

cp.random.seed(41)
sigmoid = lambda z: 1. / (1. + cp.exp(-z))

def orth_rows(m, n):
    if m <= n:
        q, _ = cp.linalg.qr(cp.random.randn(n, n))
        return q[:m]
    else:
        q, _ = cp.linalg.qr(cp.random.randn(m, n))
        return q

def random_patch_mask(n_hidden, input_dim, patch_size=(10, 10)):
    mask = cp.zeros((n_hidden, input_dim))
    img_size = int(np.sqrt(input_dim))
    for i in range(n_hidden):
        h_start = cp.random.randint(0, img_size - patch_size[0] + 1).item()
        w_start = cp.random.randint(0, img_size - patch_size[1] + 1).item()
        patch_indices = []
        for h in range(h_start, h_start + patch_size[0]):
            for w in range(w_start, w_start + patch_size[1]):
                idx = h * img_size + w
                patch_indices.append(int(idx))
        mask[i, patch_indices] = 1
    return mask

def small_world_mask(n_input, n_hidden, k, r, p):
    mask = cp.zeros((n_hidden, n_input))
    for i in range(n_hidden):
        center = cp.random.randint(0, n_input).item()
        neighbors = [(center + offset) % n_input for offset in range(-r, r+1)]
        selected = neighbors if k > len(neighbors) else cp.random.choice(neighbors, size=k, replace=False)
        mask[i, selected] = 1
        for idx in selected:
            if cp.random.rand() < p:
                mask[i, idx] = 0
                new_idx = cp.random.randint(0, n_input).item()
                mask[i, new_idx] = 1
    return mask

def train_layer(X_prev, n_hid, C, mask, custom_weights=None, custom_bias=None):
    d_prev, N = X_prev.shape
    W_in = custom_weights if custom_weights is not None else cp.random.randn(n_hid, d_prev) * mask
    b_in = custom_bias if custom_bias is not None else cp.random.randn(n_hid, 1)
    Z = W_in @ X_prev + b_in
    H = sigmoid(Z)
    HHT = H @ H.T
    I = cp.eye(H.shape[0])
    W_out = cp.linalg.solve(I / C + HHT, H @ X_prev.T)
    return H, (W_in, b_in)

def ml_elm_mnist_weights(k, r, p, p_dist, layer_weights=None):
    struct = (784, 700, 700, 5000, 10)
    Cs = (1e-1, 1e3, 1e8, 1e8)
    (x_tr, y_tr), (x_te, y_te) = mnist.load_data()
    x_tr = cp.asarray(x_tr.reshape(-1, 28*28).T / 255.)
    x_te = cp.asarray(x_te.reshape(-1, 28*28).T / 255.)
    Y_train = cp.asarray((np.eye(10)[y_tr].T) * 2 - 1)
    H0 = x_tr

    mask1 = random_patch_mask(struct[1], struct[0])
    W1, B1 = None, None if layer_weights is None else layer_weights[0]
    H1, (W1, B1) = train_layer(H0, struct[1], Cs[0], mask1, W1, B1)

    mask2 = small_world_mask(struct[1], struct[2], k, r, p)
    W2, B2 = None, None if layer_weights is None else layer_weights[1]
    H2, (W2, B2) = train_layer(H1, struct[2], Cs[1], mask2, W2, B2)

    H_concat = cp.concatenate((H2, H1), axis=0)
    mask3 = small_world_mask(H_concat.shape[0], struct[3], k, r, p)
    h1_start = H2.shape[0]
    mask3[:, h1_start:] *= (cp.random.rand(struct[3], H1.shape[0]) > p_dist)
    W3, B3 = None, None if layer_weights is None else layer_weights[2]
    H3, (W3, B3) = train_layer(H_concat, struct[3], Cs[2], mask3, W3, B3)

    HHT = H3 @ H3.T
    I = cp.eye(H3.shape[0])
    W_out = cp.linalg.solve(I / Cs[-1] + HHT, H3 @ Y_train.T)

    def forward(X):
        Z1 = W1 @ X + B1
        H1 = sigmoid(Z1)
        Z2 = W2 @ H1 + B2
        H2 = sigmoid(Z2)
        H_concat = cp.concatenate((H2, H1), axis=0)
        Z3 = W3 @ H_concat + B3
        H3 = sigmoid(Z3)
        return W_out.T @ H3

    tr_pred = cp.asnumpy(forward(x_tr)).argmax(0)
    te_pred = cp.asnumpy(forward(x_te)).argmax(0)
    tr_acc = accuracy_score(y_tr, tr_pred)
    te_acc = accuracy_score(y_te, te_pred)
    return tr_acc, te_acc, [(W1, B1), (W2, B2), (W3, B3)]

# === Phase 1: SWOA Optimization for (k, r, p, p_dist) ===
POP_SIZE = 4
MAX_ITERS_PHASE1 = 10
mutation_prob = 0.7
k_bounds = (2, 8)
r_bounds = (1, 6)
p_bounds = (0.01, 0.1)
p_dist_bounds = (0.01, 0.1)

def random_candidate():
    return [
        np.random.randint(*k_bounds),
        np.random.randint(*r_bounds),
        np.random.uniform(*p_bounds),
        np.random.uniform(*p_dist_bounds)
    ]

def clip_params(s):
    s[0] = int(np.clip(s[0], *k_bounds))
    s[1] = int(np.clip(s[1], *r_bounds))
    s[2] = float(np.clip(s[2], *p_bounds))
    s[3] = float(np.clip(s[3], *p_dist_bounds))
    return s

def fitness_phase1(s):
    try:
        _, acc, _ = ml_elm_mnist_weights(*s)
        return acc
    except:
        return 0.0

def local_shortcut(s):
    s_new = s.copy()
    idx = np.random.randint(4)
    s_new[idx] += np.random.choice([-1, 1]) if idx < 2 else np.random.uniform(-0.01, 0.01)
    return clip_params(s_new)

def random_long_jump():
    return random_candidate()

population = [random_candidate() for _ in range(POP_SIZE)]
fitness_vals = [fitness_phase1(s) for s in population]
best_idx = np.argmax(fitness_vals)
best_sol = population[best_idx]
best_fit = fitness_vals[best_idx]

for it in range(MAX_ITERS_PHASE1):
    print(f"[Phase 1] Iteration {it+1}/{MAX_ITERS_PHASE1}")
    new_population = []
    for i in range(POP_SIZE):
        s_base = population[i]
        s_prime = random_long_jump() if np.random.rand() > mutation_prob else s_base
        s_new = local_shortcut(s_prime)
        f_old = fitness_vals[i]
        f_new = fitness_phase1(s_new)
        if f_new > f_old:
            new_population.append(s_new)
            fitness_vals[i] = f_new
        else:
            new_population.append(s_base)
        if f_new > best_fit:
            best_fit = f_new
            best_sol = s_new
            print(f"[Phase 1][Iter {it+1}] New Best Acc: {best_fit*100:.2f}% -> k={s_new[0]}, r={s_new[1]}, p={s_new[2]:.3f}, p_dist={s_new[3]:.3f}")
    population = new_population

# === Phase 2: Optimize Weights ===
print("\n[Phase 2] Optimizing Weights using SWOA")

weights_base = ml_elm_mnist_weights(*best_sol)[2]
weight_population = [weights_base for _ in range(POP_SIZE)]

def mutate_weights(weights):
    new_weights = []
    for W, B in weights:
        W_mut = W + cp.random.normal(0, 0.01, W.shape)
        B_mut = B + cp.random.normal(0, 0.01, B.shape)
        new_weights.append((W_mut, B_mut))
    return new_weights

def fitness_phase2(weights):
    try:
        _, acc, _ = ml_elm_mnist_weights(*best_sol, layer_weights=weights)
        return acc
    except:
        return 0.0

fitness_vals = [fitness_phase2(w) for w in weight_population]
best_idx = np.argmax(fitness_vals)
best_weights = weight_population[best_idx]
best_fit = fitness_vals[best_idx]

for it in range(MAX_ITERS_PHASE1):
    print(f"[Phase 2] Iteration {it+1}/{MAX_ITERS_PHASE1}")
    new_population = []
    for i in range(POP_SIZE):
        base_w = weight_population[i]
        new_w = mutate_weights(base_w)
        f_old = fitness_vals[i]
        f_new = fitness_phase2(new_w)
        if f_new > f_old:
            new_population.append(new_w)
            fitness_vals[i] = f_new
        else:
            new_population.append(base_w)
        if f_new > best_fit:
            best_fit = f_new
            best_weights = new_w
            print(f"[Phase 2][Iter {it+1}] Improved Test Accuracy: {best_fit*100:.2f}%")
    weight_population = new_population

print("\n==== Final Model Evaluation ====")
print(f"Best (k, r, p, p_dist): {best_sol}")
print(f"Best Final Test Accuracy: {best_fit*100:.2f}%")


2025-07-04 15:28:31.045544: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751642911.205659      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751642911.251818      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[Phase 1] Iteration 1/10
[Phase 1][Iter 1] New Best Acc: 97.60% -> k=2, r=2, p=0.036, p_dist=0.028
[Phase 1] Iteration 2/10
[Phase 1] Iteration 3/10
[Phase 1] Iteration 4/10
[Phase 1] Iteration 5/10
[Phase 1][Iter 5] New Best Acc: 97.61% -> k=2, r=2, p=0.036, p_dist=0.030
[Phase 1] Iteration 6/10
[Phase 1] Iteration 7/10
[Phase 1] Iteration 8/10
[Phase 1] Iteration 9/10
