In [1]:
import pandas as pd
rna_data = pd.read_csv("gene_data/rna_common_complete.csv")
rna_data = rna_data.sort_values(by=['sn','period']).reset_index(drop=True)

In [2]:
X_og_shape = rna_data.drop(['sn','group','caarms_status','period'],axis=1).values
X_reshaped = X_og_shape.reshape(len(set(rna_data['sn'])), 3, X_og_shape.shape[1])
labels_group = rna_data[rna_data['period'] == 24]['group'].values
labels = [0 if i == 'C' else 1 for i in labels_group]

In [3]:
import torch
import numpy as np
from neucube.utils import SNR
from neucube.utils import interpolate
from neucube.encoder import Delta

ratios = SNR(X_reshaped[:,0,:], labels)
top_idx = torch.argsort(ratios, descending=True)[0:20]
X_reshaped_topidx = X_reshaped[:,:,top_idx]
interpolated_X = interpolate(X_reshaped_topidx, num_points=104)

encoder = Delta(threshold=0.008)
X = encoder.encode_dataset(interpolated_X)
y = torch.tensor(labels)

In [4]:
import nevergrad as ng
import numpy as np
from functools import partial
from neucube.sampler import TemporalBinning
from neucube.utils import SeparationIndex
from tqdm import tqdm

neuron_parm_dict = { 
    'rs' : {'a': 0.02, 'b': 0.2, 'c': -65, 'd': 8}, 
    'ch' : {'a': 0.02, 'b': 0.55, 'c': -45, 'd': 4},
    'ib' : {'a': 0.06, 'b': 0.55, 'c': -55, 'd': 3},
}

def objective_function(res_, X_stimuli, labels, params, sampler):
    a, b, c, d = [torch.tensor(list(map(lambda x: neuron_parm_dict[x][i], params))) for i in ['a', 'b', 'c', 'd']]
    res_.update_parms(a=a, b=b, c=c, d=d)
    out_spikes = res_.simulate(X_stimuli, mem_thr=30, train=False, verbose=True)
    state_vec = sampler.sample(out_spikes)
    return SeparationIndex(state_vec, labels).item()

def run_opt(optimizer, objective_):
    for i in range(optimizer.budget):
        x = optimizer.ask()
        loss = -objective_(params=x.value)
        optimizer.tell(x, loss)
        if (i + 1) % 2 == 0:
            print(f"Iteration {i + 1}/{optimizer.budget}, Current loss: {loss}")

def train_dyanmics(reservoir_, X_, y_, sampler_):
    parametrization = ng.p.Choice(['rs','ch','ib'], repetitions=reservoir_.n_neurons)

    optimizer = ng.optimizers.NoisyDiscreteOnePlusOne(parametrization=parametrization, budget=5)
    partial_objective_function = partial(objective_function, res_=reservoir_, X_stimuli=X_, labels=y_, sampler=sampler_)

    run_opt(optimizer, partial_objective_function)
    recommendation = optimizer.provide_recommendation()
    return recommendation.value

In [5]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import svm, metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

from neucube import IzhReservoir
from neucube.sampler import TemporalBinning
from neucube.utils import SeparationIndex

num_folds = 3
kf = KFold(n_splits=num_folds)
sampler = TemporalBinning(bin_size=10)

true_labels = []
predicted_labels = []
separation_values = []
accuracy_values = []
mcc_values = []

for train_index, test_index in tqdm(kf.split(X)):
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]

    izh_res = IzhReservoir(inputs=X.shape[2], c=0.7, l=0.18, input_conn_prob=0.85)
    init_n_type = np.random.choice(['rs','ch','ib'], izh_res.n_neurons, replace=True)
    init_a, init_b, init_c, init_d = [
        torch.tensor(list(map(lambda x: neuron_parm_dict[x][i], init_n_type))) for i in ['a', 'b', 'c', 'd']]
    izh_res.update_parms(a=init_a, b=init_b, c=init_c, d=init_d)

    # izh_res.set_exc_parms(a=0.06, b=0.55, c=-55, d=3) #initial values
    # izh_res.set_inh_parms(a=0.1, b=0.2, c=-65, d=2) #initial values

    opt_parms = train_dyanmics(izh_res, X_train_fold, y_train_fold, sampler)
    opt_a, opt_b, opt_c, opt_d = [
        torch.tensor(list(map(lambda x: neuron_parm_dict[x][i], opt_parms))) for i in ['a', 'b', 'c', 'd']]
    izh_res.update_parms(a=opt_a, b=opt_b, c=opt_c, d=opt_d)
    X_train_opt_spike = izh_res.simulate(X_train_fold, mem_thr=30, train=False, verbose=False)
    X_test_opt_spike = izh_res.simulate(X_test_fold, mem_thr=30, train=False, verbose=False)
    X_train_state_vec = sampler.sample(X_train_opt_spike)
    X_test_state_vec = sampler.sample(X_test_opt_spike)

    param_grid = {'C': [2, 3, 4, 5, 6, 7, 8], 'gamma': [0.1, 0.01, 0.001], 'kernel': ['rbf', 'linear', 'poly']}
    svm_model = svm.SVC()
    mcc_scorer = make_scorer(metrics.matthews_corrcoef)
    grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=10, scoring={'accuracy': 'accuracy', 'mcc': mcc_scorer}, refit='mcc')
    grid_search.fit(X_train_state_vec, y_train_fold)
    y_pred = grid_search.best_estimator_.predict(X_test_state_vec)

    true_labels.extend(y_test_fold)
    predicted_labels.extend(y_pred)
    separation_values.extend([SeparationIndex(X_train_state_vec, y_train_fold), SeparationIndex(X_test_state_vec, y_test_fold)])
    accuracy_values.append(accuracy_score(y_test_fold, y_pred))
    mcc_values.append(metrics.matthews_corrcoef(y_test_fold, y_pred))

100%|██████████| 76/76 [00:15<00:00,  4.94it/s]
100%|██████████| 76/76 [00:15<00:00,  4.83it/s]


Iteration 2/5, Current loss: -0.011109150014817715


100%|██████████| 76/76 [00:15<00:00,  4.77it/s]
100%|██████████| 76/76 [00:15<00:00,  4.80it/s]


Iteration 4/5, Current loss: -0.011141764931380749


100%|██████████| 76/76 [00:16<00:00,  4.74it/s]
100%|██████████| 77/77 [00:15<00:00,  4.82it/s]
100%|██████████| 77/77 [00:16<00:00,  4.76it/s]


Iteration 2/5, Current loss: -0.016034912317991257


100%|██████████| 77/77 [00:15<00:00,  4.84it/s]
100%|██████████| 77/77 [00:17<00:00,  4.50it/s]


Iteration 4/5, Current loss: -0.01627284660935402


100%|██████████| 77/77 [00:16<00:00,  4.76it/s]
100%|██████████| 77/77 [00:15<00:00,  4.87it/s]
100%|██████████| 77/77 [00:16<00:00,  4.59it/s]


Iteration 2/5, Current loss: -0.021958502009510994


100%|██████████| 77/77 [00:17<00:00,  4.39it/s]
100%|██████████| 77/77 [00:17<00:00,  4.37it/s]


Iteration 4/5, Current loss: -0.022191546857357025


100%|██████████| 77/77 [00:17<00:00,  4.50it/s]
3it [06:37, 132.62s/it]


In [6]:
# Calculate accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
mcc = metrics.matthews_corrcoef(true_labels, predicted_labels)
print("10-Fold Cross-Validation Accuracy:", accuracy)
print("10-Fold Cross-Validation MCC:", mcc)
print(confusion_matrix(true_labels, predicted_labels))
print(accuracy_values)
print(mcc_values)
print(separation_values)

10-Fold Cross-Validation Accuracy: 0.808695652173913
10-Fold Cross-Validation MCC: 0.6245548223441452
[[49 15]
 [ 7 44]]
[0.8974358974358975, 0.8421052631578947, 0.6842105263157895]
[0.4230217115244236, 0.6899094182476204, 0.21159842337288995]
[tensor(0.0111), tensor(0.0418), tensor(0.0163), tensor(0.0400), tensor(0.0222), tensor(0.0116)]


In [7]:
pd.DataFrame({'True_Labels': np.array(true_labels), 'Predicted_Labels': np.array(predicted_labels)}).to_csv('results.csv', index=False)