In [8]:
import pandas as pd
# Load the RNA table and order it by subject (`sn`) and then by `period`, with a
# fresh 0..n-1 index. The row order matters: the next cell reshapes the flat table
# into a (subject, period, feature) cube and relies on this ordering.
rna_data = (
    pd.read_csv("gene_data/rna_common_complete.csv")
    .sort_values(by=['sn', 'period'])
    .reset_index(drop=True)
)

In [9]:
# Feature matrix: every column except the identifier / metadata columns.
X_og_shape = rna_data.drop(['sn','group','caarms_status','period'],axis=1).values

# Derive the cube dimensions from the data instead of hard-coding the number of
# periods. The reshape below is only meaningful when every subject contributes the
# same number of rows; otherwise rows from different subjects would be silently
# mixed within one slice, so fail loudly here.
periods_per_subject = rna_data.groupby('sn').size()
assert periods_per_subject.nunique() == 1, (
    "every subject must have the same number of period rows, got counts: "
    f"{sorted(periods_per_subject.unique().tolist())}"
)
n_subjects = len(periods_per_subject)
n_periods = int(periods_per_subject.iloc[0])

# rna_data is sorted by (sn, period), so consecutive groups of n_periods rows
# belong to one subject: result is (n_subjects, n_periods, n_features).
X_reshaped = X_og_shape.reshape(n_subjects, n_periods, X_og_shape.shape[1])

# One label per subject, taken from that subject's period-24 row.
labels_group = rna_data.loc[rna_data['period'] == 24, 'group'].values
assert len(labels_group) == n_subjects, (
    f"expected one period-24 row per subject ({n_subjects}), found {len(labels_group)}"
)
# Binary target: group 'C' -> 0, any other group -> 1.
labels = [0 if i == 'C' else 1 for i in labels_group]

In [10]:
import torch
import numpy as np
from neucube.utils import SNR
from neucube.utils import interpolate
from neucube.encoder import Delta

# Tunables for this cell, named so they are easy to find and adjust.
N_TOP_FEATURES = 20       # how many of the best-ranked features to keep
NUM_INTERP_POINTS = 104   # number of points passed to interpolate()
DELTA_THRESHOLD = 0.008   # threshold handed to the Delta encoder

# Rank features using the first period slice only (axis 1, index 0).
# NOTE(review): the ranking uses the labels of *all* subjects, including those that
# later end up in a cross-validation test fold -- consider doing the selection
# inside each fold to avoid information leakage.
ratios = SNR(X_reshaped[:, 0, :], labels)
top_idx = torch.argsort(ratios, descending=True)[:N_TOP_FEATURES]

# Keep only the selected features, then resample along time before encoding.
X_reshaped_topidx = X_reshaped[:, :, top_idx]
interpolated_X = interpolate(X_reshaped_topidx, num_points=NUM_INTERP_POINTS)

# Encode the interpolated series with the Delta encoder; X and y feed the CV cell.
encoder = Delta(threshold=DELTA_THRESHOLD)
X = encoder.encode_dataset(interpolated_X)
y = torch.tensor(labels)

In [11]:
import nevergrad as ng
import numpy as np
from functools import partial
from neucube.sampler import TemporalBinning
from neucube.utils import SeparationIndex
from tqdm import tqdm

def objective_function(res_, X_stimuli, labels, sampler, params):
    """Score one candidate parameter set by the separation of the resulting states.

    Side effect: the candidate is written into ``res_`` via ``update_parms`` and
    stays there after the call.

    Args:
        res_: reservoir exposing ``update_parms`` and ``simulate``.
        X_stimuli: input passed to ``res_.simulate``.
        labels: class labels passed to ``SeparationIndex``.
        sampler: object whose ``sample`` turns the simulated spikes into state vectors.
        params: iterable of exactly four items, used as the ``a``, ``b``, ``c``, ``d``
            keyword arguments of ``update_parms`` (in that order).

    Returns:
        ``SeparationIndex(...)`` of the sampled states, converted with ``.item()``.
    """
    cand_a, cand_b, cand_c, cand_d = params
    res_.update_parms(a=cand_a, b=cand_b, c=cand_c, d=cand_d)

    spikes = res_.simulate(X_stimuli, mem_thr=30, train=False, verbose=False)
    separation = SeparationIndex(sampler.sample(spikes), labels)
    return separation.item()

def run_opt(optimizer, objective_):
    """Drive an ask/tell optimizer for ``optimizer.budget`` iterations.

    The objective is treated as a score to maximise: its value is negated before
    being reported to the optimizer as a loss. Progress is printed on every
    second iteration.

    Args:
        optimizer: object with a ``budget`` attribute and ``ask()`` / ``tell()`` methods.
        objective_: callable invoked as ``objective_(params=candidate.value)``.
    """
    for step in range(1, optimizer.budget + 1):
        candidate = optimizer.ask()
        loss = -objective_(params=candidate.value)
        optimizer.tell(candidate, loss)

        if step % 2:
            # Odd step: nothing to report.
            continue
        print(f"Iteration {step}/{optimizer.budget}, Current loss: {loss}")

def train_dyanmics(reservoir_, X_, y_, sampler_, budget=1):
    """Search for reservoir parameters (a, b, c, d) that maximise state separation.

    The search space is built from the reservoir's current parameters:
      * ``a`` -- continuous array initialised from ``reservoir_.a``, bounded to [0.01, 0.5]
      * ``b`` -- continuous array initialised from ``reservoir_.b``, bounded to [0.2, 0.75]
      * ``c`` -- one integer choice from -65..-47 per element of ``reservoir_.c``
      * ``d`` -- one integer choice from 2..8 per element of ``reservoir_.d``

    Note: each evaluation writes the candidate into ``reservoir_`` (through
    ``objective_function``), so on return the reservoir holds the last evaluated
    candidate, which is not necessarily the recommended one. Callers should apply
    the returned values themselves.

    Args:
        reservoir_: reservoir whose ``a``, ``b``, ``c``, ``d`` attributes expose ``.cpu()``.
        X_: stimuli forwarded to the objective.
        y_: labels forwarded to the objective.
        sampler_: sampler forwarded to the objective.
        budget: number of objective evaluations the optimizer may spend. Defaults
            to 1 (the previously hard-coded value), which evaluates a single
            candidate only; raise it for an actual search.

    Returns:
        The recommended ``(a, b, c, d)`` tuple (``recommendation.value``).
    """
    parametrization = ng.p.Tuple(
        ng.p.Array(init=reservoir_.a.cpu()),
        ng.p.Array(init=reservoir_.b.cpu()),
        ng.p.TransitionChoice(list(range(-65,-46)), repetitions=reservoir_.c.cpu().shape[0]),
        ng.p.TransitionChoice(list(range(2,9)), repetitions=reservoir_.d.cpu().shape[0]),
    )

    # Bounds apply to the two continuous entries (a and b) of the tuple.
    parametrization[0].set_bounds(lower=0.01, upper=0.5)
    parametrization[1].set_bounds(lower=0.2, upper=0.75)

    optimizer = ng.optimizers.PortfolioDiscreteOnePlusOne(parametrization=parametrization, budget=budget)
    # Bind everything except `params`, which run_opt supplies per candidate.
    partial_objective_function = partial(objective_function, res_=reservoir_, X_stimuli=X_, labels=y_, sampler=sampler_)

    run_opt(optimizer, partial_objective_function)
    return optimizer.provide_recommendation().value

In [12]:
# TODO: run the experiment with 10-fold cross-validation and support repeated runs
# TODO: include the new additions from the Kaggle and Colab notebooks
# TODO: run a grid search over the SVM parameters in every fold
# TODO: calculate accuracy and MCC
# TODO: save the results to a CSV or pickle file

In [13]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import svm, metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

from neucube import IzhReservoir
from neucube.sampler import TemporalBinning
from neucube.utils import SeparationIndex

num_folds = 5
kf = KFold(n_splits=num_folds)
sampler = TemporalBinning(bin_size=10)

# The SVM search space, base estimator and scorer are identical for every fold,
# so build them once. GridSearchCV clones the estimator on fit, so sharing is safe.
param_grid = {'C': [2, 3, 4, 5, 6, 7, 8], 'gamma': [0.1, 0.01, 0.001], 'kernel': ['rbf', 'linear', 'poly']}
svm_model = svm.SVC()
mcc_scorer = make_scorer(metrics.matthews_corrcoef)

# Accumulators across folds (plain Python numbers, not tensors / numpy scalars).
true_labels = []
predicted_labels = []
separation_values = []  # [train, test] separation index per fold, flattened
accuracy_values = []
mcc_values = []

# kf.split() is a generator without a length, so tell tqdm how many folds to
# expect; otherwise the bar can only show "0it" with no progress or ETA.
for train_index, test_index in tqdm(kf.split(X), total=num_folds):
    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]

    # Fresh reservoir per fold so no state carries over between folds.
    izh_res = IzhReservoir(inputs=X.shape[2], c=0.7, l=0.18, input_conn_prob=0.85)
    izh_res.set_exc_parms(a=0.06, b=0.55, c=-55, d=3) #initial values
    izh_res.set_inh_parms(a=0.01, b=0.2, c=-65, d=8) #initial values

    # Tune the reservoir dynamics on the training fold only, then re-apply the
    # recommended parameters explicitly (the search leaves the reservoir holding
    # the last candidate it evaluated).
    opt_a, opt_b, opt_c, opt_d = train_dyanmics(izh_res, X_train_fold, y_train_fold, sampler)
    izh_res.update_parms(a=opt_a, b=opt_b, c=opt_c, d=opt_d)
    X_train_opt_spike = izh_res.simulate(X_train_fold, mem_thr=30, train=False, verbose=False)
    X_test_opt_spike = izh_res.simulate(X_test_fold, mem_thr=30, train=False, verbose=False)
    X_train_state_vec = sampler.sample(X_train_opt_spike)
    X_test_state_vec = sampler.sample(X_test_opt_spike)

    # Inner grid search on the training fold; the model is refit on the best MCC.
    grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=10, scoring={'accuracy': 'accuracy', 'mcc': mcc_scorer}, refit='mcc')
    grid_search.fit(X_train_state_vec, y_train_fold)
    y_pred = grid_search.best_estimator_.predict(X_test_state_vec)

    # Store plain ints: extending a list with a tensor would append 0-d tensors.
    true_labels.extend(y_test_fold.tolist())
    predicted_labels.extend(y_pred.tolist())
    # .item() matches how objective_function consumes SeparationIndex results.
    separation_values.extend([
        SeparationIndex(X_train_state_vec, y_train_fold).item(),
        SeparationIndex(X_test_state_vec, y_test_fold).item(),
    ])
    accuracy_values.append(accuracy_score(y_test_fold, y_pred))
    mcc_values.append(metrics.matthews_corrcoef(y_test_fold, y_pred))

# Calculate accuracy over the pooled out-of-fold predictions
accuracy = accuracy_score(true_labels, predicted_labels)
# Report the fold count actually used instead of a hard-coded "10".
print(f"{num_folds}-Fold Cross-Validation Accuracy:", accuracy)
print(confusion_matrix(true_labels, predicted_labels))

0it [00:02, ?it/s]


KeyboardInterrupt: 