In [1]:
from scipy.io import loadmat
import h5py
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import polynomial as poly

%matplotlib inline

In [2]:
# Experiment recordings analysed in this notebook. Every entry is a path stem;
# the ".hdf5" extension is appended wherever a file is actually opened.
_RECORDING_IDS = (
    "140708B_140811a",
    "140909C_141112a",
    "141006C_141121a",
    "150109A_150302a",
    "151122A_160202a",
    "151122B_160207a",
    "160209A_160430a",
    "160209B_160428a",
)
filelist = ["calcium_data/{}_result".format(rec_id) for rec_id in _RECORDING_IDS]

In [3]:
def SVM_presence_single(filename, amp, svm_kernel='linear', deg=6, cv=5):
    '''
    Trains one SVM per recording site to detect stimulus presence.

    For every recording site the output of poly.fit_polynomial for that site
    is used as feature matrix to separate trials with stimulus amplitude `amp`
    from trials with stimulus amplitude 0. Each site is evaluated on its own
    with cross validation.

    filename: string, name of experiment file (without the ".hdf5" extension)
    amp: stimulus amplitude of the 'present' trials. Has to be one of the
         non-zero values in column 1 of the file's 'meta' dataset
    svm_kernel: string, specifies svm kernel to use for training
    deg: int, degree handed to poly.fit_polynomial
    cv: int, number of folds for cv

    returns: n_dendrites x cv ndarray. Each row holds the accuracy values for
             each fold for one recording site.

    raises: AssertionError if `amp` is not one of the non-zero stimulus
            amplitudes of the experiment.
    '''
    with h5py.File(filename+".hdf5", "r") as f:
        stim_amps = f['meta'][:, 1]
    #read the stimulus amplitude of every trial into memory once. The file
    #handle is released right away instead of staying open after the call

    stims = np.unique(stim_amps)[1:]        #exclude zero-stim-trials
    assert(amp in list(stims)), "this stimAmp was not used in chosen experiment. Choose one from {}.".format(stims)
    #check whether the chosen amp matches one of the used stimAmps. Done
    #before the fit so a wrong amp fails without paying for the fit

    data = poly.fit_polynomial(filename, deg=deg, exclude_mask=None)
    #fitted values, indexed below as [feature, trial, site]

    n_dendrites = data.shape[2]
    #number of dendrites, loop over these

    sc = np.zeros((n_dendrites, cv))
    #will later hold the cv-scores

    trials_mask = np.logical_or(stim_amps == amp, stim_amps == 0)
    #boolean mask for all trials where the stimulus is either our chosen one or absent (0)

    y_true = np.where(stim_amps[trials_mask] == amp, 1.0, -1.0)
    #labels of the selected trials: present ones are '1', absent ones are '-1'

    clf = svm.SVC(kernel=svm_kernel)
    #create classifier

    for site in range(n_dendrites):
        features = data[:, trials_mask, site].T
        #one row per selected trial, one column per fitted value of this site

        sc[site, :] = cross_val_score(clf, features, y_true, cv=cv)
        #compute accuracy scores using crossvalidation

    return sc

In [4]:
def SVM_presence_combined(filename, svm_kernel='linear', deg=6, cv=10):
    '''
    Trains an SVM on the Ca2+ data of all dendrites to detect stimulus presence.

    For every non-zero stimulus amplitude of the experiment one classifier is
    cross validated on the trials with that amplitude versus the trials with
    amplitude 0. Features are the rows of poly.get_f_matrix(coefs) that belong
    to those trials.

    NOTE: a later cell of this notebook defines a function with the same name
    that uses the raw output of poly.fit_polynomial as features. Whichever
    definition was executed last is the one that gets called.

    filename: string, name of experiment file (without the ".hdf5" extension)
    svm_kernel: string, specifies svm kernel to use for training
    deg: int, degree handed to poly.fit_polynomial
    cv: int, number of folds for cv

    returns: n_stims x cv ndarray. Each row holds the accuracy values for each fold for one stimulus strength.
    '''
    with h5py.File(filename+".hdf5", "r") as f:
        stim_amps = f['meta'][:, 1]
    #stimulus amplitude of every trial, read into memory once. The file
    #handle is released right away instead of staying open after the call

    coefs = poly.fit_polynomial(filename, deg=deg, exclude_mask=None)
    data = poly.get_f_matrix(coefs)
    #feature matrix, indexed below with one row per trial

    stims = np.unique(stim_amps)[1:]        #exclude zero
    #stimuli to test against the zero-stimulus trials

    sc = np.zeros((stims.shape[0], cv))
    #will hold scores

    absent_mask = stim_amps == 0
    #mask for absent trials, identical for every amp

    for i, amp in enumerate(stims):
        trials_mask = np.logical_or(stim_amps == amp, absent_mask)
        #mask of all trials with stimulus zero or the chosen stimulus

        y_true = np.where(stim_amps[trials_mask] == amp, 1.0, -1.0)
        #stimulus present trials are marked as '1', absent ones as '-1'

        y_score = data[trials_mask, :]
        #features of the chosen trials (presumably mean dendritic changes,
        #see poly.get_f_matrix)

        clf = svm.SVC(kernel=svm_kernel)
        sc[i, :] = cross_val_score(clf, y_score, y_true, cv=cv)
        #make classifier and compute accuracy scores using cross validation

    return sc

In [25]:
def SVM_presence_combined(filename, svm_kernel='linear', deg=6, cv=10):
    '''
    Trains an SVM on the Ca2+ data of all dendrites to detect stimulus presence.

    For every non-zero stimulus amplitude of the experiment one classifier is
    cross validated on the trials with that amplitude versus the trials with
    amplitude 0. The feature vector of a trial is the output of
    poly.fit_polynomial for that trial, flattened over all recording sites.

    NOTE: this definition replaces an earlier function of the same name in
    this notebook, which used poly.get_f_matrix(coefs) as features.

    filename: string, name of experiment file (without the ".hdf5" extension)
    svm_kernel: string, specifies svm kernel to use for training
    deg: int, degree handed to poly.fit_polynomial
    cv: int, number of folds for cv

    returns: n_stims x cv ndarray. Each row holds the accuracy values for each fold for one stimulus strength.
    '''
    with h5py.File(filename+".hdf5", "r") as f:
        stim_amps = f['meta'][:, 1]
    #stimulus amplitude of every trial, read into memory once. The file
    #handle is released right away instead of staying open after the call

    coefs = poly.fit_polynomial(filename, deg=deg, exclude_mask=None)
    #fitted values, with the trials on axis 1

    trial_features = np.transpose(coefs, axes=[1,2,0])
    trial_features = trial_features.reshape(trial_features.shape[0], -1)
    #bring the trial axis to the front and flatten the rest, so every trial
    #is one row. Does not depend on amp, so it is done once before the loop

    stims = np.unique(stim_amps)[1:]        #exclude zero
    #stimuli to test against the zero-stimulus trials

    sc = np.zeros((stims.shape[0], cv))
    #will hold scores

    absent_mask = stim_amps == 0
    #mask for absent trials, identical for every amp

    for i, amp in enumerate(stims):
        trials_mask = np.logical_or(stim_amps == amp, absent_mask)
        #mask of all trials with stimulus zero or the chosen stimulus

        y_true = np.where(stim_amps[trials_mask] == amp, 1.0, -1.0)
        #stimulus present trials are marked as '1', absent ones as '-1'

        y_score = trial_features[trials_mask, :]
        print(y_score.shape)
        #rows of the chosen trials. The shape (n_trials, n_features) is
        #printed once per amp

        clf = svm.SVC(kernel=svm_kernel)
        sc[i, :] = cross_val_score(clf, y_score, y_true, cv=cv)
        #make classifier and compute accuracy scores using cross validation

    return sc

In [39]:
# Per-site cross-validation scores for the first experiment, separating
# trials with stimulus amplitude 3 from zero-stimulus trials.
scs = SVM_presence_single(
    filename=filelist[0],
    amp=3,
    svm_kernel='linear',
    deg=7,
    cv=5,
)

In [40]:
# Summarise the folds of every recording site (rows of scs): mean accuracy
# and standard deviation across folds.
means = scs.mean(axis=1)
sdvs = scs.std(axis=1)

# Report mean accuracy with a spread of two standard deviations per site.
for site, (mean, spread) in enumerate(zip(means, sdvs*2)):
    print("For recording site {0}: accuracy: {1: .2f} (+/- {2: .2f})".format(site, mean, spread))

For recording site 0: accuracy:  0.66 (+/-  0.20)
For recording site 1: accuracy:  0.76 (+/-  0.07)
For recording site 2: accuracy:  0.48 (+/-  0.12)
For recording site 3: accuracy:  0.46 (+/-  0.04)
For recording site 4: accuracy:  0.53 (+/-  0.12)
For recording site 5: accuracy:  0.49 (+/-  0.17)
For recording site 6: accuracy:  0.49 (+/-  0.17)
For recording site 7: accuracy:  0.63 (+/-  0.21)
For recording site 8: accuracy:  0.45 (+/-  0.14)
For recording site 9: accuracy:  0.53 (+/-  0.10)
For recording site 10: accuracy:  0.56 (+/-  0.07)
For recording site 11: accuracy:  0.51 (+/-  0.17)
For recording site 12: accuracy:  0.53 (+/-  0.16)
For recording site 13: accuracy:  0.43 (+/-  0.21)
For recording site 14: accuracy:  0.52 (+/-  0.10)
For recording site 15: accuracy:  0.52 (+/-  0.12)
For recording site 16: accuracy:  0.60 (+/-  0.09)
For recording site 17: accuracy:  0.60 (+/-  0.15)
For recording site 18: accuracy:  0.54 (+/-  0.21)
For recording site 19: accuracy:  0.58 (+

In [41]:
# Number of top-ranked recording sites to list.
n_out = 6
# Site indices ordered from highest to lowest mean accuracy.
inds = np.argsort(means)[::-1]

print("{} recordings sites best for classification:".format(n_out))
for rank in range(n_out):
    best_site = inds[rank]
    print("At recording site {0}: accuracy: {1: .2f} (+/- {2: .2f})".format(best_site, means[best_site], sdvs[best_site]*2))

6 recordings sites best for classification:
At recording site 1: accuracy:  0.76 (+/-  0.07)
At recording site 111: accuracy:  0.75 (+/-  0.28)
At recording site 109: accuracy:  0.73 (+/-  0.21)
At recording site 97: accuracy:  0.71 (+/-  0.18)
At recording site 33: accuracy:  0.71 (+/-  0.15)
At recording site 66: accuracy:  0.71 (+/-  0.16)


In [26]:
filename = filelist[0]

# Load the 'meta' dataset into memory and close the file again, so no HDF5
# handle stays open at module level after this cell has run.
with h5py.File(filename+".hdf5", "r") as f:
    meta = f['meta'][()]
stims = np.unique(meta[:,1])[1:]        #exclude zero

# One row of cross-validation scores per non-zero stimulus amplitude.
sc = SVM_presence_combined(filename, deg=6)

# Report mean accuracy with a spread of two standard deviations per amplitude.
for k in range(sc.shape[0]):
    print("For stimApm {0: .2f}: accuracy: {1: .2f} (+/- {2: .2f})".format(stims[k], np.mean(sc[k,:]), 2*np.std(sc[k,:])))

(100, 805)
(100, 805)
(100, 805)
(100, 805)
(100, 805)
(100, 805)
For stimApm  0.50: accuracy:  0.46 (+/-  0.26)
For stimApm  1.00: accuracy:  0.56 (+/-  0.16)
For stimApm  1.50: accuracy:  0.53 (+/-  0.38)
For stimApm  2.00: accuracy:  0.56 (+/-  0.34)
For stimApm  2.50: accuracy:  0.55 (+/-  0.33)
For stimApm  3.00: accuracy:  0.46 (+/-  0.27)
