## SVM benchmark: 
- ### train a SVM to tell apart K and NA channel models

In [1]:
import delfi.distribution as dd
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import pickle
import sys
import time

from lfimodels.channelomics.ChannelSingle import ChannelSingle
from lfimodels.channelomics.ChannelSuper import ChannelSuper
from lfimodels.channelomics.ChannelStats import ChannelStats
from matplotlib import pyplot as plt
from sklearn.svm import SVC

sys.path.append('../../')
from model_comparison.utils import *

%matplotlib inline

## generate observed data from ground truth model

In [2]:
channel_type = 'na'

GT = {'k': np.array([9, 25, 0.02, 0.002]),
      'na': np.array([-35, 9, 0.182, 0.124, -50, -75, 5, -65, 6.2, 0.0091, 0.024])}

LP = {'k': ['qa','tha','Ra','Rb'],
      'na': ['tha','qa','Ra','Rb','thi1','thi2','qi','thinf','qinf','Rg','Rd']}

E_channel = {'k': -86.7, 'na': 50}
fact_inward = {'k': 1, 'na': -1}

gt = GT[channel_type]
cython = True
third_exp_model = True

n_params = len(gt)
labels_params = LP[channel_type]
prior_lims = np.sort(np.concatenate((0.5 * gt.reshape(-1,1), 1.5 * gt.reshape(-1,1)), axis=1))

m = ChannelSuper(channel_type=channel_type, third_exp_model=third_exp_model, cython=cython)
p = dd.Uniform(lower=prior_lims[:,0], upper=prior_lims[:,1])
s = ChannelStats(channel_type=channel_type)

# generate observed data
n_params_obs = len(gt)
m_obs = ChannelSingle(channel_type=channel_type, n_params=n_params_obs, cython=cython)

xo = m_obs.gen(gt.reshape(1,-1))
xo_stats = s.calc(xo[0])

  x, _, _, _ = np.linalg.lstsq(a, b)


## Load training data and split test data

In [3]:
filename = 'training_data_k_na_N10000seed3.p'
folder = '../data/'
fullpath = os.path.join(folder, filename)
ntest = 100

with open(fullpath, 'rb') as f: 
    result_dict = pickle.load(f)

params_k, sx_k, gt_k, prior_lims_k, params_na, sx_na, gt_na, prior_lims_na, seed, n_samples, cython = result_dict.values()

n, n_stats = sx_na.shape
ntrain = n - ntest

# shuffle and set up model index target vector 
sx = np.vstack((sx_k[:ntrain, ], sx_na[:ntrain, ]))
sx_test = np.vstack((sx_k[ntrain:, ], sx_na[ntrain:, ]))

# define model indices
m = np.hstack((-1 * np.ones(ntrain), np.ones(ntrain))).squeeze().astype(int).tolist()
m_test = np.hstack((np.zeros(ntest), np.ones(ntest))).squeeze().astype(int).tolist()

# normalize data 
sx, data_norm = normalize(sx)

## Set up and train the vanilla SVM 


In [4]:
clf = SVC(probability=True)
clf.fit(sx, m)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Predict model index given observed summary stats

In [6]:
sx_test_zt, data_norm = normalize(sx_test, data_norm)
predicted_model_prob = clf.predict_proba(sx_test_zt.reshape(-1, sx_test_zt.shape[1]))
p = predicted_model_prob[:, 0]
y = np.array(m_test)

## Calculate cross entropy loss used for training the MDN

In [7]:
cel = -(y * np.log(1 - p) + (1 - y) * np.log(p))
cel.sum()

0.00967295126265148