### GLM-eb 
@author: Jordan, Ben

#### setup:

In [3]:
# preamble
import glmmod # module in .py file
import scipy.io
import time
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import pandas as pd
import statsmodels.api as sm
from patsy import dmatrices
import scipy.sparse as sps
import scipy.stats as stats
from pyglmnet import GLM, simulate_glm
import scipy as sp
import statistics

In [4]:
# visualization parameters
# Activate the base style sheet FIRST. 'ggplot' carries its own
# axes.labelsize / axes.titlesize / font.size entries, so applying it after
# the explicit overrides (as was done before) silently replaced them.
plt.style.use('ggplot')

# Notebook-wide overrides applied on top of the style sheet.
plt.rcParams['figure.figsize'] = (4, 2)
plt.rc('axes', labelsize=10)
plt.rc('axes', titlesize=10)
plt.rc('font', size=10)

In [5]:
# load & format data
# Read the MATLAB file once, then pull out the three variables
# ('ST', 'P', 'hd') that are handed to glmmod.glm in the next cell.
filepath = 'sampleData.mat'
mat = scipy.io.loadmat(filepath)
ST = mat['ST']
P = mat['P']
hd = mat['hd']

#### execute glm:

In [None]:
# initialize class instance
g = glmmod.glm(ST, P, hd)

# prepare the data
# NOTE: `bins` is bound by both calls below, so afterwards it only holds the
# value returned by eb_map.
(posgrid_raw, bins) = g.pos_map(nbins=10)
(ebgrid_raw, bins) = g.eb_map(nbins=10, rp=[75, 75])

# get spiketrain
(smooth_fr, raw_spktrn, filt, dt) = g.conv_spktrain()

# run the raw grids and raw spike train through g.speed_threshold
(posgrid, ebgrid, spiketrain) = g.speed_threshold(
    posgrid_raw, ebgrid_raw, raw_spktrn
)

# dictionaries with info about each model, keyed by model index 0..2:
#   stateDict -> state input(s) handed to squish_statemat
#   labelDict -> short label, also passed on as `modelType`
stateDict = dict(enumerate([[posgrid, ebgrid], posgrid, ebgrid]))
labelDict = dict(enumerate(['PE', 'P', 'E']))

# container for per-model results, filled in by the fitting loop
allModels = {}
numModels = 3

# get test/train indices (same for each model)
(kfoldIdx, kfoldIdx_df) = g.kfoldSplit(nfolds=10)

# Fit every model in turn and collect its results in allModels.
for model in range(numModels):
    # get state matrix
    stateIn = stateDict[model]
    statemat, expr = g.squish_statemat(
        spiketrain, stateIn, modelType=labelDict[model]
    )

    # optimize model parameters
    (kres, train_y, test_y, train_x, test_x,
     train_y_raw, test_y_raw) = g.kfoldOptim(kfoldIdx_df, statemat)

    # check the model fit
    testfit = g.get_testFit(
        kres, train_y, test_y, train_x, test_x, train_y_raw, test_y_raw
    )

    # bundle everything produced for this model
    modelDict = {
        'kfoldIdx': kfoldIdx_df,
        'kres': kres,
        'train_y': train_y,
        'train_x': train_x,
        'test_y': test_y,
        'test_x': test_x,
        'train_y_raw': train_y_raw,
        'test_y_raw': test_y_raw,
        'testfit': testfit,
    }

    # save in allModels dictionary
    allModels[model] = modelDict

# NOTE(review): at this point `modelDict` holds only the LAST model fitted in
# the loop above. If findBestModel is meant to compare all models it probably
# wants `allModels` -- confirm against glmmod before changing.
llh, bestModel = g.findBestModel(modelDict)

# plot_llh is given every model's results plus the model labels
g.plot_llh(allModels, labelDict)

#### visualize data:

In [None]:
# Overlay test_y[fold] and yhat on a single axis.
# NOTE(review): `fold` and `yhat` are not assigned in any cell visible here;
# they must exist in the session before this cell is run.
fig, ax = plt.subplots(1, 1)
ax.plot(test_y[fold])
ax.plot(yhat)
ax.set_xlabel('time step')
# trailing ';' keeps the notebook from echoing the returned Text object
ax.set_ylabel('y (smoothed rate)');

In [None]:
# Two-panel view of the distribution of `y`.
# NOTE(review): `y` is not assigned in any cell visible here; it must exist in
# the session before this cell is run.
fig, ax = plt.subplots(1, 2, figsize=(10, 2))

# Left panel: normal pdf parameterised by y's mean and standard deviation,
# evaluated at the edges of a 120-bin histogram of y (the histogram counts
# themselves are discarded).
_, bin_edges = np.histogram(y, 120)
ax[0].plot(bin_edges, scipy.stats.norm.pdf(bin_edges, loc=y.mean(), scale=y.std()))
ax[0].set_title(r'Distribution of Rates')
ax[0].set_xlabel('rate (hz)')
ax[0].set_ylabel('hist')

# Right panel: kernel density estimate of y. The target axes is passed
# explicitly; previously the call relied on pyplot's implicit "current axes"
# happening to be the right-hand panel.
sns.kdeplot(y, color='#fcb103', bw=.017, shade=True, ax=ax[1])
ax[1].set_title(r'Distribution of Rates')
ax[1].set_xlabel('rate (hz)')
# trailing ';' keeps the notebook from echoing the returned Text object
ax[1].set_ylabel('probability');

#### visualize results:

In [None]:
# Bar chart of `sse` with one bar per model label (same labels as labelDict).
# NOTE(review): `sse` is not assigned in any cell visible here; it presumably
# holds one value per model in PE / P / E order -- confirm before running.
plt.bar(['PE', 'P', 'E'], sse);

In [None]:
# Split the fitted parameter vector: element 0 is used as the bias term and
# the remaining elements as the weights, then evaluate the rate via g.get_rate.
# NOTE(review): `res`, `X`, `y` and `smooth_fr_hat_test` are not assigned in
# any cell visible here; they must exist in the session before this cell runs.
b_fit = res.x[0]
w_fit = res.x[1:]
y_hat = g.get_rate(X, w_fit, b_fit)

# Compare the first 10000 samples of the data with the model trace.
# NOTE(review): the curve labelled 'model' is `smooth_fr_hat_test`, not the
# `y_hat` computed above -- confirm which one is intended.
fig, ax = plt.subplots(1, 1, figsize=(20, 8))
ax.plot(y[:10000], label='data')
ax.plot(smooth_fr_hat_test[:10000], label='model')
ax.set_title(r'model vs. data')
ax.set_xlabel('time (s)')
ax.set_ylabel('rate (hz)')
# trailing ';' keeps the notebook from echoing the returned Legend object
ax.legend(loc="upper right");