GLM-eb /
J. Carpenter

In [1]:
# preamble
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import pandas as pd
import statsmodels.api as sm
from patsy import dmatrices


In [2]:
# Read the MATLAB sample file and pull out the three arrays used below.
filepath = 'sampleData.mat'
mat = scipy.io.loadmat(filepath)

ST = mat['ST']
P = mat['P']
hd = mat['hd']

#### define methods:

In [39]:
class glm:
    """Helpers for a Poisson-style GLM on position (P) and egocentric bearing (EB).

    The class builds one-hot design matrices from tracked position and head
    direction, bins/smooths the spike train, and evaluates the rate and
    log-likelihood of an exponential-link model.  The methods below the
    divider are softplus / elastic-net helpers adapted from a tutorial.
    """

    def __init__(self, ST, P, hd):
        """Store the raw arrays.

        Parameters
        ----------
        ST : ndarray
            Spike times, on the same axis as ``P[:, 0]``.
        P : ndarray
            Tracking matrix; column 0 is used as the time stamp.
        hd : ndarray
            Head direction; column 0 is converted from degrees to radians.
        """
        self.ST = ST
        self.P = P
        # NOTE(review): position is read from columns 2 and 3 here, whereas
        # get_speed() reads columns 1 and 2 -- confirm the column layout of P.
        self.x = P[:, 2]
        self.y = P[:, 3]
        self.hd = (hd[:, 0] * np.pi) / 180

    @staticmethod
    def _bin_centers(span, nbins):
        """Return the centres of ``nbins`` equal-width bins covering [0, span]."""
        return (np.arange(nbins) + 0.5) * (span / nbins)

    @staticmethod
    def _nearest_bin(values, centers):
        """Return, for each value, the index of the closest centre (first on ties)."""
        values = np.asarray(values)
        return np.abs(values[:, None] - centers[None, :]).argmin(axis=1)

    def get_size(self):
        """Return the largest x or y coordinate, used as the arena side length."""
        boxsz = np.max([np.max(self.x), np.max(self.y)])
        return boxsz

    def pos_map(self, nbins=10):
        """One-hot encode position on an ``nbins`` x ``nbins`` grid.

        Returns
        -------
        posgrid : ndarray, shape (n_samples, nbins**2)
            Row ``i`` has a single 1 in column ``iy * nbins + ix``.
        bins : ndarray, shape (nbins,)
            Bin centres shared by both axes.
        """
        boxsz = self.get_size()
        # Explicit centres: np.arange(start, stop, step) excludes the stop and
        # therefore silently dropped the last centre.
        bins = self._bin_centers(boxsz, nbins)
        idx_x = self._nearest_bin(self.x, bins)
        idx_y = self._nearest_bin(self.y, bins)
        # Row-major flattening: a11=0, a12=1, a13=2, ...
        bin_idx = np.ravel_multi_index((idx_y, idx_x), dims=(nbins, nbins), order='C')
        nsamp = len(self.x)
        posgrid = np.zeros((nsamp, nbins ** 2))
        posgrid[np.arange(nsamp), bin_idx] = 1
        return posgrid, bins

    def eb_map(self, nbins=10, rp=(75, 75)):
        """One-hot encode the bearing of the reference point ``rp`` relative to heading.

        Parameters
        ----------
        nbins : int
            Number of angular bins spanning 0..2*pi.
        rp : sequence of two numbers
            Reference point as ``(x, y)``.

        Returns
        -------
        egogrid : ndarray, shape (n_samples, nbins)
        bins : ndarray, shape (nbins,)
            Angular bin centres in radians.
        """
        refx, refy = rp[0], rp[1]
        allo = np.arctan2(refy - self.y, refx - self.x) + (np.pi / 2)  # add 90 deg
        # Wrap to [0, 2*pi) *after* subtracting the heading; without the wrap
        # every negative bearing was assigned to the first bin.
        ego = np.mod(allo - self.hd, 2 * np.pi)
        bins = self._bin_centers(2 * np.pi, nbins)
        idx_e = self._nearest_bin(ego, bins)
        # Size from the instance data rather than a module-level ``P``.
        nsamp = len(ego)
        egogrid = np.zeros((nsamp, nbins))
        egogrid[np.arange(nsamp), idx_e] = 1
        return egogrid, bins

    def conv_spktrain(self):
        """Bin the spikes on the tracking time base and smooth with a Gaussian.

        Returns
        -------
        ndarray, shape (n_samples,)
            Smoothed spike counts per time bin, aligned sample-for-sample with
            ``P[:, 0]``.  The kernel is not normalised and the counts are not
            divided by the sampling interval.
        """
        t = self.P[:, 0]
        # Keep only spikes that fall inside the tracked interval.
        in_session = np.logical_and(t[0] <= self.ST, self.ST <= t[-1])
        spikes = self.ST[in_session]
        edges = np.linspace(t[0], t[-1], len(t) + 1)
        counts, _ = np.histogram(spikes, edges)
        # Gaussian membership function: 9 taps, sigma = 2, centred on 0.
        support = np.linspace(-4, 4, 9)
        sigma = 2
        kernel = np.exp(-(support ** 2) / (2 * (sigma ** 2)))
        # Take the centred part of the full convolution so the output has
        # exactly one value per time bin (mode='full' returned N+8 values
        # shifted by four samples).
        full = np.convolve(counts, kernel, mode='full')
        half = (len(kernel) - 1) // 2
        return full[half:half + len(counts)]

    def get_speed(self):
        """Return running speed from central differences, shape (n_samples, 1).

        The first and last samples copy their neighbour's value.
        """
        # NOTE(review): uses columns 1 and 2 for x/y, unlike __init__.
        t = self.P[:, 0]
        x = self.P[:, 1]
        y = self.P[:, 2]
        ntime = len(t)
        v = np.zeros((ntime, 1))
        dx = x[2:] - x[:-2]
        dy = y[2:] - y[:-2]
        dt = t[2:] - t[:-2]
        v[1:-1, 0] = np.sqrt(dx ** 2 + dy ** 2) / dt
        if ntime >= 2:
            # pad the array
            v[0, 0] = v[1, 0]
            v[-1, 0] = v[-2, 0]
        return v

    def speed_threshold(self, posgrid, ebgrid, spiketrain, maxspeed=50, minspeed=4):
        """Keep only samples whose speed lies in ``[minspeed, maxspeed]``.

        Returns the row-filtered ``posgrid``, ``ebgrid`` and ``spiketrain``.
        """
        v = self.get_speed()[:, 0]
        inbounds = np.flatnonzero((v >= minspeed) & (v <= maxspeed))
        return posgrid[inbounds, :], ebgrid[inbounds, :], spiketrain[inbounds]

    def squish_statemat(self, posgrid, ebgrid):
        '''squish state matrix for 2-variable model (P+EB)

        Returns a DataFrame whose columns are ``P0..P{n-1}`` followed by
        ``E0..E{m-1}``, and the matching formula string ``'y ~ P0 + ... '``.
        '''
        ntime, nbins_eb = np.shape(ebgrid)
        _, nbins_p = np.shape(posgrid)
        A = np.zeros((ntime, nbins_p + nbins_eb))  # P+EB
        A[:, :nbins_p] = posgrid
        A[:, nbins_p:] = ebgrid
        # Column names follow the actual bin counts (the last bearing column
        # used to be hard-coded as 'E9').
        colnames = ['P' + str(i) for i in range(nbins_p)]
        colnames += ['E' + str(i) for i in range(nbins_eb)]
        expr = 'y ~ ' + ' + '.join(colnames)
        df = pd.DataFrame(A, columns=colnames)
        return df, expr

    def test_train(self, df, expr, spiketrain):
        """Randomly split the data (~80/20) and build design matrices.

        ``spiketrain`` becomes column ``y`` of a copy of ``df``; the caller's
        frame is left untouched so the method can be called repeatedly.

        Returns ``y_train, X_train, y_test, X_test`` as produced by
        ``dmatrices(expr, ..., return_type='dataframe')``.
        """
        data = df.copy()
        data.insert(loc=0, column='y', value=spiketrain, allow_duplicates=False)
        mask = np.random.rand(len(data)) < 0.8
        df_train = data[mask]
        df_test = data[~mask]
        # split into test and train
        y_train, X_train = dmatrices(expr, df_train, return_type='dataframe')
        y_test, X_test = dmatrices(expr, df_test, return_type='dataframe')
        # info for user
        print('Training data set length='+str(len(df_train)))
        print('Testing data set length='+str(len(df_test)))
        return y_train, X_train, y_test, X_test

    def init_params(self, whichVars=frozenset({'P', 'E'}), n_pos=100, n_eb=10):
        """Draw small random starting parameters (intercept + one per column).

        Parameters
        ----------
        whichVars : iterable of str
            Any non-empty combination of ``'P'`` and ``'E'``.
        n_pos, n_eb : int
            Number of position / bearing columns; the defaults match
            ``nbins=10`` in ``pos_map`` and ``eb_map``.

        Raises
        ------
        ValueError
            If ``whichVars`` is empty or contains anything else.
        """
        sizes = {'P': n_pos, 'E': n_eb}
        selected = frozenset(whichVars)
        if not selected or not selected <= sizes.keys():
            raise ValueError(
                "whichVars must be a non-empty subset of {'P', 'E'}, got %r"
                % (whichVars,))
        nparam = sum(sizes[name] for name in selected)
        return 1e-3 * np.random.randn(nparam + 1, 1)

    def get_rate(self, X, param):
        """Return ``(exp(X @ param), X @ param)``."""
        u = np.dot(X, param)
        rate = np.exp(u)
        return rate, u

    def get_LL(self, rate, Y):
        '''log-likelihood function: sum(Y * log(rate) - rate)'''
        loglik = np.sum(Y*np.log(rate)-rate)
        return loglik

    ##################################################################################################################

    # functions from the tutorial; will probably change these to simplify (?)

    def qu(self, z):
        '''the nonlinearity (w/ softplus): log(1 + exp(z))'''
        # logaddexp(0, z) is the overflow-safe form of log1p(exp(z)).
        return np.logaddexp(0, z)

    def lmb(self, beta0, beta, X):
        '''conditional intensity function'''
        z = beta0 + np.dot(X, beta)
        l = self.qu(z)
        return l

    def penalty(self, alpha, beta):
        '''penalty term: 0.5 * (1 - alpha) * ||beta||_2^2 + alpha * ||beta||_1'''
        # Flatten so that 2-D input is measured with vector (not matrix) norms.
        b = np.ravel(beta)
        P = 0.5 * (1 - alpha) * np.linalg.norm(b, 2) ** 2 + \
            alpha * np.linalg.norm(b, 1)
        return P

    def loss(self, beta0, beta, reg_lambda, X, y, alpha=0.0):
        '''define objective function for elastic net

        ``alpha`` mixes the L1 and L2 penalties; the default 0.0 is pure L2,
        for which ``grad_L2loss`` is the exact gradient.
        '''
        L = self.get_LL(self.lmb(beta0, beta, X), y)
        P = self.penalty(alpha, beta)
        J = -L + reg_lambda * P
        return J

    def grad_L2loss(self, beta0, beta, reg_lambda, X, y, alpha=0.0):
        """Gradient of the negative log-likelihood plus the L2 part of the penalty.

        Returns ``(grad_beta0, grad_beta)``.
        """
        from scipy.special import expit  # logistic sigmoid = d softplus / dz

        z = beta0 + np.dot(X, beta)
        s = expit(z)
        q = self.qu(z)
        grad_beta0 = np.sum(s) - np.sum(y * s / q)
        grad_beta = np.transpose(np.dot(np.transpose(s), X) -
                                 np.dot(np.transpose(y * s / q), X)) + \
            reg_lambda * (1 - alpha) * beta
        return grad_beta0, grad_beta

    def hessian_loss(self, beta0, beta, alpha, reg_lambda, X, y):
        """Second derivatives of the loss w.r.t. ``beta0`` and each ``beta`` entry.

        Returns ``(hess_beta0, hess_beta)``.
        """
        from scipy.special import expit

        z = beta0 + np.dot(X, beta)
        q = self.qu(z)
        s = expit(z)
        grad_s = s * (1-s)
        grad_s_by_q = grad_s/q - s/(q * q)
        hess_beta0 = np.sum(grad_s) - np.sum(y * grad_s_by_q)
        hess_beta = np.transpose(np.dot(np.transpose(grad_s), X * X)
                                 - np.dot(np.transpose(y * grad_s_by_q), X * X))\
            + reg_lambda * (1-alpha)
        return hess_beta0, hess_beta

#### apply methods:

In [40]:
# Build the helper object from the arrays loaded above.
g = glm(ST, P, hd)

# One-hot design matrices for position and egocentric bearing, plus the
# smoothed spike train.  `bins` is reassigned by the second call, so it ends
# up holding the value returned by eb_map.
posgrid_raw, bins = g.pos_map(nbins=10)
ebgrid_raw, bins = g.eb_map(nbins=10, rp=[75, 75])
smooth_fr = g.conv_spktrain()

# Drop samples outside the speed limits, then merge both grids into one frame.
posgrid, ebgrid, spiketrain = g.speed_threshold(posgrid_raw, ebgrid_raw, smooth_fr)
df, expr = g.squish_statemat(posgrid, ebgrid)

# Random train/test partition.
y_train, X_train, y_test, X_test = g.test_train(df, expr, spiketrain)

# Starting parameters (beta0 and the beta_i's) for the P+E model.
init_param = g.init_params(whichVars={'P', 'E'})
param = init_param
X = X_train
Y = y_train
rate, u = g.get_rate(X, param)

# Log-likelihood at the starting parameters (no optimisation is run here yet).
loglik = g.get_LL(rate, Y)



Training data set length=38965
Testing data set length=9825
