## Multiple regression for object-selectivity (no subsample)

This notebook fits two nested multiple linear regression models: one with object-related regressors only, 
and the other with both object and choice regressors.

This notebook does not use a subsampling method.

In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd

from scipy import stats
from scipy.ndimage import gaussian_filter
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

import matplotlib as mpl
import matplotlib.pyplot as plt  

from datetime import date
import time

import random

from joblib import Parallel, delayed

import h5py

In [2]:
# Hide the top and right axes spines in every figure drawn by this notebook.
mpl.rcParams.update({
    'axes.spines.right': False,
    'axes.spines.top': False,
})

In [3]:
# Root directory of the project. It defaults to the original workstation
# location; set the MULTIMODAL_PROJECT_DIR environment variable to run the
# notebook on another machine without editing this cell.
mother_path = Path(os.environ.get('MULTIMODAL_PROJECT_DIR', 'D:/Multi-modal project/'))

### Parameter setting

In [99]:
# Number of shuffled regressions run per cell (size of the null distribution).
num_iter = 100

# Sigma handed to gaussian_filter when the firing-rate traces are smoothed.
gauss_sigma = 1

# Plot colors, indexed in the order multimodal, visual-only, auditory-only;
# the fourth entry (gray) is used for the control (elemental) condition.
color = [
    'mediumorchid',
    'cornflowerblue',
    'lightcoral',
    'gray',
]
# NOTE(review): color2 is not referenced by any cell visible in this notebook.
color2 = [
    'cyan',
    'magenta',
    'brown',
]
# Solid line for the first goal of each pair, dotted line for the second.
linestyle = ['-', ':']

# ISO date (YYYY-MM-DD) used to name the output folder and result file.
today = date.today().isoformat()

### Data preparation

In [103]:
# Output folder (one per run date) and the folder holding one CSV export per cell.
save_path = mother_path /'analysis'/'result'/'3. Multiple regression for object-selectivity'/today
cell_path = mother_path/'analysis'/'result'/'zFR export'/'13-Apr-2022 (5 trials)'

os.makedirs(save_path,exist_ok=True)
# Mode 'w' truncates any result file already written today. The handle stays
# open for the whole analysis and is closed by f.close() in the last cell.
f = h5py.File(save_path/f'{today}_multiple_regression_result.hdf5','w')

# os.listdir returns entries in arbitrary order and may include non-CSV
# entries; keep only the CSV exports and process them in a stable order.
cell_list = sorted(name for name in os.listdir(cell_path) if name.lower().endswith('.csv'))

### Data analysis

In [68]:
def shuffle_and_MLR(df):
    """
    Fit both regression models once on trial-shuffled regressors.

    Each call produces one sample of the null distribution: the rows of the
    two design matrices are permuted with the SAME random trial order, and an
    ordinary least squares model is then fitted to every time bin of the
    (unshuffled) firing rates.

    Parameters
    ----------
    df : any
        Not used. It is kept so existing callers (``shuffle_and_MLR(df)``)
        keep working. The function reads the module-level variables ``x1``
        (basic regressors), ``x2`` (basic + choice regressors) and ``fr``
        (trials x time-bins firing-rate array) set by the main loop.

    Returns
    -------
    tuple of 8 lists, each with one entry per time bin, in this order:
        beta coefficients (basic, extended) with the intercept removed,
        R-squared (basic, extended), BIC (basic, extended), AIC (basic, extended).
    """

    # Draw one trial permutation and apply it to both design matrices so the
    # nested models are compared on identically shuffled trials.
    perm = np.random.permutation(len(x1))

    # The design matrices do not depend on the time bin: build them once.
    design_basic = add_constant(np.asarray(x1)[perm])
    design_extended = add_constant(np.asarray(x2)[perm])

    beta_coef_shuffle, beta_coef_choice_shuffle = [],[]
    rsquared_shuffle, rsquared_choice_shuffle = [],[]
    BIC_shuffle, BIC_choice_shuffle = [],[]
    AIC_shuffle, AIC_choice_shuffle = [],[]

    # calculate beta coefficients and r-squared values in each time bin
    for t in range(fr.shape[1]):

        lr1_shuffle = OLS(fr[:,t],design_basic).fit()
        lr2_shuffle = OLS(fr[:,t],design_extended).fit()

        # params[0] is the intercept added by add_constant; keep the regressors only
        beta_coef_shuffle.append(lr1_shuffle.params[1:])
        beta_coef_choice_shuffle.append(lr2_shuffle.params[1:])

        rsquared_shuffle.append(lr1_shuffle.rsquared)
        rsquared_choice_shuffle.append(lr2_shuffle.rsquared)

        BIC_shuffle.append(lr1_shuffle.bic)
        BIC_choice_shuffle.append(lr2_shuffle.bic)

        AIC_shuffle.append(lr1_shuffle.aic)
        AIC_choice_shuffle.append(lr2_shuffle.aic)

    return beta_coef_shuffle, beta_coef_choice_shuffle,\
            rsquared_shuffle, rsquared_choice_shuffle,\
            BIC_shuffle, BIC_choice_shuffle,\
            AIC_shuffle, AIC_choice_shuffle

In [94]:
def plot_SDF_beta(df,linewidth,smooth,save,save_format):
    """
    Plot per-condition mean firing-rate traces and the regression beta coefficients.

    The figure is a 3x3 grid:
      * column 0: all conditions (row 0), then the conditions rewarded at
        ``boy_goal`` (row 1) and at ``egg_goal`` (row 2);
      * column 1: multimodal, visual and auditory conditions (rows 0-2), each
        with both goals and with the control (elemental) traces overlaid in gray;
      * column 2: beta coefficients of the visual, auditory and interaction
        terms; column ``i`` of ``beta_coef`` is drawn solid and column ``i+3``
        dotted.

    Parameters
    ----------
    df : pandas.DataFrame
        Trial table with ``Type`` and ``RWD_Loc`` columns and 95 firing-rate
        columns starting at column index ``fr_id``.
    linewidth : float
        Line width of the condition traces and beta traces.
    smooth : bool-like
        If truthy, mean and SEM traces are smoothed with ``gaussian_filter``
        using ``gauss_sigma``.
    save : bool-like
        If truthy, the figure is written to ``save_path/region`` and closed.
    save_format : str
        ``'png'`` or ``'svg'``. Any other value saves nothing (the figure is
        still closed when ``save`` is truthy).

    Notes
    -----
    Reads the module-level variables ``boy_goal``, ``egg_goal``, ``cell_name``,
    ``fr_id``, ``gauss_sigma``, ``color``, ``linestyle``, ``beta_coef``,
    ``save_path`` and ``region`` set elsewhere in the notebook.
    """
    n_bins = 95

    # Condition order: (type, goal) pairs with the boy goal first, so even
    # indices are boy-goal conditions and odd indices are egg-goal conditions.
    stim_types = ['Multimodal','Visual','Auditory','Elemental']
    cond = [(df.Type==stim)&(df.RWD_Loc==goal)
            for stim in stim_types for goal in (boy_goal,egg_goal)]

    # str.strip('.csv') removes any of the characters '.', 'c', 's', 'v' from
    # both ends, not the suffix; drop the extension explicitly instead.
    cell_full_name = cell_name[:-len('.csv')] if cell_name.endswith('.csv') else cell_name

    fr_mean = np.zeros((len(cond),n_bins))
    fr_sem = np.zeros((len(cond),n_bins))
    for i,mask in enumerate(cond):
        fr_cond = df[mask].iloc[:,fr_id:fr_id+n_bins].to_numpy()
        fr_mean[i,:] = fr_cond.mean(axis=0)
        fr_sem[i,:] = stats.sem(fr_cond)

    if smooth:
        for i in range(len(cond)):
            fr_mean[i,:] = gaussian_filter(fr_mean[i,:],sigma=gauss_sigma)
            fr_sem[i,:] = gaussian_filter(fr_sem[i,:],sigma=gauss_sigma)

    # A condition with fewer than two trials yields NaN mean/SEM; ignore those
    # entries so the axis limits of the remaining conditions stay defined.
    y_max = np.ceil(np.nanmax(fr_mean+fr_sem))
    y_min = np.ceil(np.abs(np.nanmin(fr_mean-fr_sem)))*-1

    fig,ax = plt.subplots(3,3,figsize=(15,10))
    fig.suptitle(cell_full_name,fontsize=15)
    x = np.arange(n_bins)*10   # bin index x 10, shown on the 'Time (ms)' axis

    def _trace(axis,i):
        # Draw mean +/- SEM of condition i on the given axis.
        c = color[i//2]
        axis.plot(x,fr_mean[i,:],color=c,linewidth=linewidth,linestyle=linestyle[i%2])
        axis.fill_between(x,fr_mean[i,:]-fr_sem[i,:],fr_mean[i,:]+fr_sem[i,:],color=c,alpha=0.2)

    for i in range(len(cond)):
        _trace(ax[0,0],i)          # all conditions together
        _trace(ax[i%2+1,0],i)      # split by goal: row 1 = boy goal, row 2 = egg goal
        if i<6:
            _trace(ax[i//2,1],i)   # split by stimulus type (control handled below)

    # control stimulus SDF
    for i in range(3):
        ax[i,1].plot(x,fr_mean[6,:],color=color[3])
        ax[i,1].plot(x,fr_mean[7,:],color=color[3],linestyle=':')

    # shared axis formatting for the firing-rate panels (columns 0 and 1)
    for col in range(2):
        for row in range(3):
            ax[row,col].set_yticks(np.arange(y_min,y_max+0.1,1))
            ax[row,col].set_ylabel('z-scored FR',fontsize=13)
            ax[row,col].set_xlabel('Time (ms)',fontsize=13)
            ax[row,col].set_xticks([0,400,950])
            ax[row,col].set_xlim([0,950])

    # beta coefficient plot
    t2 = ['Visual term','Auditory term','Interaction term']
    c2 = ['blue','red','green']
    for i in range(3):
        ax[i,2].plot(x,beta_coef[:,i],color=c2[i],linestyle='-',linewidth=linewidth)
        ax[i,2].plot(x,beta_coef[:,i+3],color=c2[i],linestyle=':',linewidth=linewidth)

        ax[i,2].set_title(t2[i],fontsize=13)
        ax[i,2].set_yticks(np.arange(-1,1.1,0.5))
        ax[i,2].set_ylabel('Beta coefficient',fontsize=13)
        ax[i,2].set_xlabel('Time (ms)',fontsize=13)
        ax[i,2].set_xticks([0,400,950])
        ax[i,2].set_xlim([0,950])

    fig.tight_layout()

    if save:
        fig_path = save_path/region
        os.makedirs(fig_path,exist_ok=True)
        if save_format=='png':
            fig.savefig(fig_path/f'{cell_full_name}.png',dpi=100,facecolor='white')
        elif save_format=='svg':
            fig.savefig(fig_path/f'{cell_full_name}.svg')
        # close explicitly: this function is called once per cell in a long loop
        plt.close(fig)

In [101]:
def save_result(f):
    """
    Store the regression results of the current cell in the open HDF5 file ``f``.

    A group named after ``cell_id`` is created with four sub-groups
    (``beta_coef``, ``rsquared``, ``BIC``, ``AIC``). Each sub-group receives
    four datasets: ``basic``, ``extended``, ``basic_shuffle`` and
    ``extended_shuffle``. Rat, region and session are attached to the cell
    group as attributes. All values are read from module-level variables set
    by the main loop.
    """
    root = f.create_group(str(cell_id))

    # (sub-group name, basic, extended, basic shuffle, extended shuffle)
    metrics = [
        ('beta_coef', beta_coef, beta_coef_choice, beta_coef_shuffle, beta_coef_choice_shuffle),
        ('rsquared', rsquare, rsquare_choice, rsquare_shuffle, rsquare_choice_shuffle),
        ('BIC', BIC, BIC_choice, BIC_shuffle, BIC_choice_shuffle),
        ('AIC', AIC, AIC_choice, AIC_shuffle, AIC_choice_shuffle),
    ]
    dataset_names = ('basic', 'extended', 'basic_shuffle', 'extended_shuffle')

    # Create every sub-group first, then fill them, in the listed order.
    subgroups = [f.create_group(f'{cell_id}/{name}') for name, *_ in metrics]
    for group, (_, *arrays) in zip(subgroups, metrics):
        for ds_name, values in zip(dataset_names, arrays):
            group.create_dataset(ds_name, data=values)

    # Cell-level metadata.
    for key, value in (('Rat', rat_id), ('Region', region), ('Session', session_id)):
        root.attrs[key] = value

In [104]:
%%time
# Per-cell pipeline: load trials, build regressors, fit the basic and extended
# models in every time bin, build the shuffled null distribution, plot, save.
for cell_run,cell_name in enumerate(cell_list):
    loop_start = time.time()

    # File name without its '.csv' extension. str.strip('.csv') would remove
    # any of the characters '.', 'c', 's', 'v' from both ends, not the suffix.
    cell_label = cell_name[:-len('.csv')] if cell_name.endswith('.csv') else cell_name

    # get information about the cell (fields of the '-'-separated file name)
    cell_info = cell_name.split('-')
    cell_id, rat_id, session_id, region = int(cell_info[0]), cell_info[1], cell_info[2], cell_info[5]

    # this rat/session combination is excluded from the analysis
    if rat_id=='654' and session_id=='4':
        continue

    # load cell data, keeping every trial whose Correctness is not 0
    df = pd.read_csv(cell_path/cell_name)
    df = df[df.Correctness!=0].reset_index(drop=True)
    df[['Visual','Auditory']] = df[['Visual','Auditory']].fillna('no')

    # goal location associated with each visual object
    boy_goal = df.loc[df['Visual']=='Boy','RWD_Loc'].values[0]
    egg_goal = df.loc[df['Visual']=='Egg','RWD_Loc'].values[0]

    # Sound associated with each goal. Only trials that actually have a sound
    # are considered: the first trial at a goal may have Auditory == 'no',
    # which would turn the auditory regressor into a "no sound" indicator.
    # NOTE(review): assumes 'no' (filled above) is the only marker of a trial
    # without sound - confirm against the CSV export.
    has_sound = df['Auditory']!='no'
    boy_sounds = df.loc[(df['RWD_Loc']==boy_goal)&has_sound,'Auditory']
    egg_sounds = df.loc[(df['RWD_Loc']==egg_goal)&has_sound,'Auditory']
    if boy_sounds.empty or egg_sounds.empty:
        raise ValueError(f'{cell_name}: no trial with an auditory stimulus for one of the goals')
    boy_aud = boy_sounds.values[0]
    egg_aud = egg_sounds.values[0]

    # binary regressors for each object/modality and their interactions
    df['Boy-V'] = (df['Visual'] == 'Boy').astype(int)
    df['Boy-A'] = (df['Auditory'] == boy_aud).astype(int)
    df['Egg-V'] = (df['Visual'] == 'Egg').astype(int)
    df['Egg-A'] = (df['Auditory'] == egg_aud).astype(int)

    df['Boy-int'] = df['Boy-V']*df['Boy-A']
    df['Egg-int'] = df['Egg-V']*df['Egg-A']

    df['Choice'] = (df['RWD_Loc']==boy_goal).astype(int)

    n_bins = 95
    fr_id = df.columns.get_loc('Var10')  # get the index of the first firing rate column
    fr = df.iloc[:,fr_id:fr_id+n_bins].to_numpy()    # get firing rate data into array

    # x1: basic (object) model, x2: extended (object + choice) model
    x1 = df[['Boy-V','Boy-A','Boy-int','Egg-V','Egg-A','Egg-int']]
    x2 = df[['Boy-V','Boy-A','Boy-int','Egg-V','Egg-A','Egg-int','Choice']]

    # the design matrices are the same for every time bin: build them once
    design_basic = add_constant(x1)
    design_extended = add_constant(x2)

    beta_coef, beta_coef_choice = [],[]
    rsquare, rsquare_choice = [],[]
    BIC, BIC_choice = [],[]
    AIC, AIC_choice = [],[]

    for t in range(n_bins):
        lr1 = OLS(fr[:,t],design_basic).fit()
        lr2 = OLS(fr[:,t],design_extended).fit()

        # params[0] is the intercept; keep the regressor coefficients only
        beta_coef.append(lr1.params[1:])
        beta_coef_choice.append(lr2.params[1:])

        rsquare.append(lr1.rsquared)
        rsquare_choice.append(lr2.rsquared)

        BIC.append(lr1.bic)
        BIC_choice.append(lr2.bic)

        AIC.append(lr1.aic)
        AIC_choice.append(lr2.aic)

    beta_coef = np.array(beta_coef)
    beta_coef_choice = np.array(beta_coef_choice)

    # null distribution: num_iter shuffled fits, run in parallel on all cores
    results = Parallel(n_jobs=-1)(delayed(shuffle_and_MLR)(df) for _ in range(num_iter))

    beta_coef_shuffle = np.array([r[0] for r in results])
    beta_coef_choice_shuffle = np.array([r[1] for r in results])

    rsquare_shuffle = np.array([r[2] for r in results])
    rsquare_choice_shuffle = np.array([r[3] for r in results])

    BIC_shuffle = np.array([r[4] for r in results])
    BIC_choice_shuffle = np.array([r[5] for r in results])

    AIC_shuffle = np.array([r[6] for r in results])
    AIC_choice_shuffle = np.array([r[7] for r in results])

    # smoothed traces, saved as PNG
    plot_SDF_beta(df,2,1,1,'png')

    # save results into HDF5 format
    save_result(f)

    loop_end = time.time()
    loop_time = divmod(loop_end-loop_start,60)
    print(cell_label, f'////// {cell_run+1}/{len(cell_list)} completed  //////  {int(loop_time[0])} min {loop_time[1]:.2f} sec')

0003-600-1-1-Crossmodal-TeV-deep-(-7.32 mm)-TT4.1 ////// 1/888 completed  //////  0 min 4.65 sec
0004-600-1-1-Crossmodal-TeV-deep-(-7.32 mm)-TT4.2 ////// 2/888 completed  //////  0 min 4.90 sec
0005-600-1-1-Crossmodal-TeV-deep-(-7.32 mm)-TT4.3 ////// 3/888 completed  //////  0 min 4.74 sec
0006-600-1-1-Crossmodal-PER-superficial-(-7.2 mm)-TT5.1 ////// 4/888 completed  //////  0 min 4.66 sec
0007-600-1-1-Crossmodal-PER-superficial-(-7.2 mm)-TT5.2 ////// 5/888 completed  //////  0 min 4.54 sec
0008-600-1-1-Crossmodal-PER-superficial-(-7.2 mm)-TT5.3 ////// 6/888 completed  //////  0 min 5.30 sec


KeyboardInterrupt: 

In [105]:
# Close the HDF5 results file that was opened in the data-preparation cell.
f.close()
print('END')

END
