## Multiple linear regression (all bins)
This code runs multiple linear regression within the object-selective epoch defined from the ANOVA results.

Response-selective neurons were filtered out by nested regression.

In [1]:
import os
from pathlib import Path
import numpy as np
import pandas as pd

from scipy import stats
from scipy.stats.mstats import zscore

from scipy.ndimage import gaussian_filter
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

import matplotlib as mpl
import matplotlib.pyplot as plt  

from datetime import date
import time

import random

from joblib import Parallel, delayed

import h5py

In [2]:
# Hide the top and right axes spines in every plot.
mpl.rcParams.update({
    'axes.spines.right': False,
    'axes.spines.top': False,
})

In [3]:
# Root directory of the project; the analysis input/output paths are built from it.
mother_path = Path('D:/', 'Multi-modal project')

### Parameter setting

In [4]:
# number of subsampling iterations run for each cell
num_iter = 1000

# sigma handed to the Gaussian filter that smooths the firing-rate traces
gauss_sigma = 2

# colors for multimodal, vis-only, aud-only conditions
color = [
    'tab:purple',
    'tab:blue',
    'tab:red',
    'tab:green',
]
# colors for the coefficient traces
color2 = [
    'tab:cyan',
    'tab:pink',
    'tab:gray',
]
# solid / dotted line styles
linestyle = ['-', ':']

# ISO-formatted run date (YYYY-MM-DD), used to name the output folder and file
today = date.today().isoformat()

### Data preparation

In [5]:
# Input / output locations, all relative to the project root.
save_path = mother_path / 'analysis' / 'result' / '3.1 Multiple linear regression' / today
cell_path = mother_path / 'analysis' / 'result' / 'zFR export' / '13-Jun-2023'
data_path = mother_path / 'analysis' / 'result' / '3. ANOVA'

# Keep only the per-cell CSV files: any other directory entry (checkpoint folders,
# OS metadata files, ...) would break the file-name parsing in the main loop.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# processing order reproducible.
cell_list = sorted(
    name for name in os.listdir(cell_path) if name.lower().endswith('.csv')
)

# load hdf5 files containing ANOVA results
f = h5py.File(data_path / '2023-07-10' / '2023-07-10_ANOVA_result.hdf5', 'r')

# make hdf5 save file
# NOTE: mode 'w' truncates a result file already written earlier on the same day.
os.makedirs(save_path, exist_ok=True)
s = h5py.File(save_path / f'{today}_multiple_regression_allbins_subsample.hdf5', 'w')

### Data analysis

In [6]:
def groupby_index(df):
    """
    Collect the row indices of the trials that belong to each stimulus condition.

    Trials are grouped by their ('Visual', 'Auditory') value pair. The auditory
    labels paired with the two objects are read from the module-level names
    boy_aud and egg_aud. The returned indices are what the subsampling function
    draws from.

    Returns a list of eight index arrays in the order
    [Boy-M, Boy-V, Boy-A, Egg-M, Egg-V, Egg-A, C-L, C-R].
    A KeyError is raised if one of the conditions has no trial in df.
    """
    # (Visual, Auditory) key of every condition, in output order
    condition_keys = [
        ('Boy', boy_aud),    # Boy, multimodal
        ('Boy', 'no'),       # Boy, visual only
        ('no', boy_aud),     # Boy, auditory only
        ('Egg', egg_aud),    # Egg, multimodal
        ('Egg', 'no'),       # Egg, visual only
        ('no', egg_aud),     # Egg, auditory only
        ('Left', 'no'),      # control, left
        ('Right', 'no'),     # control, right
    ]

    grouped = df.groupby(['Visual', 'Auditory'])
    return [grouped.get_group(key).index.to_numpy() for key in condition_keys]

In [7]:
def subsample_and_MLR(df,num_trials,subsample):
    """
    Fit a multiple linear regression of firing rate on the stimulus regressors
    in every time bin, using either all trials or a random subsample of them,
    to validate the reliability of the coefficients.

    The design matrix, the firing rates and the per-condition trial indices are
    read from the module-level names x, fr and cond_id.

    Parameters
    ----------
    df : unused; kept so that existing calls keep working.
    num_trials : number of trials drawn, with replacement, from each stimulus
        condition when subsampling.
    subsample : 1 to fit on a random subsample, any other value to fit on all trials.

    Returns
    -------
    beta_coef : array of shape (number of regressors, number of time bins).
        Both the regressors and the firing rates are z-scored before fitting;
        the intercept is not returned.
    """
    if subsample == 1:
        # draw num_trials trial indices (with replacement) from each stimulus condition
        trial_idx = np.concatenate(
            [np.random.choice(c, num_trials, replace=True) for c in cond_id]
        ).astype(int)
        X = x[trial_idx, :]
        Y = fr[trial_idx, :]
    else:
        X = x
        Y = fr

    # the design matrix is identical for every time bin, so build it only once
    design = add_constant(zscore(X))

    n_bins = Y.shape[1]
    beta_coef = np.zeros((X.shape[1], n_bins))
    # calculate beta coefficients in each time bin
    for t in range(n_bins):
        lr = OLS(zscore(Y[:, t]), design).fit()
        beta_coef[:, t] = lr.params[1:]    # drop the intercept

    return beta_coef

In [8]:
def plot_SDF_beta(df,linewidth,smooth,save,save_format):
    """
    This function plots mean firing rate patterns of each stimulus condition
    and beta coefficients for visual and auditory terms in multiple linear regression.

    Besides its arguments, the function reads these module-level names:
    cell_name, region, save_path, fr_id, boy_goal, egg_goal, cond_id,
    beta_coef, beta_coef_ss, color, color2, linestyle and gauss_sigma.

    Parameters
    ----------
    df : trial table of the current cell; uses the 'Type' and 'RWD_Loc' columns
        and the 90 firing-rate columns starting at column index fr_id.
    linewidth : line width applied to every plotted trace.
    smooth : if truthy, mean and SEM traces are smoothed with a Gaussian filter
        (sigma = gauss_sigma).
    save : if truthy, the figure is written under save_path / region and then closed;
        otherwise it is left open.
    save_format : 'png', 'svg' or 'both' ('both' puts the svg in an 'svg'
        sub-folder). Any other value writes no file.
    """
    n_bins = 90    # number of firing-rate columns (time bins) plotted

    cond = [(df.Type=='Multimodal')&(df.RWD_Loc==boy_goal),
            (df.Type=='Multimodal')&(df.RWD_Loc==egg_goal),
            (df.Type=='Visual')&(df.RWD_Loc==boy_goal),
            (df.Type=='Visual')&(df.RWD_Loc==egg_goal),
            (df.Type=='Auditory')&(df.RWD_Loc==boy_goal),
            (df.Type=='Auditory')&(df.RWD_Loc==egg_goal)]

    # str.strip('.csv') removes any of the characters '.', 'c', 's', 'v' from both
    # ends of the name rather than the extension, so drop the suffix explicitly
    if cell_name.endswith('.csv'):
        cell_full_name = cell_name[:-len('.csv')]
    else:
        cell_full_name = cell_name

    # mean and SEM of the firing rate in every time bin, one row per condition
    fr_mean = np.zeros((len(cond), n_bins))
    fr_sem = np.zeros((len(cond), n_bins))
    for i, mask in enumerate(cond):
        cond_fr = df[mask].iloc[:, fr_id:fr_id+n_bins].to_numpy()
        fr_mean[i,:] = cond_fr.mean(axis=0)
        fr_sem[i,:] = stats.sem(cond_fr)

    if smooth:
        for i in range(len(cond)):
            fr_mean[i,:] = gaussian_filter(fr_mean[i,:], sigma=gauss_sigma)
            fr_sem[i,:] = gaussian_filter(fr_sem[i,:], sigma=gauss_sigma)

    # upper y-limit: ignore NaNs (the SEM of a single-trial condition is NaN and
    # would otherwise make the axis limit invalid) and never go below 0
    y_max = np.ceil(max(0.0, np.nanmax(fr_mean + fr_sem)))

    fig, ax = plt.subplots(3, 3, figsize=(10.5,7.5))
    fig.suptitle(cell_full_name, fontsize=15)
    time_ms = np.arange(n_bins)*10    # x-axis values: bin index * 10

    # firing-rate traces: color = trial type, line style = object (boy / egg goal)
    for i in range(len(cond)):
        type_idx, obj_idx = divmod(i, 2)
        ax[0,0].plot(time_ms, fr_mean[i,:], color=color[type_idx],
                     linestyle=linestyle[obj_idx], linewidth=linewidth)
        ax[0,0].fill_between(time_ms, fr_mean[i,:]-fr_sem[i,:], fr_mean[i,:]+fr_sem[i,:],
                             color=color[type_idx], alpha=0.2)
    ax[0,0].set_yticks([0, y_max])
    ax[0,0].set_ylim([0, y_max])
    ax[0,0].set_ylabel('Firing rates (Hz)', fontsize=13)
    ax[0,0].set_xticks([0,400,900])
    ax[0,0].set_xlim([0,900])
    ax[0,0].set_xlabel('Time (ms)', fontsize=13)

    # coefficient traces: all four together in ax[1,0], then one panel per
    # regressor showing the all-trial fit and the mean of the subsampled fits
    for i in range(4):
        obj_idx, mod_idx = divmod(i, 2)
        ax[1,0].plot(time_ms, beta_coef[i,:], color=color2[mod_idx],
                     linestyle=linestyle[obj_idx], linewidth=linewidth)
        panel = ax[mod_idx, obj_idx+1]
        panel.plot(time_ms, beta_coef[i,:], color=color2[mod_idx], linewidth=linewidth)
        panel.plot(time_ms, beta_coef_ss[i,:], color=color2[2], linewidth=linewidth)

    axs = [(1,0), (0,1), (1,1), (0,2), (1,2)]
    title = ['All coefficients', 'Boy-image', 'Boy-sound', 'Egg-image', 'Egg-sound']
    for a, t in zip(axs, title):
        ax[a].set_yticks(np.arange(-0.5,1.1,0.5))
        ax[a].set_ylim([-0.5, 1])
        ax[a].set_ylabel('Coefficient', fontsize=13)
        ax[a].set_xticks([0,400,900])
        ax[a].set_xlim([0,900])
        ax[a].set_xlabel('Time (ms)', fontsize=13)
        ax[a].set_title(t, fontsize=13)

    # number of trials in each stimulus condition, written as text in the bottom-left panel
    label = ['Boy-M', 'Boy-V', 'Boy-A', 'Egg-M', 'Egg-V', 'Egg-A', 'C-L', 'C-R']
    for i, name in enumerate(label):
        ax[2,0].text(0, 1-0.12*i, f'{name}: {len(cond_id[i])}', fontsize=11)
    ax[2,0].axis('off')
    ax[2,1].axis('off')
    ax[2,2].axis('off')

    fig.tight_layout()

    if save:
        fig_path = save_path / region
        os.makedirs(fig_path, exist_ok=True)

        if save_format == 'png':
            fig.savefig(fig_path / f'{cell_full_name}.png', dpi=100, facecolor='white')
        elif save_format == 'svg':
            fig.savefig(fig_path / f'{cell_full_name}.svg')
        elif save_format == 'both':
            fig.savefig(fig_path / f'{cell_full_name}.png', dpi=100, facecolor='white')
            os.makedirs(fig_path / 'svg', exist_ok=True)
            fig.savefig(fig_path / 'svg' / f'{cell_full_name}.svg')

        # close this figure so figures do not pile up over the cell loop
        plt.close(fig)

In [9]:
def save_result(f):
    """
    Store the regression output of the current cell in an open HDF5 file.

    A group named after cell_id is created in f. It receives the datasets
    'beta_coef' and 'beta_coef_subsample' and the attributes 'rat', 'region'
    and 'session'. All values are read from the module-level names cell_id,
    beta_coef, beta_coef_subsample, rat_id, region and session_id.
    """
    grp = f.create_group(str(cell_id))

    # regression coefficients (all trials / every subsampling iteration)
    datasets = (
        ('beta_coef', beta_coef),
        ('beta_coef_subsample', beta_coef_subsample),
    )
    for key, values in datasets:
        grp.create_dataset(key, data=values)

    # metadata identifying the recorded cell
    metadata = (
        ('rat', rat_id),
        ('region', region),
        ('session', session_id),
    )
    for key, value in metadata:
        grp.attrs[key] = value

In [10]:
%%time
# Per-cell pipeline: load the trials, build the regressors, run the regression on
# all trials and on num_iter random subsamples, then plot and save the results.
for cell_run,cell_name in enumerate(cell_list):
    loop_start = time.time()
    # get information about the cell (fields of the dash-separated file name)
    cell_info = cell_name.split('-')
    cell_id, rat_id, session_id, region = int(cell_info[0]), cell_info[1], cell_info[2], cell_info[5]

    # skip non object-selective cells
    if f[str(cell_id)].attrs['object cell'] == 0:
        continue

    # load cell data, keeping correct trials only
    df = pd.read_csv(cell_path/cell_name)
    df.drop(df[df.Correctness==0].index,inplace=True)
    df.reset_index(inplace=True,drop=True)
    # a missing stimulus is labelled 'no' so it can be used as a grouping key
    df[['Visual','Auditory']] = df[['Visual','Auditory']].fillna('no')

    # reward location of each object, and the sound that shares that reward location
    boy_goal = df.loc[df['Visual']=='Boy','RWD_Loc'].values[0]
    boy_aud = df.loc[df['RWD_Loc']==boy_goal,'Auditory'].values[0]

    egg_goal = df.loc[df['Visual']=='Egg','RWD_Loc'].values[0]
    egg_aud = df.loc[df['RWD_Loc']==egg_goal,'Auditory'].values[0]

    # binary regressors: presence of each object's image / sound in the trial
    df['Boy-V'] = (df['Visual'] == 'Boy').astype(int)
    df['Boy-A'] = (df['Auditory'] == boy_aud).astype(int)
    df['Egg-V'] = (df['Visual'] == 'Egg').astype(int)
    df['Egg-A'] = (df['Auditory'] == egg_aud).astype(int)

    fr_id = df.columns.get_loc('Var10')    # get the index of the first firing rate column
    fr = df.iloc[:,fr_id:fr_id+90].to_numpy()    # get firing rate data into array

    cond_id = groupby_index(df)

    x = df[['Boy-V','Boy-A','Egg-V','Egg-A']].to_numpy()

    # regression on all trials, then on num_iter subsamples of 5 trials per condition
    beta_coef = subsample_and_MLR(df,5,0)
    beta_coef_subsample = Parallel(n_jobs=-1)(delayed(subsample_and_MLR)(df,5,1) for i in range(num_iter))
    beta_coef_ss = np.mean(beta_coef_subsample,axis=0)

    plot_SDF_beta(df,2,1,1,'both')

    # save results into HDF5 format
    save_result(s)

    loop_end = time.time()
    loop_time = divmod(loop_end-loop_start,60)
    # str.strip('.csv') would strip any of the characters '.', 'c', 's', 'v' from
    # both ends of the name, so remove the extension explicitly instead
    cell_label = cell_name[:-len('.csv')] if cell_name.endswith('.csv') else cell_name
    print(cell_label, f'////// {cell_run+1}/{len(cell_list)} completed  //////  {int(loop_time[0])} min {loop_time[1]:.2f} sec')

0004-600-1-1-Crossmodal-TeV-deep-(-7.32 mm)-TT4.2 ////// 2/888 completed  //////  0 min 8.92 sec
0006-600-1-1-Crossmodal-PER-superficial-(-7.2 mm)-TT5.1 ////// 4/888 completed  //////  0 min 2.69 sec
0008-600-1-1-Crossmodal-PER-superficial-(-7.2 mm)-TT5.3 ////// 6/888 completed  //////  0 min 2.44 sec
0013-600-1-1-Crossmodal-PER-deep-(-6.48 mm)-TT8.2 ////// 9/888 completed  //////  0 min 2.38 sec
0015-600-1-1-Crossmodal-TeV-deep-(-6.36 mm)-TT9.1 ////// 11/888 completed  //////  0 min 2.51 sec
0016-600-1-1-Crossmodal-PER-deep-(-6.24 mm)-TT10.1 ////// 12/888 completed  //////  0 min 3.08 sec
0017-600-1-1-Crossmodal-PER-deep-(-6.24 mm)-TT10.2 ////// 13/888 completed  //////  0 min 3.62 sec
0018-600-1-1-Crossmodal-PER-superficial-(-6.24 mm)-TT13.1 ////// 14/888 completed  //////  0 min 3.93 sec
0021-600-1-1-Crossmodal-TeV-superficial-(-6.96 mm)-TT17.1 ////// 15/888 completed  //////  0 min 2.69 sec
0024-600-1-1-Crossmodal-POR-deep-(-7.56 mm)-TT24.2 ////// 18/888 completed  //////  0 min 2.

KeyboardInterrupt: 

In [11]:
# Close both HDF5 handles: the ANOVA input (f) and the regression output (s).
for handle in (f, s):
    handle.close()
print('END')

END
