# Fitting encoding models to a single session and probe insertion

This notebook introduces the methodology behind the analysis of single-unit encoding models. Beginning with loading session and spiking data, we will generate a design matrix for that data and then fit each neuron in the recording using that matrix. This notebook fits a regularized linear model rather than a Poisson model, as that is what is used in the paper.

## Loading in data

We begin by loading the trials dataframe, spikes, and cluster information (such as QC metrics and brain region labels) for a given session ID and probe ID.

In [None]:
import numpy as np
import pandas as pd
import brainbox.io.one as bbone
from iblutil.util import Bunch
from one.api import ONE
from brainwidemap.bwm_loading import load_trials_and_mask
from brainwidemap.encoding.utils import load_trials_df


def load_regressors(session_id,
                    pid,
                    t_before=0.,
                    t_after=0.,
                    binwidth=0.02,
                    abswheel=False,
                    clu_criteria='bwm',
                    one=None):
    """
    Load the trials table, spikes and cluster information for one session and probe.

    Parameters
    ----------
    session_id : str
        EID of the session to load
    pid : str
        Probe ID to load associated with the session
    t_before : float, optional
        Time before stimulus onset to include in output trial_start column of df, by default 0.
    t_after : float, optional
        Time after feedback to include in output trial_end column of df, by default 0.
    binwidth : float, optional
        Binwidth for wheel signal. Needs to match that of GLM, by default 0.02
    abswheel : bool, optional
        Load in wheel speed instead of velocity, by default False
    clu_criteria : str, optional
        Criteria for keeping clusters: 'all' keeps every unit, 'bwm' keeps only units whose
        ``label`` equals 1 (the brain-wide map criterion, all metrics passing). No others
        supported for now, by default 'bwm'
    one : ONE, optional
        Instance of ONE; a new one is created when None, by default None

    Returns
    -------
    trialsdf
        Trials table as returned by ``load_trials_df``, restricted to the trials selected by
        the mask from ``load_trials_and_mask``.
    spk_times
        Spike times of the kept clusters, sorted in ascending order.
    spk_clu
        Cluster ID of each spike, in the same order as ``spk_times``.
    clu_regions
        ``acronym`` field of the merged clusters object. This is NOT filtered by
        ``clu_criteria``; it covers every cluster on the probe.
    clu_df : pandas.DataFrame
        Merged cluster table indexed by ``cluster_id``, restricted to the kept clusters,
        with an added ``pid`` column.

    Raises
    ------
    ValueError
        If ``clu_criteria`` is neither 'bwm' nor 'all'.
    """
    # Validate up front so a bad argument fails before any (potentially slow) data loading.
    if clu_criteria not in ('bwm', 'all'):
        raise ValueError("clu_criteria must be 'bwm' or 'all'")

    one = ONE() if one is None else one

    # Only the mask is needed here; the trials themselves are reloaded below with wheel data.
    _, mask = load_trials_and_mask(one=one, eid=session_id)
    mask = mask.index[np.nonzero(mask.values)]
    trialsdf = load_trials_df(
        session_id,
        t_before=t_before,
        t_after=t_after,
        wheel_binsize=binwidth,
        ret_abswheel=abswheel,
        ret_wheel=not abswheel,
        addtl_types=['firstMovement_times'],
        one=one,
        trials_mask=mask,
    )

    ssl = bbone.SpikeSortingLoader(one=one, pid=pid)
    origspikes, tmpclu, channels = ssl.load_spike_sorting()
    if 'metrics' not in tmpclu:
        # Placeholder when no metrics were loaded; presumably merge_clusters expects the key
        # to be present -- TODO confirm.
        tmpclu['metrics'] = np.ones(tmpclu['channels'].size)
    merged_clusters = ssl.merge_clusters(origspikes, tmpclu, channels)
    clu_df = pd.DataFrame(merged_clusters).set_index(['cluster_id'])
    clu_df['pid'] = pid

    if clu_criteria == 'bwm':
        keepclu = clu_df.index[clu_df.label == 1]
    else:  # 'all', guaranteed by the validation above
        keepclu = clu_df.index

    clu_df = clu_df.loc[keepclu]
    # Drop spikes from rejected clusters, applying the same mask to every spike attribute.
    keepmask = np.isin(origspikes.clusters, keepclu)
    spikes = Bunch({k: v[keepmask] for k, v in origspikes.items()})
    sortinds = np.argsort(spikes.times)
    spk_times = spikes.times[sortinds]
    spk_clu = spikes.clusters[sortinds]
    # Taken from the unfiltered merged clusters, so it still covers every cluster on the probe.
    clu_regions = merged_clusters.acronym
    return trialsdf, spk_times, spk_clu, clu_regions, clu_df