In [256]:
%matplotlib qt5
import numpy as np
from sklearn.metrics import explained_variance_score
from sklearn.linear_model import LinearRegression, Lasso, Ridge, BayesianRidge
from scipy.ndimage.interpolation import shift
from scipy.signal import decimate
from matplotlib import pyplot as plt
from scipy import stats
import pandas as pd
import pickle
import sys
sys.path.append('C:\\Users\\francescag\\Documents\\SourceTree_repos\\Python_git\\')
sys.path.insert(0, 'C:\\Users\\francescag\\Documents\\SourceTree_repos')
sys.path.insert(0, 'C:\\Users\\francescag\\Documents\\SourceTree_repos\\Python_git\\freely_moving_photometry_analysis')
from utils.individual_trial_analysis_utils import SessionEvents

In [3]:
def rolling_zscore(x, window=10*10000):
    r = x.rolling(window=window)
    m = r.mean().shift(1)
    s = r.std(ddof=0).shift(1)
    z = (x-m)/s
    return z

In [228]:
mouse = 'SNL_photo35'
date = '20201118'
saving_folder = 'W:\\photometry_2AC\\processed_data\\' + mouse + '\\'
events_folder = 'W:\\photometry_2AC\\processed_data\\' + mouse + '\\linear_regression\\'
restructured_data_filename = mouse + '_' + date + '_' + 'restructured_data.pkl'
trial_data = pd.read_pickle(saving_folder + restructured_data_filename) 
dff_trace_filename = mouse + '_' + date + '_' + 'smoothed_signal.npy'
dff = np.load(saving_folder + dff_trace_filename)

In [229]:
window_size_seconds = 10
sample_rate = 10000
decimate_factor = 100

In [230]:
rolling_zscored_dff = rolling_zscore(pd.Series(dff), window=window_size_seconds*sample_rate)

In [231]:
dff.shape

(23030000,)

In [232]:
downsampled_zscored_dff = decimate(decimate(rolling_zscored_dff[window_size_seconds*sample_rate:], int(decimate_factor/10)), int(decimate_factor/10))

In [233]:
num_samples = downsampled_zscored_dff.shape[0]

In [234]:
num_samples

229300

In [235]:
def turn_timestamps_into_continuous(num_samples, *behavioural_events):
    continuous_parameters = []
    for event_type_timestamps in behavioural_events:
        continuous_time_version = np.zeros([num_samples])
        continuous_time_version[event_type_timestamps] = 1
        continuous_parameters.append(continuous_time_version)
    return continuous_parameters

In [236]:
from utils.individual_trial_analysis_utils import SessionEvents

In [237]:
aligned_filename = mouse + '_' + date + '_' +'behavioural_events_with_no_rewards_added.py'
save_filename = events_folder + aligned_filename
example_session_data = pickle.load(open(save_filename, "rb"))


In [238]:
def convert_behavioural_timestamps_into_samples(time_stamps, window_to_remove, sample_rate=10000, decimate_factor=100):
    adjusted_stamps = (time_stamps - window_to_remove)*sample_rate/decimate_factor
    adjusted_stamps = np.round(np.vstack(adjusted_stamps).astype(np.float)).astype(int)
    return adjusted_stamps

In [239]:
ipsi_choices = convert_behavioural_timestamps_into_samples(example_session_data.choice_data.ipsi_data.event_times, window_size_seconds)
contra_choices = convert_behavioural_timestamps_into_samples(example_session_data.choice_data.contra_data.event_times, window_size_seconds)
high_cues = convert_behavioural_timestamps_into_samples(example_session_data.cue_data.high_cue_data.event_times, window_size_seconds)
low_cues = convert_behavioural_timestamps_into_samples(example_session_data.cue_data.low_cue_data.event_times, window_size_seconds)
rewards = convert_behavioural_timestamps_into_samples(example_session_data.reward_data.reward_data.event_times, window_size_seconds)
no_rewards = convert_behavioural_timestamps_into_samples(example_session_data.reward_data.no_reward_data.event_times, window_size_seconds)


In [240]:
contra_choices.shape

(147, 1)

In [241]:
low_cues.shape

(86, 1)

In [242]:
parameters = turn_timestamps_into_continuous(num_samples,  high_cues, low_cues, ipsi_choices, contra_choices, rewards, no_rewards)

In [243]:
all_trial_starts = np.unique(np.concatenate([example_session_data.cue_data.high_cue_data.trial_starts, example_session_data.cue_data.low_cue_data.trial_starts, example_session_data.choice_data.contra_data.trial_starts, example_session_data.choice_data.ipsi_data.trial_starts, example_session_data.reward_data.no_reward_data.trial_starts, example_session_data.reward_data.reward_data.trial_starts]))
all_trial_ends = np.unique(np.concatenate([example_session_data.cue_data.high_cue_data.trial_ends, example_session_data.cue_data.low_cue_data.trial_ends, example_session_data.choice_data.contra_data.trial_ends, example_session_data.choice_data.ipsi_data.trial_ends, example_session_data.reward_data.no_reward_data.trial_ends, example_session_data.reward_data.reward_data.trial_ends]))

In [244]:
trial_starts_samps= np.squeeze(convert_behavioural_timestamps_into_samples(all_trial_starts, window_size_seconds))
trial_ends_samps= np.squeeze(convert_behavioural_timestamps_into_samples(all_trial_ends, window_size_seconds))

In [245]:
np.mean(all_trial_ends - all_trial_starts) + 2*np.std(all_trial_ends - all_trial_starts)

40.625922006221025

In [246]:
trial_durations = all_trial_ends - all_trial_starts

In [247]:
trials_to_include = pd.DataFrame({'trial starts': trial_starts_samps, 'trial ends': trial_ends_samps, 'durations': trial_durations})

In [248]:
trials_to_remove = trials_to_include[trials_to_include['durations']> np.mean(trial_durations) + 2*np.std(trial_durations)].reset_index(drop=True)

In [249]:
trials_to_remove

Unnamed: 0,trial starts,trial ends,durations
0,26142,30536,43.944
1,122210,133650,114.3947
2,134879,152230,173.5071
3,202619,228010,253.918


In [250]:
inds_to_go = []
for ind, trial in trials_to_remove.iterrows():
    inds_to_go.append(slice(int(trial['trial starts']), int(trial['trial ends'])))

In [251]:
ind = np.indices(downsampled_zscored_dff.shape)[0]
rm = np.hstack([ind[i] for i in inds_to_go])
trace_for_reg = np.take(downsampled_zscored_dff, sorted(set(ind)-set(rm)))

In [252]:
params_for_reg = []
for param in parameters:
    param_new = np.take(param, sorted(set(ind)-set(rm)))
    params_for_reg.append(param_new)

In [130]:
all_param_indices, X = make_design_matrix(params_for_reg, window_max=0.5*10000/100)
results = LinearRegression().fit(X, trace_for_reg)
results.score(X, trace_for_reg)

0.08306934051237935

In [124]:
all_param_indices, X = make_design_matrix(params_for_reg)
results = LinearRegression().fit(X, trace_for_reg)
results.score(X, trace_for_reg)

0.45549512839722717

In [259]:
from sklearn.ensemble import GradientBoostingRegressor

In [260]:
clf = GradientBoostingRegressor(loss='quantile', alpha=0.5)

In [258]:
param_names = ['high_cues', 'low_cues', 'ipsi_choices', 'contra_choices', 'rewards', 'no_rewards']
plot_kernels(param_names, results.coef_)

In [207]:
trace_for_regression = np.array([])
for ind, trial in trials_to_include.iterrows():
    trial_start = trial['trial starts']
    trial_end = trial['trial ends']
    trace_for_regression = np.append(trace_for_regression, downsampled_zscored_dff[trial_start:trial_end])

TypeError: slice indices must be integers or None or have an __index__ method

In [117]:
params_for_regression = []
for param in parameters:
    param_new = np.array([])
    for ind, trial in trials_to_include.iterrows():
        trial_start = trial['trial starts']
        trial_end = trial['trial ends']
        param_new = np.append(param_new, param[trial_start:trial_end])
    params_for_regression.append(param_new)

In [160]:
all_param_indices, X = make_design_matrix(params_for_reg)

range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0,

range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(402, 603)
range(603, 804)
range(0, 201)
range(201, 402)
range(40

In [223]:
def make_design_matrix(parameters, window_min=-0.5*10000/100, window_max=1.5*10000/100):
    num_parameters = len(parameters)
    shifts = np.arange(window_min, window_max + 1)
    shift_window_size = shifts.shape[0]
    X = np.zeros([parameters[0].shape[0], shift_window_size*num_parameters])
    all_param_indices = []
    for shift_num, shift_val in  enumerate(shifts):
        for param_num, param in enumerate(parameters):
            param_indices = range(param_num*shift_window_size, param_num*shift_window_size + shift_window_size)
            all_param_indices.append(param_indices)
            shifted_param = shift(param, shift_val, cval=0) 
            X[:, param_indices[shift_num]] = shifted_param
    return(all_param_indices, X)


In [139]:
def make_shifts_for_params(param_names):
    shifts_for_params = []
    shift_window_sizes = []
    shifts = {'high_cues': np.arange(0, 1*10000/100 + 1),
              'low_cues':np.arange(0, 1*10000/100 + 1),
              'ipsi_choices': np.arange(-0.5*10000/100, 1.5*10000/100 + 1),
              'contra_choices': np.arange(-0.5*10000/100, 1.5*10000/100 + 1),
              'rewards': np.arange(0, 1*10000/100 + 1),
              'no_rewards': np.arange(0, 1*10000/100 + 1)
             }
    for param in param_names:
        shifts_for_params.append(shifts[param])
        shift_window_sizes.append(shifts[param].shape[0])
    return shifts_for_params, shift_window_sizes

In [179]:
param_names = ['high_cues', 'low_cues', 'ipsi_choices', 'contra_choices', 'rewards', 'no_rewards']
shifts, windows = make_shifts_for_params(param_names)

In [162]:
X.shape[1]/4

201.0

In [165]:
np.sum(windows[:0])

0.0

In [180]:
param_inds, newX = make_design_matrix_different_shifts(params_for_reg, shifts, windows)

In [181]:
new_results = LinearRegression().fit(newX, trace_for_reg)

In [209]:
new_results.score(newX, trace_for_reg)

0.4308904928091375

In [206]:
param_names = ['high_cues', 'low_cues', 'ipsi_choices', 'contra_choices', 'rewards', 'no_rewards']
plot_kernels_different_shifts(param_names, new_results.coef_, shifts, windows)

In [168]:
def make_design_matrix_different_shifts(parameters, all_shifts, shift_window_sizes):
    num_parameters = len(parameters)
    total_num_regressors = np.sum(shift_window_sizes)
    X = np.zeros([parameters[0].shape[0], total_num_regressors])
    all_param_indices = []
    for param_num, param in enumerate(parameters):
        shifts = all_shifts[param_num]
        shift_window_size = shift_window_sizes[param_num]
        starting_ind = int(np.sum(shift_window_sizes[:param_num]))
        for shift_num, shift_val in  enumerate(shifts):
                param_indices = range(starting_ind, starting_ind + shift_window_size)
                all_param_indices.append(param_indices)
                shifted_param = shift(param, shift_val, cval=0) 
                X[:, param_indices[shift_num]] = shifted_param
    return(all_param_indices, X)

In [70]:
def plot_kernels(parameter_names,coefs,window_min=-0.5*10000/100, window_max=1.5*10000/100):
    
    fig, axs = plt.subplots(nrows=1, ncols=len(parameter_names), sharey=True, figsize=(15,8))
    axs[0].set_ylabel('Regression coefficient')
    shifts = np.arange(window_min, window_max + 1)/100
    shift_window_size = shifts.shape[0]
    for param_num, param_name in enumerate(parameter_names):
        param_kernel = coefs[param_num*shift_window_size:(param_num+1)*shift_window_size]
        axs[param_num].plot(shifts, param_kernel, label=param_name)
        axs[param_num].set_title(param_name)
        axs[param_num].axvline([0])
        axs[param_num].set_xlabel('Time (s)')

In [205]:
def plot_kernels_different_shifts(parameter_names, coefs, all_shifts, shift_window_sizes):
    fig, axs = plt.subplots(nrows=1, ncols=len(parameter_names), sharey=True, figsize=(15,8))
    axs[0].set_ylabel('Regression coefficient')
    for param_num, param_name in enumerate(parameter_names):
        shifts = all_shifts[param_num]
        shift_window_size = shift_window_sizes[param_num]
        starting_ind = int(np.sum(shift_window_sizes[:param_num]))
        param_kernel = coefs[starting_ind: starting_ind + shift_window_size]
        axs[param_num].plot(shifts*100/10000, param_kernel, label=param_name)
        axs[param_num].set_title(param_name)
        axs[param_num].axvline([0])
        axs[param_num].set_xlabel('Time (s)')

In [120]:
all_param_indices, X = make_design_matrix(params_for_regression)
results = LinearRegression().fit(X, trace_for_regression)
results.score(X, trace_for_reg)

0.07208495587481378

In [211]:
param_inds

[range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0, 101),
 range(0

In [201]:
np.sum(windows[:3])

403

In [26]:
results.score(X, trace_for_regression)

0.006979412031562715

In [27]:
X

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True],
       [False, False, False, ...,  True,  True,  True]])

In [74]:
all_param_indices, X = make_design_matrix(parameters)
results = LinearRegression().fit(X, downsampled_zscored_dff)
results.score(X, downsampled_zscored_dff)

0.06192720698620613

In [327]:
explained_variance_score(downsampled_zscored_dff, results.predict(X))

0.06994657084118427

In [173]:
param_names = ['high_cues', 'low_cues', 'ipsi_choices', 'contra_choices', 'rewards', 'no_rewards']
plot_kernels(param_names, results.coef_)

In [22]:
from sklearn.metrics import explained_variance_score,  mean_squared_error, r2_score, mean_tweedie_deviance

In [16]:
def plot_params_and_trace(start_ind, num_samps, trace, parameters):
    x = start_ind
    inds = range(x,x+num_samps)
    plt.plot(parameters[3][inds], label='contra choice')
    plt.plot(parameters[4][inds], label='rewards')
    plt.plot(parameters[2][inds], label='ipsi choice')
    plt.plot(parameters[1][inds], label='low cues')
    plt.plot(parameters[0][inds], label='high cues')
    plt.plot(parameters[4][inds], label='reward')
    plt.plot(parameters[5][inds], label='no reward')
    plt.plot(trace[inds]/np.max(trace), label='zscore')
    plt.legend()

In [30]:
plot_params_and_trace(1000, 309000, downsampled_zscored_dff, parameters)

In [31]:
plt.plot(decimated_dff/np.max(decimated_dff), label='dff')
plt.plot(downsampled_zscored_dff/np.max(downsampled_zscored_dff), label='zscore')
plt.legend()

NameError: name 'decimated_dff' is not defined

In [33]:
def remove_one_parameter(param_names, params_to_remove, old_coefs, old_X, window_min=-0.5*10000/100, window_max=1.5*10000/100):
    param_df = pd.DataFrame({'parameter': param_names})
    params_to_include = param_df[~param_df['parameter'].isin(params_to_remove)]
    params_to_include = params_to_include.reset_index(drop=False)
    num_parameters = params_to_include.shape[0]
    shifts = np.arange(window_min, window_max + 1)/100
    shift_window_size = shifts.shape[0]
    new_coefs = np.zeros([shift_window_size*num_parameters])
    new_X = np.zeros([X.shape[0], shift_window_size*num_parameters])
    for param_num, param_row in params_to_include.iterrows():
        old_index = param_row['index']
        new_index = param_num
        param_kernel = old_coefs[old_index*shift_window_size:(old_index+1)*shift_window_size]
        param_indices = range(new_index*shift_window_size,new_index*shift_window_size + shift_window_size)
        new_coefs[param_indices] = param_kernel
        old_X = X[:, old_index*shift_window_size:(old_index+1)*shift_window_size]
        new_X[:, param_indices] = old_X
    return new_coefs, new_X, params_to_include

In [36]:
def include_parameters(param_names, params, old_coefs, old_X, window_min=-0.5*10000/100, window_max=1.5*10000/100):
    param_df = pd.DataFrame({'parameter': param_names})
    params_to_include = param_df[param_df['parameter'].isin(params)]
    params_to_include = params_to_include.reset_index(drop=False)
    num_parameters = params_to_include.shape[0]
    shifts = np.arange(window_min, window_max + 1)/100
    shift_window_size = shifts.shape[0]
    new_coefs = np.zeros([shift_window_size*num_parameters])
    new_X = np.zeros([X.shape[0], shift_window_size*num_parameters])
    for param_num, param_row in params_to_include.iterrows():
        old_index = param_row['index']
        new_index = param_num
        param_kernel = old_coefs[old_index*shift_window_size:(old_index+1)*shift_window_size]
        param_indices = range(new_index*shift_window_size,new_index*shift_window_size + shift_window_size)
        new_coefs[param_indices] = param_kernel
        old_X = X[:, old_index*shift_window_size:(old_index+1)*shift_window_size]
        new_X[:, param_indices] = old_X
    return new_coefs, new_X, params_to_include

In [34]:
def remove_param_and_calculate_r2(param_names, param_to_remove, old_coefs, old_X, intercept, dff):
    new_coefs, new_X, params = remove_one_parameter(param_names, param_to_remove, old_coefs, old_X)
    new_pred = np.dot(new_X, new_coefs) + intercept
    old_pred = np.dot(old_X, old_coefs) + intercept
    old_r2 = explained_variance_score(dff, old_pred)
    new_r2 = explained_variance_score(dff, new_pred)
    prop_due_to_param = (old_r2 - new_r2)/old_r2 * 100
    print(old_r2, new_r2)
    print(prop_due_to_param)
    return new_pred, prop_due_to_param

In [38]:
def include_params_and_calculate_r2(param_names, param_to_remove, old_coefs, old_X, intercept, dff):
    new_coefs, new_X, params = include_parameters(param_names, param_to_remove, old_coefs, old_X)
    new_pred = np.dot(new_X, new_coefs) + intercept
    old_pred = np.dot(old_X, old_coefs) + intercept
    old_r2 = explained_variance_score(dff, old_pred)
    new_r2 = explained_variance_score(dff, new_pred)
    prop_due_to_param = (new_r2)/old_r2 * 100
    print(old_r2, new_r2)
    print(prop_due_to_param)
    return new_pred, prop_due_to_param

In [39]:
plt.plot(downsampled_zscored_dff)
plt.plot(cue_pred)
plt.plot(reward_pred)
plt.plot(choice_pred)
plt.plot(parameters[3][inds], label='contra choice')
plt.plot(parameters[4][inds], label='rewards')
plt.plot(parameters[2][inds], label='ipsi choice')
plt.plot(parameters[1][inds], label='low cues')
plt.plot(parameters[0][inds], label='high cues')
plt.plot(parameters[4][inds], label='reward')
plt.plot(parameters[5][inds], label='no reward')

NameError: name 'no_reward_pred' is not defined

In [350]:
# Contribution of each component in the model was measured by reduction of a deviance compared to a reduced model excluding the component.

In [39]:
total_pred = np.dot(X, results.coef_)

In [43]:
def exp_var(trace, pred):
    var_trace = 

(1206,)

In [40]:
param_names = ['high_cues', 'low_cues', 'ipsi_choices', 'contra_choices', 'rewards', 'no_rewards']
params_to_remove = ['high_cues', 'low_cues']
cue_pred, prop_due_to_cue = include_params_and_calculate_r2(param_names, params_to_remove, results.coef_, X, results.intercept_, trace_for_reg)
params_to_remove = ['ipsi_choices', 'contra_choices']
choice_pred, prop_due_to_choice = include_params_and_calculate_r2(param_names, params_to_remove, results.coef_, X,  results.intercept_, trace_for_reg)
params_to_remove = ['rewards', 'no_rewards']
reward_pred, prop_due_to_param = include_params_and_calculate_r2(param_names, params_to_remove, results.coef_, X,results.intercept_, trace_for_reg)

0.10678892213702296 0.012383118483492828
11.595883014535726
0.10678892213702296 0.08827435305127429
82.66246281426808
0.10678892213702296 -0.005713603484430729
-5.350370965538441


In [255]:
param_names = ['high_cues', 'low_cues', 'ipsi_choices', 'contra_choices', 'rewards', 'no_rewards']
params_to_remove = ['high_cues', 'low_cues']
cue_pred, prop_due_to_cue = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X, results.intercept_, trace_for_reg)
params_to_remove = ['ipsi_choices', 'contra_choices']
choice_pred, prop_due_to_choice = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X,  results.intercept_, trace_for_reg)
params_to_remove = ['rewards', 'no_rewards']
reward_pred, prop_due_to_param = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X,results.intercept_, trace_for_reg)

0.4700672078543989 0.46524239749790863
1.026408623250477
0.4700672078543989 0.40418262625665047
14.015991861775623
0.4700672078543989 -0.021216401515595562
104.51348257463798


In [38]:
params_to_remove = ['rewards']
reward_pred, prop_due_to_param = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X,results.intercept_, trace_for_reg)

0.1860901417942734 -0.007635543157750835
104.10314221061323


In [42]:

old_pred = np.dot(X, results.coef_) + results.intercept_
new_pred = np.ones(old_pred.shape)* results.intercept_
old_r2 = explained_variance_score(trace_for_reg, old_pred)
new_r2 = explained_variance_score(trace_for_reg, new_pred)
(old_r2 - new_r2)/old_r2 * 100

100.0

In [42]:
inds = np.arange(160000, trace_for_reg.shape[0])
plt.plot(trace_for_reg[inds],label='trace', color='k', alpha=0.2)
plt.plot(cue_pred[inds], label='cue')
plt.plot(choice_pred[inds], label = 'choice')
plt.plot(reward_pred[inds], label = 'reward')
plt.plot(params_for_reg[3][inds], label='contra choice')
plt.plot(params_for_reg[4][inds], label='rewards')
#plt.plot(params_for_regression[2][inds], label='ipsi choice')
#plt.plot(params_for_regression[1][inds], label='low cues')
plt.plot(params_for_reg[4][inds], label='reward')
plt.plot(params_for_reg[5][inds], label='no reward')
plt.plot(params_for_reg[1][inds], label='low cues')
#plt.plot(total_pred[inds], label='model', color='y')
plt.legend()

<matplotlib.legend.Legend at 0x1d51c6a0>

In [72]:
params_to_remove = ['high_cues', 'low_cues']
cue_pred, prop_due_to_cue = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X, results.intercept_, downsampled_zscored_dff)

0.3437221932443679 0.004668395994980701
1.3581887020201002
0.3437221932443679 0.3377332123343795
1.742389938065654


In [83]:
params_to_remove = ['ipsi_choices', 'contra_choices']
choice_pred, prop_due_to_choice = include_params_and_calculate_r2(param_names, params_to_remove, results.coef_, X,  results.intercept_, trace_for_regression)
choice_pred, prop_due_to_choice = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X,  results.intercept_, trace_for_regression)

0.06363158444930539 0.02094875620721326
32.921946527833065
0.06363158444930539 0.024072359131555365
62.169165926186295


In [78]:
params_to_remove = ['rewards', 'no_rewards']
reward_pred, prop_due_to_param = include_params_and_calculate_r2(param_names, params_to_remove, results.coef_, X,results.intercept_, downsampled_zscored_dff)
reward_pred, prop_due_to_param = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X,results.intercept_, downsampled_zscored_dff)

0.3437221932443679 0.23574087404276955
68.5847113384298
0.3437221932443679 0.023876094529398095
93.05366514043408


In [77]:
params_to_remove = ['no_rewards']

no_reward_pred, prop_due_to_param = remove_param_and_calculate_r2(param_names, params_to_remove, results.coef_, X, results.intercept_, downsampled_zscored_dff)

0.3437221932443679 0.3327044425436263
3.2054231345220696


In [76]:
5.11348747238289 + 68.5847113384298 + 4.751307436727189 + 1.3581887020201002

79.80769494955997

In [317]:
predictors = X
divisor = float(predictors.shape[0] - predictors.shape[1])
outcomes = downsampled_zscored_dff
sse = np.sum((total_pred - outcomes) ** 2, axis=0) / divisor
if not sse.shape:
    se = np.array([np.sqrt(np.diagonal(sse * np.linalg.inv(np.dot(predictors.T, predictors))))])
else:
    se = np.array(
        [
            np.sqrt(
                np.diagonal(
                    sse[i] * np.linalg.inv(np.dot(predictors.T, predictors))
                )
            )
            for i in range(sse.shape[0])
        ]
    )
t_values = results.coef_ / se
p_values = 2 * (
    1 - stats.t.cdf(np.abs(t_values), outcomes.shape[0] - predictors.shape[1])
)


In [277]:
np.array([np.sqrt(np.diagonal(sse * np.linalg.inv(np.dot(predictors.T, predictors))))])

array([[0.08107052, 0.08107052, 0.08106907, ..., 0.07705439, 0.07705577,
        0.07705577]])

In [30]:
plt.plot(cue_pred, label='cue')
plt.plot(choice_pred, label = 'choice')
plt.plot(reward_pred, label = 'reward')
plt.legend()

<matplotlib.legend.Legend at 0x2f932dd8>

In [255]:
old_r2 = explained_variance_score(downsampled_zscored_dff, old_pred)
new_r2 = explained_variance_score(downsampled_zscored_dff, new_pred)
print(old_r2, new_r2)

0.19382222321776876 0.03147393529840614


In [256]:
old_r2 = r2_score(downsampled_zscored_dff, old_pred)
new_r2 = r2_score(downsampled_zscored_dff, new_pred)
print(old_r2, new_r2)

0.1834066816753005 0.030968894328113894


In [257]:
from sklearn.metrics import mean_poisson_deviance
old_r2 = r2_score(downsampled_zscored_dff, old_pred)
new_r2 = r2_score(downsampled_zscored_dff, new_pred)
print(old_r2, new_r2)

ValueError: Mean Tweedie deviance error with power=1 can only be used on non-negative y and strictly positive y_pred.

In [22]:
x = 80000
inds = range(x,x+5000)
plt.plot(results.predict(X[inds]))
plt.plot(downsampled_zscored_dff[inds])
plt.plot(parameters[3][inds])

NameError: name 'results' is not defined

In [151]:
def save_kernels(parameter_names, regression_results,  window_min=-1 * 10000 / 100, window_max=2 * 10000 / 100):

    shifts = np.arange(window_min, window_max + 1) / 100
    shift_window_size = shifts.shape[0]
    param_kernels = {}
    for param_num, param_name in enumerate(parameter_names):
        kernel_name = parameter_names[param_num]
        param_kernels[kernel_name] = regression_results.coef_[param_num * shift_window_size:(param_num + 1) * shift_window_size]
    return param_kernels, shifts

In [152]:
plt.plot(save_kernels(param_names, results)[1], save_kernels(param_names, results)[0]['ipsi choices'])

[<matplotlib.lines.Line2D at 0xc5d6c88>]