# Projection on learning dimension and decoding

Implement PCA or other dimensionality reduction techniques.
Project activity onto a "learning" dimension, defined as the difference between meaningful average population responses.

In [25]:
import os

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

import src.utils.utils_io as io
import src.utils.utils_imaging as imaging_utils


# Directory containing the processed data.
processed_dir = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/data_processed/mice"

# Spreadsheet with the session metadata.
db_path = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/session_metadata.xlsx"

# YAML group files for the rewarded and the non-rewarded imaging groups.
group_yaml_rew = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/groups/imaging_rewarded.yaml"
group_yaml_non_rew = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/groups/imaging_non_rewarded.yaml"

# Read both groups (rewarded first), then pool them into a single list.
nwb_list_rew, nwb_list_non_rew = (
    io.read_group_yaml(yaml_path)
    for yaml_path in (group_yaml_rew, group_yaml_non_rew)
)
nwb_list = nwb_list_rew + nwb_list_non_rew


def reduce_dimensionality(activity, win, n_days=None, n_trials=None, fitting_type='single_bin'):
    """Fit a PCA on trial responses and project the full activity onto the PCs.

    Parameters
    ----------
    activity : np.ndarray
        Neural activity, either already split by day with shape
        (n_neurons, n_days, n_trials, n_timepoints), or flat over trials with
        shape (n_neurons, n_days * n_trials, n_timepoints).
    win : tuple of int
        (start, stop) frame indices of the response window that is averaged
        when ``fitting_type='single_bin'``.
    n_days, n_trials : int, optional
        Number of days and of trials per day. Required for 3D input; inferred
        from the array shape for 4D input when omitted.
    fitting_type : {'single_bin', 'psth'}
        'single_bin': fit on one value per trial (mean over ``win``).
        'psth': fit on the trial-averaged time course of each day.

    Returns
    -------
    reduced_act : np.ndarray
        Activity projected on the PCs, shape
        (n_components, n_days, n_trials, n_timepoints).
    model : sklearn.decomposition.PCA
        The fitted PCA model.

    Raises
    ------
    ValueError
        If ``fitting_type`` is unknown, if the activity is neither 3D nor 4D,
        or if ``n_days``/``n_trials`` are missing for 3D input.
    """
    if fitting_type not in ('single_bin', 'psth'):
        raise ValueError('Unknown fitting type.')

    activity = np.asarray(activity)
    if activity.ndim == 4:
        n_days = activity.shape[1] if n_days is None else n_days
        n_trials = activity.shape[2] if n_trials is None else n_trials
    elif activity.ndim == 3:
        if n_days is None or n_trials is None:
            raise ValueError('n_days and n_trials are required for 3D activity.')
    else:
        raise ValueError('activity must be a 3D or 4D array.')

    n_neurons = activity.shape[0]
    n_timepoints = activity.shape[-1]
    # Work on a single canonical layout so that the fit and the projection
    # below agree on which axis is trials and which one is time.
    activity = activity.reshape(n_neurons, n_days, n_trials, n_timepoints)

    if fitting_type == 'single_bin':
        # One sample per trial: mean response inside the window.
        X = np.mean(activity[:, :, :, win[0]:win[1]], axis=3)
    else:
        # One sample per (day, timepoint): trial-averaged time course.
        X = np.mean(activity, axis=2)

    # Flatten to (n_neurons, n_samples) then transpose to
    # (n_samples, n_features) as expected by scikit-learn.
    X = X.reshape(n_neurons, -1).T
    # z-score the data; keep the scaler so the same scaling is applied to the
    # data that gets projected.
    scaler = StandardScaler(with_mean=True, with_std=True)
    X = scaler.fit_transform(X)
    # Perform PCA.
    model = PCA(n_components=None).fit(X)

    # Apply the model to every trial and timepoint. The PCs were learned on
    # z-scored neurons, so the projected data must be scaled identically.
    flat_activity = activity.reshape(n_neurons, -1).T
    reduced_act = model.transform(scaler.transform(flat_activity)).T
    # First dim length is min(n_features, n_samples).
    # Add session dimension.
    n_pc = model.n_components_
    reduced_act = reduced_act.reshape(n_pc, n_days, n_trials, n_timepoints)

    return reduced_act, model


def pc_to_retain(model, threshold):
    """Return a boolean mask of the PCs needed to reach a variance threshold.

    A PC is kept while the cumulative explained variance ratio is still below
    ``threshold``; the first PC at which the threshold is reached is kept too.
    """
    cumulative_variance = np.cumsum(model.explained_variance_ratio_)
    keep = cumulative_variance < threshold
    # Index of the first PC not strictly below the threshold (0 if there is
    # none, in which case setting it to True changes nothing).
    first_crossing = np.argmax(~keep)
    keep[first_crossing] = True
    return keep



## Part 1. PCA and PC PSTH's

PCA is performed on the mapping trials.

In [9]:
# Acquisition and trial-selection parameters.
sampling_rate = 30
trial_type = 'UM'
n_trials = 50
days = ['-2', '-1', '0', '+1', '+2']
# Keep the number of days tied to the list of days that is actually loaded.
n_days = len(days)

# Response and baseline windows, given in seconds and converted to frames.
# NOTE(review): the response window as coded spans 1 s to 2 s (30 frames);
# a previous comment described it as "stimulus onset to 300 ms after" --
# confirm which one is intended.
win = (1, 2)
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline = (0, 1)
baseline = (int(baseline[0] * sampling_rate), int(baseline[1] * sampling_rate))

mouse_list = ['AR127']

session_list = io.select_sessions_from_db(db_path,
                                          experimenters=['AR', 'GF', 'MI'],
                                          exclude_cols=['exclude', 'two_p_exclude'],
                                          subject_id=mouse_list,
                                          reward_group='R+',
                                          day=days,
                                          two_p_imaging='yes')

# Use n_trials rather than a repeated literal so that the later reshape into
# (n_days, n_trials) stays consistent with what was loaded.
activity = imaging_utils.shape_features_matrix(mouse_list, session_list, processed_dir, trial_type, n_trials)
# Subtract baselines (mean over the baseline frames along the last axis).
activity = activity - np.nanmean(activity[:, :, baseline[0]:baseline[1]],
                                 axis=2, keepdims=True)

  for idx, row in parser.parse():


### Shape feature matrix.
Fit the PCA in one of two ways:
- for each neuron, keep all trials and compute the mean response over time;
- for each neuron, average across trials and keep the mean response over time
(i.e. keep the time dimension).


In [10]:

# Average along axis 2 and flatten everything but the neuron axis.
# (Alternative for activity split by day: average only the response window,
# i.e. activity[:, :, :, win[0]:win[1]] along axis 3.)
X = activity.mean(axis=2)
X = X.reshape(X.shape[0], -1)

# scikit-learn expects (n_samples, n_features).
X = X.T
# z-score each feature (centering and unit variance are the defaults).
X = StandardScaler().fit_transform(X)
# Fit a PCA keeping all components; fit() returns the estimator itself.
pca = PCA()
model = pca.fit(X)

### PC PSTH's
Look at the PC's average responses across days.

In [None]:
# Apply the model.
# Reshape activity to (n_trials x n_timepoints, n_neurons) and transform.
# NOTE(review): the model was fitted on z-scored data but the activity
# projected here is not z-scored -- confirm this is intended.
flat_activity = activity.reshape(activity.shape[0], -1).T
reduced_act = model.transform(flat_activity).T
n_pc = model.n_components_
s = activity.shape
# First dim length is min(n_features, n_samples).
# Add session dimension; the last axis of activity is time.
reduced_act = reduced_act.reshape(n_pc, n_days, n_trials, s[-1])

# Create PC PSTH's: subtract each trial's baseline, then average over trials.
# The average must be taken on the baseline-subtracted array, otherwise the
# subtraction is silently discarded.
pc_psth = reduced_act - np.mean(reduced_act[:, :, :, baseline[0]:baseline[1]],
                                axis=3, keepdims=True)
pc_psth = np.mean(pc_psth, axis=2)

# Save PC PSTH to pdf, one page per PC and one panel per day.
pdf_path = r"\\sv-nas1.rcp.epfl.ch\Petersen-Lab\analysis\Anthony_Renard\analysis_output\pca\pca_exploration.pdf"
# Never ask for more PCs than the model contains.
n_pc_to_plot = min(30, n_pc)
with PdfPages(pdf_path) as pdf:
    for pc in range(n_pc_to_plot):
        # squeeze=False keeps axes 2D even when there is a single day.
        f, axes = plt.subplots(1, n_days, sharey=True, figsize=(15, 5), squeeze=False)
        for i in range(n_days):
            axes[0, i].plot(pc_psth[pc, i, :])
            # Vertical marker at frame 30.
            axes[0, i].axvline(30, color='orange')
        f.suptitle(f"PC {pc+1}")
        pdf.savefig(f)
        # Close this specific figure to free memory inside the loop.
        plt.close(f)

### Look at loadings and explained variance


In [11]:
# Cumulative explained variance ratio across PCs.
fig_variance, ax_variance = plt.subplots()
ax_variance.plot(np.cumsum(model.explained_variance_ratio_))

# Loadings of the first 10 PCs, laid out on a 5 x 2 grid (row-major order).
fig_loadings, axes_loadings = plt.subplots(5, 2, figsize=(10, 10))
for i, ax in zip(range(10), axes_loadings.flat):
    ax.plot(model.components_[i])
    ax.set_title(f"PC {i+1}")
fig_loadings.tight_layout()
plt.show()

False


## Part 2. Similarity of population vectors across learning and projection on learning dimension.

I can either project onto the difference vector or measure similarity.
Do we see a discrete transition from before to after learning?

**How to assess whether there is actually a significant difference between the two vectors to start with?**


In [30]:
# Load data needed to compute before and after learning.

sampling_rate = 30
# Response and baseline windows, given in seconds and converted to frames.
# NOTE(review): the response window as coded spans 1 s to 2 s (30 frames);
# a previous comment described it as "stimulus onset to 300 ms after" --
# confirm which one is intended.
win = (1, 2)
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline = (0, 1)
baseline = (int(baseline[0] * sampling_rate), int(baseline[1] * sampling_rate))
n_trials = 50
days = ['-2', '-1', '0', '+1', '+2']
# Number of days with mapping (UM) trials: one per entry of `days`.
n_days_map = len(days)
# Number of days with learning (WH) trials.
# NOTE(review): presumably days '0', '+1' and '+2' -- confirm.
n_days_learning = 3
apply_pca = True
variance_to_retain = 0.8

mouse_list = ['AR127']

session_list = io.select_sessions_from_db(db_path,
                                          experimenters=['AR', 'GF', 'MI'],
                                          exclude_cols=['exclude', 'two_p_exclude'],
                                          subject_id=mouse_list,
                                          reward_group='R+',
                                          day=days,
                                          two_p_imaging='yes')

act_map = imaging_utils.shape_features_matrix(mouse_list, session_list, processed_dir, 'UM', n_trials)
# Subtract baselines.
act_map = act_map - np.nanmean(act_map[:, :, baseline[0]:baseline[1]],
                               axis=2, keepdims=True)
act_learning = imaging_utils.shape_features_matrix(mouse_list, session_list, processed_dir, 'WH', n_trials)
# Subtract baselines.
act_learning = act_learning - np.nanmean(act_learning[:, :, baseline[0]:baseline[1]],
                                         axis=2, keepdims=True)

# Split the trial axis into (day, trial); the last axis (time) is inferred.
act_map = act_map.reshape(act_map.shape[0], n_days_map, n_trials, -1)
act_learning = act_learning.reshape(act_learning.shape[0], n_days_learning, n_trials, -1)

# TODO: dim reduction must be applied to all data UM and WH at the same time.

# Reduce dimensionality of the data.
if apply_pca:
    # n_days and n_trials are required positional arguments of
    # reduce_dimensionality and must match the reshape above.
    reduced_act_map, model_map = reduce_dimensionality(
        act_map, win, n_days_map, n_trials, fitting_type='single_bin')
    reduced_act_learning, model_learning = reduce_dimensionality(
        act_learning, win, n_days_learning, n_trials, fitting_type='single_bin')
    reduced_act_map = reduced_act_map[pc_to_retain(model_map, variance_to_retain)]
    # Each data set is masked with the PCs of its own model: the two models
    # can have a different number of components, so reusing the mapping mask
    # on the learning data would select the wrong PCs or fail.
    reduced_act_learning = reduced_act_learning[pc_to_retain(model_learning, variance_to_retain)]





# Population vectors used for the similarity analysis.
# Reference vector: day index 4 of the mapping data, averaged over trials and
# over the response window.
vect_after_learning = act_map[:, 4, :, win[0]:win[1]].mean(axis=(1, 2))
print(vect_after_learning.shape)
# Single-trial population vectors: response window averaged over time.
pop_vect_mapping = act_map[:, :, :, win[0]:win[1]].mean(axis=3)
pop_vect_learning = act_learning[:, :, :, win[0]:win[1]].mean(axis=3)
print(pop_vect_mapping.shape)
print(pop_vect_learning.shape)

# Output arrays, one row per day and one column per trial.
mapping_shape = (pop_vect_mapping.shape[1], n_trials)
learning_shape = (pop_vect_learning.shape[1], n_trials)
cosine_sim_mapping = np.zeros(mapping_shape)
cosine_sim_learning = np.zeros(learning_shape)
dot_sim_mapping = np.zeros(mapping_shape)
dot_sim_learning = np.zeros(learning_shape)

def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``."""
    inner_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return inner_product / norm_product

def dot_similarity(a, b):
    """Return the dot product of ``a`` and ``b`` (unnormalised similarity)."""
    inner_product = np.dot(a, b)
    return inner_product

# Fill the similarity arrays: every single-trial population vector is compared
# with the reference vector, first for the mapping data and then for the
# learning data.
similarity_targets = (
    (pop_vect_mapping, cosine_sim_mapping, dot_sim_mapping),
    (pop_vect_learning, cosine_sim_learning, dot_sim_learning),
)
for pop_vect, cosine_out, dot_out in similarity_targets:
    for i, k in np.ndindex(pop_vect.shape[1], n_trials):
        single_trial_vect = pop_vect[:, i, k]
        cosine_out[i, k] = cosine_similarity(single_trial_vect,
                                             vect_after_learning)
        dot_out[i, k] = dot_similarity(single_trial_vect,
                                       vect_after_learning)


SyntaxError: invalid syntax (2503804123.py, line 37)

In [27]:
# Plot similarity.
# Blocks are ordered as: mapping trials of the two pretraining days, then
# learning trials interleaved with mapping trials.
n_blocks = 8
mapping_block = [0, 1, 3, 5, 7]
learning_block = [2, 4, 6]


def interleave_blocks(sim_mapping, sim_learning):
    """Concatenate per-day similarities following the block order above."""
    blocks = [None] * n_blocks
    for day, block in enumerate(mapping_block):
        blocks[block] = sim_mapping[day]
    for day, block in enumerate(learning_block):
        blocks[block] = sim_learning[day]
    return np.concatenate(blocks, axis=0)


cosine_sim = interleave_blocks(cosine_sim_mapping, cosine_sim_learning)
dot_sim = interleave_blocks(dot_sim_mapping, dot_sim_learning)

sns.set_theme(context='notebook', style='ticks', palette='deep')
fig_sim, ax_sim = plt.subplots()
for block in range(n_blocks):
    first, last = n_trials * block, n_trials * (block + 1)
    # Learning blocks in green, mapping blocks in salmon.
    color = 'green' if block in learning_block else 'salmon'
    # Swap cosine_sim for dot_sim to plot the unnormalised similarity.
    ax_sim.scatter(range(first, last), cosine_sim[first:last], color=color)
plt.show()

In [28]:
# IPython magic selecting the Qt backend for matplotlib.
# NOTE(review): this sits after the plotting cells; if figures are meant to
# use this backend it presumably belongs with the imports -- confirm.
%matplotlib qt