In [2]:
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import colors
from scipy.stats import mannwhitneyu, wilcoxon
from sklearn.metrics import auc, roc_curve
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import spearmanr

sys.path.append(r'H:/anthony/repos/NWB_analysis')
from matplotlib.backends.backend_pdf import PdfPages
from nwb_wrappers import nwb_reader_functions as nwb_read
import src.utils.utils_imaging as imaging_utils
import src.utils.utils_io as io
from src.behavior import compute_performance, plot_single_session
import warnings

# Set plot parameters.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams['svg.fonttype'] = 'none'
sns.set_theme(context='paper', style='ticks', palette='deep', font='sans-serif', font_scale=1)

In [3]:
# Path to the directory containing the processed data.
processed_dir = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/data_processed/mice"
nwb_dir = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/NWB"

# Session metadata file.
db_path = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/session_metadata.xlsx"

# # Rewarded and non-rewarded NWB files.
# group_yaml_rew = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/groups/imaging_rewarded.yaml"
# group_yaml_non_rew = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/groups/imaging_non_rewarded.yaml"
# nwb_list_rew = io.read_group_yaml(group_yaml_rew)
# nwb_list_non_rew = io.read_group_yaml(group_yaml_non_rew)
# nwb_list = nwb_list_rew + nwb_list_non_rew

## 1. Response to sensory mapping trials across learning

Here I look at the evolution of the response of the population across learning day. This is computed on the 50 whisker stimulation presentated at the end of each session after disengagement.

- PSTH
- amplitude of the population response
- number of responsive neurons
- stability of the response



### 1.1. Responses to unmotivated mapping trials across learning days.

- Amplitude of the response
- Number of significant cells
- variance across days
- dimensionality across days


In [140]:
# Load data needed to compute before and after learning.

sampling_rate = 30
win = (1, 1.3)  # from stimulus onset to 300 ms after.
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline_win = (0, 1)
baseline_win = (int(baseline_win[0] * sampling_rate), int(baseline_win[1] * sampling_rate))
days = ['-2', '-1', '0', '+1', '+2']
# Correlation matrix for a specific cell type
cell_type = None
variance_explained_thr = 0.7

_, _, mice, _ = io.select_sessions_from_db(db_path,
                                            nwb_dir,
                                            two_p_imaging='yes')
print(mice)
len(mice)


['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF319', 'GF323', 'GF333', 'GF334', 'GF348', 'GF350', 'MI062', 'MI069', 'MI072', 'MI075', 'MI076', 'AR132', 'AR133', 'AR135', 'AR137', 'AR139', 'AR127', 'AR131', 'AR143', 'AR144']


30

In [141]:
average_response = {}
responsive_p_values = {}
std = {}
dimensionality = {}
metadata = {}

for mouse_id in mice:
    # Disregard these mice as the number of trials is too low.
    if mouse_id in ['GF307', 'GF310', 'GF333', 'AR144', 'AR135']:
        continue
    session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                        nwb_dir,
                                                                        two_p_imaging='yes',
                                                                        subject_id=mouse_id,
                                                                        day=days,)
    print(session_list)

    data = []
    mdata_list = []
    for session_id in session_list:
        arr, mdata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                            session_id,
                                                            processed_dir)
        arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
        data.append(arr)
        mdata_list.append(mdata)

    # Extract UM trials.
    for i, arr in enumerate(data):
        arr = imaging_utils.extract_trials(arr, mdata_list[i], 'UM', n_trials=None)
        data[i] = arr

    # # Select UM trials.
    # data = [arr[:, -1] for arr in data]
    # # Remove trials with NaNs.
    # data = [arr[:, ~np.isnan(arr).all(axis=(0,2))] for arr in data]

    # Select cell type.
    if cell_type:
        cell_type_mask = mdata['cell_types']==cell_type
        data = [arr[cell_type_mask] for arr in data]

    # Get some metadata.
    reward_group = io.get_reward_group_from_db(db_path, session_list[0])
    metadata[mouse_id] = {}
    metadata[mouse_id]['reward_group'] = reward_group
    metadata[mouse_id]['cell_types'] = mdata['cell_types']


    # Compute average response for each trial, each day.
    # --------------------------------------------------

    average_response[mouse_id] = []
    for day in data:
        average_response[mouse_id].append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))


    # Compute standard deviation of population response.
    # ----------------------------------------------------  

    std[mouse_id] = []
    for day in data:
        std[mouse_id].append(np.std(np.nanmean(day[:, :, win[0]:win[1]], axis=2), axis=0))


    # Test responsiveness.
    # --------------------

    baseline_avg = []
    response_avg = []
    for day in data:
        baseline_avg.append(np.nanmean(day[:, :, baseline_win[0]:baseline_win[1]], axis=2))
        response_avg.append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))

    # Compare response amplitude to baseline.
    n_cells = data[0].shape[0]
    p_values = [np.zeros(n_cells) for _ in range(len(data))]
    for iday, day in enumerate(data):
        for icell in range(n_cells):
            _, p_values[iday][icell] = wilcoxon(baseline_avg[iday][icell], response_avg[iday][icell])
    p_values = np.stack(p_values, axis=0)
    responsive_p_values[mouse_id] = p_values


    # Compute dimensionality of the population response.
    # --------------------------------------------------

    dimensionality[mouse_id] = []
    pca_results = []
    for day in data:
        X = np.mean(day[:,:,win[0]:win[1]], axis=2)
        X = X.T
        X = StandardScaler(with_mean=True, with_std=True).fit_transform(X)
        pca = PCA()
        model = pca.fit(X)
        n_comp = np.sum(model.explained_variance_ratio_.cumsum() < variance_explained_thr) + 1
        dimensionality[mouse_id].append(n_comp)



['GF305_27112020_083119', 'GF305_28112020_103938', 'GF305_29112020_103331', 'GF305_30112020_110255', 'GF305_02122020_132229']
['GF306_27112020_104436', 'GF306_28112020_125555', 'GF306_29112020_131929', 'GF306_30112020_133249', 'GF306_02122020_161611']
['GF308_17112020_105052', 'GF308_18112020_093627', 'GF308_19112020_103527', 'GF308_20112020_122826', 'GF308_21112020_135515']
['GF311_17112020_155501', 'GF311_18112020_151838', 'GF311_19112020_160412', 'GF311_20112020_171609', 'GF311_21112020_180049']
['GF313_27112020_141857', 'GF313_28112020_154236', 'GF313_29112020_154625', 'GF313_30112020_154904', 'GF313_03122020_082147']
['GF314_27112020_160459', 'GF314_28112020_171800', 'GF314_29112020_174831', 'GF314_30112020_171906', 'GF314_03122020_102249']
['GF317_15122020_081931', 'GF317_16122020_082007', 'GF317_17122020_080715', 'GF317_18122020_104834', 'GF317_20122020_120604']
['GF318_15122020_095616', 'GF318_16122020_095516', 'GF318_17122020_144100', 'GF318_18122020_132105', 'GF318_19122020_1

Convert to pandas and plot.

In [150]:
mouse_ids = average_response.keys()

df = []
for mouse_id in mouse_ids:
    for iday in range(len(days)):
        amp = np.nanmean(np.nanmean(average_response[mouse_id][iday], axis=1), axis=0) * 100
        s = np.mean(std[mouse_id][iday])
        n_resp_05 = np.sum(responsive_p_values[mouse_id][iday] <= 0.05) / responsive_p_values[mouse_id][iday].size * 100
        n_resp_01 = np.sum(responsive_p_values[mouse_id][iday] <= 0.01) / responsive_p_values[mouse_id][iday].size * 100
        dim = dimensionality[mouse_id][iday]
        temp = pd.DataFrame([[amp, s, n_resp_05, n_resp_01, dim, days[iday], mouse_id, metadata[mouse_id]['reward_group']]],
                            columns=['population_response', 'population_std', 'n_responsive_thr_0.05', 'n_responsive_thr_0.01', 'dimensionality', 'day', 'mouse_id', 'reward_group'])
        df.append(temp)
df = pd.concat(df)


In [153]:
output_dir = r'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/sensory_plasticity'
if cell_type:
    pdf_file = f'responses_across_learning_{cell_type}.pdf'
    df_file = f'responses_across_learning_{cell_type}.csv'
else:
    pdf_file = f'responses_across_learning.pdf'
    df_file = f'responses_across_learning.csv'

with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
    sns.set_theme(context='talk', style='ticks', palette='deep', font='sans-serif', font_scale=1)
    palette = sns.color_palette(['#238443', '#d51a1c'])

    fig, axes = plt.subplots(2, 2, figsize=(10, 6), sharex=True)
    sns.boxplot(data=df, x='day', y='population_response', hue='reward_group',
                ax=axes[0,0], legend=False, hue_order=['R+', 'R-'], palette=palette)
    axes[0,0].set_title('Amplitude')
    axes[0,0].set_ylabel(r'% dF/F')
    axes[0,0].set_ylim([0, 6])

    sns.barplot(data=df, x='day', y='population_std', hue='reward_group',
                ax=axes[0,1], legend=False, hue_order=['R+', 'R-'], palette=palette)
    axes[0,1].set_title('Population variability')
    axes[0,1].set_ylabel(r'Standard deviation')
    axes[0,1].set_ylim([0, 0.2])

    sns.barplot(data=df, x='day', y='dimensionality', hue='reward_group',
                ax=axes[1,0], legend=False, hue_order=['R+', 'R-'], palette=palette)
    axes[1,0].set_title('Dimensionality')
    axes[1,0].set_ylabel('# PCs')
    axes[1,0].set_ylim([0, 40])

    sns.barplot(data=df, x='day', y='n_responsive_thr_0.01', hue='reward_group',
                ax=axes[1,1], hue_order=['R+', 'R-'], palette=palette)
    axes[1,1].set_title(r'% responsive cells (p<0.01)')
    axes[1,1].set_ylabel(r'% responsive')
    axes[1,1].set_ylim([0, 50])

    sns.despine()
    plt.tight_layout()

    # pdf.savefig(fig, dpi=300)
    # df.to_csv(os.path.join(output_dir, df_file), index=False)

  with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:


### 1.2. Correlation matrices and responsive similarity across learning.

Start with making a correlation matrix for each mouse and cell type.

In [39]:
# Load data needed to compute before and after learning.

sampling_rate = 30
win = (1, 1.3)  # from stimulus onset to 300 ms after.
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline_win = (0, 1)
baseline_win = (int(baseline_win[0] * sampling_rate), int(baseline_win[1] * sampling_rate))
days = ['-2', '-1', '0', '+1', '+2']

_, _, mice, _ = io.select_sessions_from_db(db_path,
                                            nwb_dir,
                                            two_p_imaging='yes')
print(mice)

['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF319', 'GF323', 'GF333', 'GF334', 'GF348', 'GF350', 'MI062', 'MI069', 'MI072', 'MI075', 'MI076', 'AR132', 'AR133', 'AR135', 'AR137', 'AR139', 'AR127', 'AR131', 'AR143', 'AR144']


In [114]:
corr_avg_days = {}
corr_avg_pre_post = {}
metadata = {}
pop_vectors_dict = {}
lmi = {}

mice = ['AR127']

for mouse_id in mice:
    output_dir = fr'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/mice/{mouse_id}'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                        nwb_dir,
                                                                        two_p_imaging='yes',
                                                                        subject_id=mouse_id,
                                                                        day=days,)
    print(session_list)
    
    data = []
    mdata_list = []
    for session_id in session_list:
        arr, mdata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                            session_id,
                                                            processed_dir)
        arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
        data.append(arr)
        mdata_list.append(mdata)
    reward_group = io.get_reward_group_from_db(db_path, session_list[0])
    metadata[mouse_id] = {}
    metadata[mouse_id]['reward_group'] = reward_group
    
    # Extract UM trials.
    for i, arr in enumerate(data):
        arr = imaging_utils.extract_trials(arr, mdata_list[i], 'UM', n_trials=None)
        data[i] = arr

    corr_avg_days[mouse_id] = {}
    corr_avg_pre_post[mouse_id] = {}
    pop_vectors_dict[mouse_id] = {}
    
    for cell_type in ['allcells', 'wS2', 'wM1']:
        # Select cell type.
        if cell_type == 'allcells':
            data_subtype = data
        else:
            data_subtype = []
            cell_type_mask = mdata_list[0]['cell_types']==cell_type
            data_subtype = [arr[cell_type_mask] for arr in data]

        # strong_cells = [3,11,33,48,57,67,80,86,104,153,166,175]
        # mask = np.ones(data_subtype[0].shape[0], dtype=bool)
        # mask[strong_cells] = False
        # data_subtype = [arr[mask] for arr in data_subtype]

        # If no cells of the specified type, skip.
        if data_subtype[0].shape[0] == 0:
            continue

        # Compute average response for each trial, each day.
        
        response_avg = []
        for day in data_subtype:
            response_avg.append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))

        pop_vectors = np.concatenate(response_avg, axis=1)
        pop_vectors_dict[mouse_id][cell_type] = pop_vectors

        # Compute LMI.
        if cell_type == 'allcells':
            # pre = np.mean(np.concatenate(response_avg[0:2], axis=1), axis=1)
            # print(pre.shape)
            # post = np.mean(np.concatenate((response_avg[5], response_avg[7]), axis=1), axis=1)
            # lmi[mouse_id] = (post - pre) / (np.abs(post) + np.abs(pre))
            lmis = []
            for icell in range(pop_vectors.shape[0]):
                # mapping trials of D-2, D-1, D+1, D+2.
                X = np.r_[response_avg[0][icell], response_avg[1][icell],
                          response_avg[3][icell], response_avg[4][icell]]
                y = np.r_[np.zeros(response_avg[0][icell].shape[0]),
                          np.zeros(response_avg[1][icell].shape[0]),
                          np.ones(response_avg[3][icell].shape[0]),
                          np.ones(response_avg[4][icell].shape[0])]
                fpr, tpr, _ = roc_curve(y, X)
                roc_auc = auc(fpr, tpr)
                lmis.append((roc_auc - 0.5) * 2)
            lmi[mouse_id] = np.array(lmis)
        
        corr_matrix = np.corrcoef(pop_vectors.T)
        # corr_matrix = cosine_similarity(pop_vectors.T)
        # corr_matrix = spearmanr(pop_vectors.T, axis=1)[0]

        # Compute average correlation inside each days.
        corr_avg_days[mouse_id][cell_type] = []
        n_trials = [arr.shape[1] for arr in data_subtype]
        for start, end in zip(np.cumsum([0] + n_trials[:-1]), np.cumsum(n_trials)):
            upper_triangle = np.triu(corr_matrix[start:end, start:end], k=1)
            corr_avg_days[mouse_id][cell_type].append(np.mean(upper_triangle))

        # Compare correlation between inside pre training days,
        # inside post training days and between pre and post training days.
        trial_cumsum = np.cumsum([0] + n_trials)
        pre_in_start_x, pre_in_end_x = trial_cumsum[1], trial_cumsum[2]
        pre_in_start_y, pre_in_end_y = trial_cumsum[0], trial_cumsum[1]
        pre_in = np.mean(corr_matrix[pre_in_start_x:pre_in_end_x, pre_in_start_y:pre_in_end_y])

        post_in_start_x, post_in_end_x = trial_cumsum[4], trial_cumsum[5]
        post_in_start_y, post_in_end_y = trial_cumsum[3], trial_cumsum[4]
        post_in = np.mean(corr_matrix[post_in_start_x:post_in_end_x, post_in_start_y:post_in_end_y])

        pre_post_start_x, pre_post_end_x = trial_cumsum[3], trial_cumsum[5]
        pre_post_start_y, pre_post_end_y = trial_cumsum[0], trial_cumsum[2]
        pre_post = np.mean(corr_matrix[pre_post_start_x:pre_post_end_x, pre_post_start_y:pre_post_end_y])

        corr_avg_pre_post[mouse_id][cell_type] = [pre_in, post_in, pre_post]


        # Plot population vectors.
        pdf_file = f'pop_vectors_{mouse_id}_{cell_type}.pdf'
        with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
            vmax = np.percentile(pop_vectors, 99)
            vmin = np.percentile(pop_vectors, 1)

            f = plt.figure()
            im = plt.imshow(pop_vectors, cmap='viridis', vmin=vmin, vmax=vmax)
            cbar = f.colorbar(im, ticks=[vmin, 0, vmax])
            cbar.ax.set_yticklabels([f'{vmin:.2f}', '0', f'> {vmax:.2f}'])
            cbar.ax.tick_params(size=0)
            pdf.savefig(dpi=300)
            plt.close()

        # Plot correlation matrix.
        pdf_file = f'correlation_matrices_trial_{mouse_id}_{cell_type}.pdf'            
        with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
            
            # Set color map limit to the max without the diagonal.
            vmax = np.max(corr_matrix[~np.eye(corr_matrix.shape[0], dtype=bool)])
            vmin = np.min(corr_matrix)
            f = plt.figure()
            im = plt.imshow(corr_matrix, vmin = vmin, vmax=vmax, cmap='viridis')
            n_trials = [arr.shape[1] for arr in data_subtype]
            for i in np.cumsum(n_trials)[:-1]:
                plt.axvline(x=i-1, color='#252525', linestyle='-', lw=0.5)
                plt.axhline(y=i-1, color='#252525', linestyle='-', lw=0.5)
            if cell_type:
                plt.title(f'{mouse_id} {reward_group} {cell_type}')
            else:
                plt.title(f'{mouse_id} {reward_group} all cells')
            cbar_ax = f.add_axes([0.85, 0.15, 0.05, 0.7])
            cbar = f.colorbar(im, cax=cbar_ax, ticks=[vmin, 0, vmax])
            cbar.ax.set_yticklabels([f'{vmin:.2f}', '0', f'> {vmax:.2f}'])
            cbar.ax.tick_params(size=0)
            pdf.savefig(dpi=300)
            plt.close()

['AR127_20240221_133407', 'AR127_20240222_152629', 'AR127_20240223_131820', 'AR127_20240224_140853', 'AR127_20240225_142858']


Plot average correlation inside pre-training days, inside post-training days and between pre- and post- training.

This is to see these two periods are two modes of activity that are more disimilar in the rewarded group.

It seems that the only difference between R+ and R- is the correlation among post-learning days which is higher. 

In [79]:
mouse_ids = corr_avg_days.keys()

df_corr_days = []
for mouse_id in mouse_ids:
    for cell_type in corr_avg_days[mouse_id].keys():
        for iday in range(len(days)):
            corr = corr_avg_days[mouse_id][cell_type][iday]
            temp = pd.DataFrame([[corr, days[iday], cell_type, mouse_id, metadata[mouse_id]['reward_group']]],
                                columns=['correlation','day', 'cell_type', 'mouse_id', 'reward_group'])
            df_corr_days.append(temp)
df_corr_days = pd.concat(df_corr_days)


df_corr_pre_post = []
for mouse_id in mouse_ids:
    for cell_type in corr_avg_days[mouse_id].keys():
        for i, comp in enumerate(['pre_in', 'post_in', 'pre_post']):
            corr = corr_avg_pre_post[mouse_id][cell_type][i]
            temp = pd.DataFrame([[corr, comp, cell_type, mouse_id, metadata[mouse_id]['reward_group']]],
                                columns=['correlation', 'comparison', 'cell_type', 'mouse_id', 'reward_group'])
            df_corr_pre_post.append(temp)
df_corr_pre_post = pd.concat(df_corr_pre_post)


palette = sns.color_palette(['#238443', '#d51a1c'])

plt.figure()
sns.barplot(data=df_corr_days.loc[df_corr_days.cell_type=='allcells'], x='day', y='correlation', hue='reward_group', palette=palette)

plt.figure()
sns.barplot(data=df_corr_pre_post.loc[df_corr_pre_post.cell_type=='allcells'], x='comparison', y='correlation', hue='reward_group')

Plot the population vectors and lmi.

This is to show that lmi select cells that go on and off as expected.

In [120]:
f, axes = plt.subplots(1, 2, sharey=True)
im = axes[0].imshow(np.repeat(lmi[mouse_id][:, np.newaxis], 10, axis=1), cmap='viridis', vmin=-1, vmax=1)
plt.colorbar(im)
vmax = np.percentile(pop_vectors_dict[mouse_id]['allcells'], 99)
vmin = np.percentile(pop_vectors_dict[mouse_id]['allcells'], 1)
im = axes[1].imshow(pop_vectors_dict[mouse_id]['allcells'], cmap='viridis', vmin=vmin, vmax=vmax)
plt.colorbar(im)
print(vmin, vmax)

-0.27537563 1.1496639


Plot global matrix for the whole population.

In [55]:
# Load data needed to compute before and after learning.

sampling_rate = 30
win = (1, 1.3)  # from stimulus onset to 300 ms after.
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline_win = (0, 1)
baseline_win = (int(baseline_win[0] * sampling_rate), int(baseline_win[1] * sampling_rate))
days = ['-2', '-1', '0', '+1', '+2']

# mice = ['GF323']

In [58]:
for reward_group in ['R+', 'R-']:
    for cell_type in ['allcells', 'wS2', 'wM1']:
        output_dir = r'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/sensory_plasticity/correlation_matrices'
        pdf_file = f'global_correlation_matrices_nobaselinesubstraction_{reward_group}_{cell_type}.pdf'

        _, _, mice, _ = io.select_sessions_from_db(db_path,
                                            nwb_dir,
                                            two_p_imaging='yes',
                                            reward_group=reward_group)
        print(mice)

        average_response = {}
        for mouse_id in mice:
            print(mouse_id)
            # Disregard these mice as the number of trials is too low.
            if mouse_id in ['GF307', 'GF310', 'GF333', 'MI075']:
                continue
            session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                                nwb_dir,
                                                                                two_p_imaging='yes',
                                                                                subject_id=mouse_id,
                                                                                day=days,)
            print(session_list)
            data = []
            mdata_list = []
            for session_id in session_list:
                arr, mdata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                                    session_id,
                                                                    processed_dir)
                # arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
                data.append(arr)
                mdata_list.append(mdata)
            
            # Extract UM trials.
            for i, arr in enumerate(data):
                arr = imaging_utils.extract_trials(arr, mdata_list[i], 'UM', n_trials=None)
                data[i] = arr

            # Select cell type.
            if cell_type != 'allcells':
                cell_type_mask = mdata_list[0]['cell_types']==cell_type
                data = [arr[cell_type_mask] for arr in data]

            # Compute average response for each trial, each day.
            average_response[mouse_id] = []
            for day in data:
                average_response[mouse_id].append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))

        # Even the number of trials per days across mice.
        min_trials = []
        for iday in range(len(days)):
            m = [data[iday].shape[1] for _, data in average_response.items()]
            print(m)
            min_trials.append(np.min(m))

        for mouse, data in average_response.items():
            for iday in range(len(days)):
                average_response[mouse][iday] = data[iday][:, :min_trials[iday]]
            
        corr_matrix = np.concatenate([np.concatenate(data, axis=1) for _, data in average_response.items()], axis=0)
        corr_matrix = np.corrcoef(corr_matrix.T)
        # corr_matrix = spearmanr(pop_vectors.T, axis=1)[0]
        
        # Set color map limit to the max without the diagonal.
        vmax = np.max(corr_matrix[~np.eye(corr_matrix.shape[0], dtype=bool)])
        vmin = np.min(corr_matrix)

        with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
            f = plt.figure()
            im = plt.imshow(corr_matrix, vmin = vmin, vmax=vmax, cmap='viridis')
            n_trials = [arr.shape[1] for arr in data]
            for i in np.cumsum(n_trials)[:-1]:
                plt.axvline(x=i-1, color='#252525', linestyle='-', lw=0.5)
                plt.axhline(y=i-1, color='#252525', linestyle='-', lw=0.5)
            plt.title(f'{reward_group} {cell_type}')
            plt.title(reward_group)
            cbar_ax = f.add_axes([0.85, 0.15, 0.05, 0.7])
            cbar = f.colorbar(im, cax=cbar_ax, ticks=[vmin, 0, vmax])
            cbar.ax.set_yticklabels([f'{vmin:.2f}', '0', f'> {vmax:.2f}'])
            cbar.ax.tick_params(size=0)
            pdf.savefig(dpi=300)

['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF323', 'GF333', 'GF334', 'AR133', 'AR135', 'AR127', 'AR143', 'AR144']
GF305
['GF305_27112020_083119', 'GF305_28112020_103938', 'GF305_29112020_103331', 'GF305_30112020_110255', 'GF305_02122020_132229']
GF306
['GF306_27112020_104436', 'GF306_28112020_125555', 'GF306_29112020_131929', 'GF306_30112020_133249', 'GF306_02122020_161611']
GF307
GF308
['GF308_17112020_105052', 'GF308_18112020_093627', 'GF308_19112020_103527', 'GF308_20112020_122826', 'GF308_21112020_135515']
GF310
GF311
['GF311_17112020_155501', 'GF311_18112020_151838', 'GF311_19112020_160412', 'GF311_20112020_171609', 'GF311_21112020_180049']
GF313
['GF313_27112020_141857', 'GF313_28112020_154236', 'GF313_29112020_154625', 'GF313_30112020_154904', 'GF313_03122020_082147']
GF314
['GF314_27112020_160459', 'GF314_28112020_171800', 'GF314_29112020_174831', 'GF314_30112020_171906', 'GF314_03122020_102249']
GF317
['GF317_15122020_081931', '

In [178]:
from sklearn.metrics.pairwise import cosine_similarity

sim = np.concatenate(response_avg, axis=1)
sim = cosine_similarity(sim.T)

plt.figure()
# vmin=-1
# vmax=1
# plt.imshow(corr_matrix, vmin=vmin, vmax=vmax)
plt.imshow(sim)
n_trials = [arr.shape[1] for arr in data]
for i in np.cumsum(n_trials)[:-1]:
    plt.axvline(x=i, color='#252525', linestyle='-',)
    plt.axhline(y=i, color='#252525', linestyle='-',)
plt.title(mouse_id)

plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x207f56f5610>

### 1.3 Correlation during learning.

When is the change of correlation triggered during D0 whisker learning?

- First, plot correlation matrix with WH trials stacked with UM.
- then point plot the correlation of each trial with the average maaping response of D+2
- select modulated cells with LMI and plot population vectors for WH and UM. Is there a graded response? a discret change? or do they respond strong since the very first trial?



In [174]:
# Load data needed to compute before and after learning.

sampling_rate = 30
win = (1, 1.150)  # from stimulus onset to 300 ms after.
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline_win = (0, 1)
baseline_win = (int(baseline_win[0] * sampling_rate), int(baseline_win[1] * sampling_rate))
reward_group = 'R-'
plot_save_figs = False
days = ['-2', '-1', '0', '+1', '+2']
wh_trial_type = 'WH'

_, _, mice, _ = io.select_sessions_from_db(db_path,
                                            nwb_dir,
                                            two_p_imaging='yes')
print(mice)


['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF319', 'GF323', 'GF333', 'GF334', 'GF348', 'GF350', 'MI062', 'MI069', 'MI072', 'MI075', 'MI076', 'AR132', 'AR133', 'AR135', 'AR137', 'AR139', 'AR127', 'AR131', 'AR143', 'AR144']


In [175]:
corr_avg_days = {}
corr_avg_pre_post = {}
metadata = {}
pop_vectors_dict = {}
n_trials = {}
lmi = {}

# mice = ['GF334']

# Disregard these mice as the number of trials is too low.
mice =  [mouse for mouse in mice if mouse not in ['GF307', 'GF310', 'GF333', 'MI075']]

for mouse_id in mice:
    output_dir = fr'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/mice/{mouse_id}'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                        nwb_dir,
                                                                        two_p_imaging='yes',
                                                                        subject_id=mouse_id,
                                                                        day=days,)
    print(session_list)
    
    data = []
    mdata_list = []
    for session_id in session_list:
        arr, mdata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                            session_id,
                                                            processed_dir)
        arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
        data.append(arr)
        mdata_list.append(mdata)
    reward_group = io.get_reward_group_from_db(db_path, session_list[0])
    metadata[mouse_id] = {}
    metadata[mouse_id]['reward_group'] = reward_group
    
    # Extract UM and WH trials.
    if reward_group == 'R+':
        n_um = 45
        n_wh = 30
    else:
        n_um = 45
        n_wh = 10
    
    if wh_trial_type == 'WH':
        # Some days are no WH trials for the mouse.
        if mouse_id == 'AR132':
            continue

    activity = []
    arr = imaging_utils.extract_trials(data[0], mdata_list[0], 'UM', n_trials=n_um)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[1], mdata_list[1], 'UM', n_trials=n_um)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[2], mdata_list[2], wh_trial_type, n_trials=n_wh)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[2], mdata_list[2], 'UM', n_trials=n_um)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[3], mdata_list[3], wh_trial_type, n_trials=n_wh)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[3], mdata_list[3], 'UM', n_trials=n_um)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[4], mdata_list[4], wh_trial_type, n_trials=n_wh)
    activity.append(arr)
    arr = imaging_utils.extract_trials(data[4], mdata_list[4], 'UM', n_trials=n_um)
    activity.append(arr)

    # # Print n trials.
    # print([arr.shape[1] for arr in activity])

    corr_avg_days[mouse_id] = {}
    corr_avg_pre_post[mouse_id] = {}
    pop_vectors_dict[mouse_id] = {}
    n_trials[mouse_id] = {}

    for cell_type in ['allcells']:
        # Select cell type.
        if cell_type == 'allcells':
            activity_subtype = activity
        else:
            activity_subtype = []
            cell_type_mask = mdata_list[0]['cell_types']==cell_type
            activity_subtype = [arr[cell_type_mask] for arr in activity]
        
        # strong_cells = [3,11,33,48,57,67,80,86,104,153,166,175]
        # mask = np.ones(data_subtype[0].shape[0], dtype=bool)
        # mask[strong_cells] = False
        # data_subtype = [arr[mask] for arr in data_subtype]

        # If no cells of the specified type, skip.
        if activity_subtype[0].shape[0] == 0:
            continue

        # Compute average response for each trial, each day.
        response_avg = []
        for d in activity_subtype:
            response_avg.append(np.nanmean(d[:, :, win[0]:win[1]], axis=2))

        pop_vectors = np.concatenate(response_avg, axis=1)
        pop_vectors_dict[mouse_id][cell_type] = pop_vectors
        
        # Compute LMI.
        if cell_type == 'allcells':
            # pre = np.mean(np.concatenate(response_avg[0:2], axis=1), axis=1)
            # print(pre.shape)
            # post = np.mean(np.concatenate((response_avg[5], response_avg[7]), axis=1), axis=1)
            # lmi[mouse_id] = (post - pre) / (np.abs(post) + np.abs(pre))
            lmis = []
            for icell in range(pop_vectors.shape[0]):
                # mapping trials of D-2, D-1, D+1, D+2.
                X = np.r_[response_avg[0][icell], response_avg[1][icell],
                          response_avg[5][icell], response_avg[7][icell]]
                y = np.r_[np.zeros(response_avg[0][icell].shape[0]),
                          np.zeros(response_avg[1][icell].shape[0]),
                          np.ones(response_avg[5][icell].shape[0]),
                          np.ones(response_avg[7][icell].shape[0])]
                fpr, tpr, _ = roc_curve(y, X)
                roc_auc = auc(fpr, tpr)
                lmis.append((roc_auc - 0.5) * 2)
            lmi[mouse_id] = np.array(lmis)

        corr_matrix = np.corrcoef(pop_vectors.T)
        # corr_matrix = cosine_similarity(pop_vectors.T)
        # corr_matrix = spearmanr(pop_vectors.T, axis=1)[0]

        # Compute average correlation inside each days.
        corr_avg_days[mouse_id][cell_type] = []
        n_trials[mouse_id] = [arr.shape[1] for arr in activity_subtype]
        for start, end in zip(np.cumsum([0] + n_trials[mouse_id][:-1]), np.cumsum(n_trials[mouse_id])):
            upper_triangle = np.triu(corr_matrix[start:end, start:end], k=1)
            corr_avg_days[mouse_id][cell_type].append(np.mean(upper_triangle))

        # Compare correlation between inside pre training days,
        # inside post training days and between pre and post training days.
        trial_cumsum = np.cumsum([0] + n_trials[mouse_id])
        pre_in_start_x, pre_in_end_x = trial_cumsum[1], trial_cumsum[2]
        pre_in_start_y, pre_in_end_y = trial_cumsum[0], trial_cumsum[1]
        pre_in = np.mean(corr_matrix[pre_in_start_x:pre_in_end_x, pre_in_start_y:pre_in_end_y])

        post_in_start_x, post_in_end_x = trial_cumsum[4], trial_cumsum[5]
        post_in_start_y, post_in_end_y = trial_cumsum[3], trial_cumsum[4]
        post_in = np.mean(corr_matrix[post_in_start_x:post_in_end_x, post_in_start_y:post_in_end_y])

        pre_post_start_x, pre_post_end_x = trial_cumsum[3], trial_cumsum[5]
        pre_post_start_y, pre_post_end_y = trial_cumsum[0], trial_cumsum[2]
        pre_post = np.mean(corr_matrix[pre_post_start_x:pre_post_end_x, pre_post_start_y:pre_post_end_y])

        corr_avg_pre_post[mouse_id][cell_type] = [pre_in, post_in, pre_post]

        if plot_save_figs:
            # Plot population vectors.
            pdf_file = f'pop_vectors_wh_um_{mouse_id}_{cell_type}.pdf'
            with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
                vmax = np.percentile(pop_vectors, 99)
                vmin = np.percentile(pop_vectors, 1)

                f = plt.figure()
                im = plt.imshow(pop_vectors, cmap='viridis', vmin=vmin, vmax=vmax)
                cbar = f.colorbar(im, ticks=[vmin, 0, vmax])
                cbar.ax.set_yticklabels([f'{vmin:.2f}', '0', f'> {vmax:.2f}'])
                cbar.ax.tick_params(size=0)
                pdf.savefig(dpi=300)
                plt.close()

            # Plot correlation matrix.
            pdf_file = f'correlation_matrices_trial_wh_um_{mouse_id}_{cell_type}.pdf'            
            with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
                
                # Set color map limit to the max without the diagonal.
                vmax = np.max(corr_matrix[~np.eye(corr_matrix.shape[0], dtype=bool)])
                vmin = np.min(corr_matrix)
                f = plt.figure()
                im = plt.imshow(corr_matrix, vmin = vmin, vmax=vmax, cmap='viridis')
                n_trials[mouse_id] = [arr.shape[1] for arr in activity]
                for i in np.cumsum(n_trials[mouse_id])[:-1]:
                    plt.axvline(x=i-1, color='#252525', linestyle='-', lw=0.5)
                    plt.axhline(y=i-1, color='#252525', linestyle='-', lw=0.5)
                if cell_type:
                    plt.title(f'{mouse_id} {reward_group} {cell_type}')
                else:
                    plt.title(f'{mouse_id} {reward_group} all cells')
                cbar_ax = f.add_axes([0.85, 0.15, 0.05, 0.7])
                cbar = f.colorbar(im, cax=cbar_ax, ticks=[vmin, 0, vmax])
                cbar.ax.set_yticklabels([f'{vmin:.2f}', '0', f'> {vmax:.2f}'])
                cbar.ax.tick_params(size=0)
                pdf.savefig(dpi=300)
                plt.close()

['GF305_27112020_083119', 'GF305_28112020_103938', 'GF305_29112020_103331', 'GF305_30112020_110255', 'GF305_02122020_132229']
[45, 45, 30, 45, 30, 45, 30, 45]
['GF306_27112020_104436', 'GF306_28112020_125555', 'GF306_29112020_131929', 'GF306_30112020_133249', 'GF306_02122020_161611']
[45, 45, 30, 45, 30, 45, 30, 45]
['GF308_17112020_105052', 'GF308_18112020_093627', 'GF308_19112020_103527', 'GF308_20112020_122826', 'GF308_21112020_135515']
[45, 45, 30, 45, 30, 45, 30, 45]
['GF311_17112020_155501', 'GF311_18112020_151838', 'GF311_19112020_160412', 'GF311_20112020_171609', 'GF311_21112020_180049']
[45, 45, 30, 45, 30, 45, 30, 45]
['GF313_27112020_141857', 'GF313_28112020_154236', 'GF313_29112020_154625', 'GF313_30112020_154904', 'GF313_03122020_082147']
[45, 45, 30, 45, 30, 45, 30, 45]
['GF314_27112020_160459', 'GF314_28112020_171800', 'GF314_29112020_174831', 'GF314_30112020_171906', 'GF314_03122020_102249']
[45, 45, 30, 45, 30, 45, 30, 45]
['GF317_15122020_081931', 'GF317_16122020_0820

Population vector plot on the stim for the modulated cells. For WH and UM.

I'm afraid the modulated cells repond high already at the first WH.

In [235]:
rewarded_mice = [mouse_id for mouse_id in pop_vectors_dict.keys() if metadata[mouse_id]['reward_group']=='R+']
unrewarded_mice = [mouse_id for mouse_id in pop_vectors_dict.keys() if metadata[mouse_id]['reward_group']=='R-']

# Compute the LMI thresholds for the top 5% most modulated cells and bottom 5% least modulated cells
lmi_threshold_top = np.percentile(lmi[rewarded_mice[0]], 95)
lmi_threshold_bottom = np.percentile(lmi[rewarded_mice[0]], 5)

# Select the top 5% most modulated cells and bottom 5% least modulated cells for each rewarded mouse
rewarded_pop_vectors_top = np.concatenate(
    [pop_vectors_dict[mouse_id]['allcells'][lmi[mouse_id] > lmi_threshold_top] for mouse_id in rewarded_mice], axis=0
)
rewarded_pop_vectors_bottom = np.concatenate(
    [pop_vectors_dict[mouse_id]['allcells'][lmi[mouse_id] < lmi_threshold_bottom] for mouse_id in rewarded_mice], axis=0
)

# Select the top 5% most modulated cells and bottom 5% least modulated cells for each unrewarded mouse
unrewarded_pop_vectors_top = np.concatenate(
    [pop_vectors_dict[mouse_id]['allcells'][lmi[mouse_id] > lmi_threshold_top_unrewarded] for mouse_id in unrewarded_mice], axis=0
)
unrewarded_pop_vectors_bottom = np.concatenate(
    [pop_vectors_dict[mouse_id]['allcells'][lmi[mouse_id] < lmi_threshold_bottom_unrewarded] for mouse_id in unrewarded_mice], axis=0
)


In [239]:
if wh_trial_type == 'WH':
    block_edges_rew = np.cumsum([45, 45, 30, 45, 30, 45, 30, 45])[:-1]
    block_edges_unrew = np.cumsum([45, 45, 10, 45, 10, 45, 10, 45])[:-1]
elif wh_trial_type == 'WM':
    block_edges_rew = np.cumsum([45, 45, 30, 45, 30, 45, 30, 45])[:-1]
    block_edges_unrew = np.cumsum([45, 45, 10, 45, 10, 45, 10, 45])[:-1]

vmax = np.percentile(rewarded_pop_vectors_top, 99)
vmin = np.percentile(rewarded_pop_vectors_top, 1)

plt.figure()
plt.imshow(rewarded_pop_vectors_top, cmap='viridis', vmin=vmin, vmax=vmax)
for i in block_edges_rew-0.5:
    plt.axvline(x=i, color='white', linestyle='--', linewidth=1)
plt.xticks(block_edges_rew-0.5, block_edges_rew)
plt.colorbar()
plt.show()
plt.title('Top 5% positively modulated cells for each mouse -- R+')

# Plot for the bottom 5% negatively modulated cells -- R+
vmax = np.percentile(rewarded_pop_vectors_bottom, 99)
vmin = np.percentile(rewarded_pop_vectors_bottom, 1)
plt.figure()
plt.imshow(rewarded_pop_vectors_bottom, cmap='viridis', vmin=vmin, vmax=vmax)
for i in block_edges_rew-0.5:
    plt.axvline(x=i, color='white', linestyle='--', linewidth=1)
plt.xticks(block_edges_rew-0.5, block_edges_rew)
plt.colorbar()
plt.show()
plt.title('Bottom 5% negatively modulated cells for each mouse -- R+')

# Compute the LMI thresholds for the top 5% most modulated cells and bottom 5% least modulated cells for unrewarded mice
lmi_threshold_top_unrewarded = np.percentile(lmi[unrewarded_mice[0]], 95)
lmi_threshold_bottom_unrewarded = np.percentile(lmi[unrewarded_mice[0]], 5)


# Plot for the top 5% positively modulated cells -- R-
vmax = np.percentile(unrewarded_pop_vectors_top, 99)
vmin = np.percentile(unrewarded_pop_vectors_top, 1)
plt.figure()
plt.imshow(unrewarded_pop_vectors_top, cmap='viridis', vmin=vmin, vmax=vmax)
for i in block_edges_unrew-0.5:
    plt.axvline(x=i, color='white', linestyle='--', linewidth=1)
plt.xticks(block_edges_unrew-0.5, block_edges_unrew)
plt.colorbar()
plt.show()
plt.title('Top 5% positively modulated cells -- R-')

# Plot for the bottom 5% negatively modulated cells -- R-
vmax = np.percentile(unrewarded_pop_vectors_bottom, 99)
vmin = np.percentile(unrewarded_pop_vectors_bottom, 1)
plt.figure()
plt.imshow(unrewarded_pop_vectors_bottom, cmap='viridis', vmin=vmin, vmax=vmax)
for i in block_edges_unrew-0.5:
    plt.axvline(x=i, color='white', linestyle='--', linewidth=1)
plt.xticks(block_edges_unrew-0.5, block_edges_unrew)
plt.colorbar()
plt.show()
plt.title('Bottom 5% negatively modulated cells for each mouse -- R-')

Text(0.5, 1.0, 'Bottom 5% negatively modulated cells for each mouse -- R-')

Scatter plot of the correlation of each D0 WH with post learning UM.

First have a look with the global population.

In [250]:
block_edges_rew = np.cumsum([45, 45, 30, 45, 30, 45, 30, 45])
print(block_edges_rew)
pre = rewarded_pop_vectors_top[:, :block_edges_rew[1]]
post = np.concatenate((rewarded_pop_vectors_top[:,block_edges_rew[4]:block_edges_rew[5]],
                       rewarded_pop_vectors_top[:,block_edges_rew[6]:block_edges_rew[7]]), axis=1)
d0_learning = rewarded_pop_vectors_top[:,block_edges_rew[2]:block_edges_rew[3]]

correlations = []
correlations.append(np.mean(np.corrcoef(pre.T, post.T)[0:post.shape[1], pre.shape[1]:]))
for i in range(d0_learning.shape[1]):
    correlations.append(np.mean(np.corrcoef(d0_learning[:, i], post.T)[0:post.shape[1], 0]))
correlation = np.array(correlations)

plt.figure()
plt.scatter(range(correlation.shape[0]), correlation)
plt.xlabel('Correlation with Pre')
plt.ylabel('Correlation with Post')
plt.title('Correlation of Pre and Each Vector in D0 Learning with Post')
plt.ylim([-1, 1])
plt.show()



[ 45  90 120 165 195 240 270 315]


(175, 90)


array([[ 0.02034362,  0.26056918,  0.04235924, ...,  0.18045816,
         0.1439456 ,  0.1596096 ],
       [ 0.03551034,  0.13685159,  0.22050762, ...,  0.1917738 ,
         0.22206861,  0.2400069 ],
       [-0.02437256,  0.10172189,  0.03464237, ...,  0.35288548,
         0.09057352,  0.18831184],
       ...,
       [ 0.12885872,  0.19384733, -0.0238691 , ...,  0.10951992,
         0.18027061,  0.07334732],
       [ 0.03851379,  0.16146571,  0.00986161, ...,  0.31289878,
         0.27375669,  0.14097796],
       [ 0.13142371,  0.31517988,  0.04071624, ...,  0.17331523,
         0.10635769,  0.1278629 ]])

In [233]:
corr_matrix = np.corrcoef(rewarded_pop_vectors_top.T)
vmax = np.max(corr_matrix[~np.eye(corr_matrix.shape[0], dtype=bool)])
vmin = np.min(corr_matrix)

plt.figure()
im = plt.imshow(corr_matrix, cmap='viridis', vmin=vmin, vmax=vmax)
cbar = f.colorbar(im, ticks=[vmin, 0, vmax])
cbar.ax.set_yticklabels([f'{vmin:.2f}', '0', f'> {vmax:.2f}'])
cbar.ax.tick_params(size=0)

for i in block_edges_rew-0.5:
    plt.axvline(x=i, color='white', linestyle='--', linewidth=1)
    plt.axhline(y=i, color='white', linestyle='--', linewidth=1)
plt.xticks(block_edges_rew-0.5, block_edges_rew)



Adding colorbar to a different Figure <Figure size 640x480 with 2 Axes> than <Figure size 640x480 with 8 Axes> which fig.colorbar is called on.



([<matplotlib.axis.XTick at 0x22091d70cb0>,
  <matplotlib.axis.XTick at 0x22091d71b20>,
  <matplotlib.axis.XTick at 0x2208e2fa5d0>,
  <matplotlib.axis.XTick at 0x2208e306330>,
  <matplotlib.axis.XTick at 0x2208e305550>,
  <matplotlib.axis.XTick at 0x2208e360d40>,
  <matplotlib.axis.XTick at 0x2208e3625a0>],
 [Text(44.5, 0, '45'),
  Text(89.5, 0, '90'),
  Text(119.5, 0, '120'),
  Text(164.5, 0, '165'),
  Text(194.5, 0, '195'),
  Text(239.5, 0, '240'),
  Text(269.5, 0, '270')])

In [227]:
corr_matrix = np.corrcoef(rewarded_pop_vectors_top.T)
plt.figure()
im = plt.imshow(corr_matrix, cmap='viridis')
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x2209ce2d670>

In [61]:
# Print population vectors for specific LMI cells.
mouse_id = 'AR127'
alpha = 5
top = np.percentile(lmi[mouse_id], 100 - alpha)
bottom = np.percentile(lmi[mouse_id], alpha)
lmi_mask = (lmi[mouse_id] >= top) 
print(top)
print(bottom)
print(np.sum(lmi_mask))

plt.imshow(pop_vectors_dict[mouse_id]['allcells'][lmi_mask], cmap='viridis')

1.0
-1.0
31


<matplotlib.image.AxesImage at 0x22061a5c860>

In [252]:
mouse_ids = corr_avg_days.keys()
correlations = {}

for mouse_id in mouse_ids:
    trial_boundaries = np.cumsum([0] + n_trials[mouse_id])
    post_training = np.mean(pop_vectors_dict[mouse_id]['allcells'][:, trial_boundaries[-2]:], axis=1)
    correlations[mouse_id] = np.corrcoef(pop_vectors_dict[mouse_id]['allcells'], post_training, rowvar=False)[-1, :-1]
    print(correlations[mouse_id].shape)
    # repeated_post_training = np.tile(post_training, (10, 1))
    # plt.figure()
    # plt.imshow(repeated_post_training.T, cmap='viridis',vmin=-0.27, vmax=1.3)


(260,)
(260,)
(260,)
(260,)
(260,)
(260,)
(260,)
(260,)
(260,)
(260,)
(260,)
(260,)


In [270]:
dfs = []
block_labels = [f'block_{i}' for i in range(1, 8)]

df = []
for mouse_id in mouse_ids:
    for cell_type in corr_avg_days[mouse_id].keys():
        if pop_vectors_dict[mouse_id][cell_type].shape[0] < 5:
            continue
        trial_boundaries = np.cumsum([0] + n_trials[mouse_id])
        post_training = np.mean(pop_vectors_dict[mouse_id][cell_type][:, trial_boundaries[-2]:], axis=1, keepdims=True)
        corr = np.corrcoef(pop_vectors_dict[mouse_id][cell_type], post_training, rowvar=False)[-1, :-1]
        
        # blocks = [i for i in range(0, 8) for _ in range(trial_boundaries[i],trial_boundaries[i+1])]
        # trial_id_in_blocks = np.concat([np.arange(0, n_trials[mouse_id][i]) for i in range(8)])
        # block_trial_id = [(block, trial) for block, trial in zip(blocks, trial_id_in_blocks)]
        trial_ids = np.arange(corr.shape[0])

        # multi_index = pd.MultiIndex.from_tuples([('block', 'trial_id')], names=['level_1', 'level_2'])
        df.append(pd.DataFrame([[c, i, cell_type, mouse_id, metadata[mouse_id]['reward_group']] for c, i in zip(corr, trial_ids)],
                            columns=['correlation', 'trial', 'cell_type', 'mouse_id', 'reward_group']))
df = pd.concat(df, ignore_index=True)



In [271]:
sns.pointplot(data=df.loc[df.cell_type=='allcells'], x='trial', y='correlation', linestyles='none', errorbar=None)
plt.ylim([-1,1])
ax = plt.gca()
ax.set_xticks(range(0,280,20))

[<matplotlib.axis.XTick at 0x1fd779938c0>,
 <matplotlib.axis.XTick at 0x1fd5a1a7e60>,
 <matplotlib.axis.XTick at 0x1fd47bbafc0>,
 <matplotlib.axis.XTick at 0x1fd5d194110>,
 <matplotlib.axis.XTick at 0x1fd5d177320>,
 <matplotlib.axis.XTick at 0x1fd5d177e60>,
 <matplotlib.axis.XTick at 0x1fd5d1646b0>,
 <matplotlib.axis.XTick at 0x1fd5d165190>,
 <matplotlib.axis.XTick at 0x1fd5d194470>,
 <matplotlib.axis.XTick at 0x1fd5d165490>,
 <matplotlib.axis.XTick at 0x1fd5d166240>,
 <matplotlib.axis.XTick at 0x1fd5d166ae0>,
 <matplotlib.axis.XTick at 0x1fd5d167590>,
 <matplotlib.axis.XTick at 0x1fd5d167230>]

In [268]:
pop_vectors_dict[mouse_id][cell_type].shape

(1, 260)

In [269]:
post_training.shape

(1, 1)

In [None]:
n_blocks = 8
mapping_block = [0, 1, 3, 5, 7]
learning_block = [2, 4, 6]

sns.set_theme(context='paper', style='ticks', palette='deep')
palette = sns.color_palette()
f, axes = plt.subplots(2, 1, figsize=(15, 6))

for i in range(n_blocks):
    if i in learning_block:
        # color = '#238443'
        color = 'red'

    else:
        color = '#eea429ff'
    axes[0].scatter(range(trial_boundaries[i], trial_boundaries[i+1]),
                    correlations[trial_boundaries[i]:trial_boundaries[i+1]],
                    color=color)
axes[0].set_ylim(-1, 1)
# if apply_pca:
#     plt.title('Correlation\n' \
#               f'mice {mouse_list} ' \
#               f'variance retained: {variance_to_retain}')
# else:   
plt.title('Correlation\n' \
            f'mice {mouse_id} ' \
            'full data (no dim reduction)')

behav_table = nwb_read.get_trial_table(nwb_files[2])
behav_table = compute_performance(behav_table, session_list[2], db_path)

palette = sns.color_palette()
plot_single_session(behav_table, session_list[2], axes[1])


### 1.4 Functional maps across learning days

- amplitude of response
- significance levels (p-value maps)
- LMI

In [131]:
# Load data needed to compute before and after learning.

sampling_rate = 30
win = (1, 1.5)  # from stimulus onset to 300 ms after.
win = (int(win[0] * sampling_rate), int(win[1] * sampling_rate))
baseline_win = (0, 1)
baseline_win = (int(baseline_win[0] * sampling_rate), int(baseline_win[1] * sampling_rate))
reward_group = 'R+'

_, _, mice, _ = io.select_sessions_from_db(db_path,
                                            nwb_dir,
                                            two_p_imaging='yes',
                                            reward_group=reward_group)
print(mice)

['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF323', 'GF333', 'GF334', 'AR133', 'AR135', 'AR127', 'AR143', 'AR144']


  for idx, row in parser.parse():


In [None]:
output_dir = r'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/functional_maps'
pdf_file = f'functional_maps_{reward_group}.pdf'
with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
    for mouse_id in mice:
        print(mouse_id)
        session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                            nwb_dir,
                                                                            two_p_imaging='yes',
                                                                            day=days,
                                                                            subject_id=mouse_id)
        print(session_list)
        data = []
        for session_id in session_list:
            arr, metadata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                                session_id,
                                                                processed_dir)
            arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
            data.append(arr)

        # Select UM trials.
        data = [arr[:, -1] for arr in data]
        # Remove trials with NaNs.
        data = [arr[:, ~np.isnan(arr).all(axis=(0,2))] for arr in data]

        # Load image masks.
        roi_masks = nwb_read.get_image_mask(nwb_files[0])
        roi_masks = np.stack(roi_masks, axis=0)
        
        # Compute significance map.
        # -------------------------
        
        # Compute average response and baseline for each trial, each day.
        baseline_avg = []
        response_avg = []
        for day in data:
            baseline_avg.append(np.nanmean(day[:, :, baseline_win[0]:baseline_win[1]], axis=2))
            response_avg.append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))

        # Compare response amplitude to baseline.
        n_cells = data[0].shape[0]
        p_values = [np.zeros(n_cells) for _ in range(len(data))]
        for iday, day in enumerate(data):
            for icell in range(n_cells):
                _, p_values[iday][icell] = wilcoxon(baseline_avg[iday][icell], response_avg[iday][icell])
        p_values = np.stack(p_values, axis=0)

        # Categories p-values.
        p_values_masks = np.copy(p_values)
        p_values_masks[p_values>0.05] = 1
        p_values_masks[p_values<=0.05] = 2
        p_values_masks[p_values<=0.01] = 3
        p_values_masks[p_values<=0.001] = 4
        
        map_significance = []
        for iday in range(5):
            maps = roi_masks * p_values_masks[iday, :, None, None]
            map_significance.append(np.max(maps, axis=0))
            
        
        # Compute amplitude map.
        # ----------------------
        
        # Compute average response amplitude for each cell.
        response_amplitude = []
        for day in response_avg:
            response_amplitude.append(np.nanmean(day, axis=1))
        response_amplitude = np.stack(response_amplitude, axis=0)
        
        map_amplitude = []
        for iday in range(5):
            maps = roi_masks * response_amplitude[iday, :, None, None]
            map_amplitude.append(np.max(maps, axis=0))
    

        # Plot maps.
        # ----------
        
        f, axes = plt.subplots(2,5, figsize=(20, 8), sharex=True, sharey=True)
        
        # Plot amplitude maps.
        cmap = sns.color_palette("viridis", as_cmap=True)
        # vmin = np.nanmin(response_amplitude)
        vmin = 0
        vmax = np.percentile(response_amplitude, 98)
        
        for iday in range(5):
            a = axes[0,iday].imshow(map_amplitude[iday],
                                interpolation='nearest',
                                cmap=cmap,
                                vmin=vmin, vmax=vmax)
        cbar_ax = f.add_axes([.91,.124,.04,.754])
        f.colorbar(a, cax=cbar_ax, location='right')
        
        
        cmap = ['white', '#d9d9d9', '#fdbb84', '#ef6548', '#990000']
        cmap = colors.ListedColormap(cmap)
        bounds = range(cmap.N+1)
        norm = colors.BoundaryNorm(bounds, cmap.N)
        # Plot responsivity maps.
        for iday in range(5):
            axes[1, iday].imshow(map_significance[iday], cmap=cmap, norm=norm, interpolation='nearest')
            # axes[iday].imshow(map_significance[iday])
            
        plt.suptitle(mouse_id)
        pdf.savefig()
        plt.close()




## Learning modulation index (LMI)

Computing LMI maps.

What else other than maps do we want to look at? 


Check what is quantified in Drieu et al. to conclude no sensory plasticity.
- Number of neurons responsive across days.
- Variability of the response across days. But how to compute that? Correlation of the pop vector across trials could work well or cell by cell?
- Recompute the variability of the response. 


In [114]:
metadata['AR132']['reward_group']

KeyError: 'AR132'