In [1]:
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import colors
from scipy.stats import mannwhitneyu, wilcoxon
from sklearn.metrics import auc, roc_curve
from sklearn.utils import shuffle

sys.path.append(r'H:/anthony/repos/NWB_analysis')
from matplotlib.backends.backend_pdf import PdfPages
from nwb_wrappers import nwb_reader_functions as nwb_read

import src.utils.utils_imaging as imaging_utils
import src.utils.utils_io as io
from src.behavior import compute_performance, plot_single_session

# Font handling for exported figures. In matplotlib, fonttype 42 embeds
# TrueType fonts in PDF/PS output and svg.fonttype 'none' keeps SVG text as
# text, so labels stay editable in vector-graphics software.
plt.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'svg.fonttype': 'none',
})

In [2]:
# Directory containing the processed data; passed to
# imaging_utils.load_session_2p_imaging in the cells below.
processed_dir = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/data_processed/mice"
# Directory of NWB files; passed to io.select_sessions_from_db in the cells below.
nwb_dir = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/NWB"

# Session metadata file (Excel); passed to io.select_sessions_from_db to
# select sessions.
db_path = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/session_metadata.xlsx"

# Disabled alternative: build the NWB file list from the rewarded and
# non-rewarded group YAML files instead of querying the metadata file.
# group_yaml_rew = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/groups/imaging_rewarded.yaml"
# group_yaml_non_rew = r"//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/mice_info/groups/imaging_non_rewarded.yaml"
# nwb_list_rew = io.read_group_yaml(group_yaml_rew)
# nwb_list_non_rew = io.read_group_yaml(group_yaml_non_rew)
# nwb_list = nwb_list_rew + nwb_list_non_rew

## 1. Response to sensory mapping trials across learning

Here I look at the evolution of the population response across learning days. This is computed on the 50 whisker stimulations presented at the end of each session, after disengagement.

- PSTH
- amplitude of the population response
- number of responsive neurons
- stability of the response



### 1.1. Amplitude of the response across learning days.

### 1.2. Correlation matrices and responsive similarity across learning.

In [31]:
# Analysis settings used to compare responses before and after learning.

sampling_rate = 30  # presumably imaging frames per second — TODO confirm.

# Windows are written in seconds and converted to sample indices.
# Response window: 1 s to 1.5 s, i.e. 500 ms
# (presumably starting at stimulus onset — TODO confirm).
win = tuple(int(t * sampling_rate) for t in (1, 1.5))
# Baseline window: 0 s to 1 s.
baseline_win = tuple(int(t * sampling_rate) for t in (0, 1))

days = ['-2', '-1', '0', '+1', '+2']
reward_group = 'R-'

# Only the third returned value (the list of mouse ids) is used here.
_, _, mice, _ = io.select_sessions_from_db(
    db_path,
    nwb_dir,
    two_p_imaging='yes',
    reward_group=reward_group,
)
print(mice)

['GF319', 'GF348', 'GF350', 'MI062', 'MI069', 'MI072', 'MI075', 'MI076', 'AR132', 'AR137', 'AR139', 'AR131']


  for idx, row in parser.parse():


In [32]:
# Per-mouse trial-by-trial correlation matrices of the population response,
# one PDF page per mouse.
output_dir = r'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/sensory_plasticity'
pdf_file = f'correlation_matrices_{reward_group}.pdf'

# Mean population response, one row per mouse and one column per session.
# Allocated here so the cell runs on a fresh kernel instead of relying on a
# variable left over from an earlier kernel state; NaN marks missing sessions.
# NOTE(review): columns follow the order of `session_list`, presumably the
# order of `days` — confirm.
population_response = np.full((len(mice), len(days)), np.nan)

with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
    for imouse, mouse_id in enumerate(mice):
        session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                            nwb_dir,
                                                                            two_p_imaging='yes',
                                                                            subject_id=mouse_id,
                                                                            day=days,)
        print(session_list)
        if not session_list:
            # Nothing to correlate for this mouse.
            continue

        data = []
        for session_id in session_list:
            arr, metadata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                                session_id,
                                                                processed_dir)
            # Baseline subtraction along axis 3 (the time axis).
            arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
            data.append(arr)

        # Select UM trials (last entry along axis 1).
        data = [arr[:, -1] for arr in data]
        # Remove trials that are NaN for every cell and every time point.
        data = [arr[:, ~np.isnan(arr).all(axis=(0,2))] for arr in data]

        # Compute average response for each trial, each day.
        response_avg = []
        for day in data:
            response_avg.append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))

        # Compute average response amplitude for each cell across days.
        response_amplitude = []
        for day in response_avg:
            response_amplitude.append(np.nanmean(day, axis=1))
        response_amplitude = np.stack(response_amplitude, axis=0)

        # Store this mouse in its own row (previously every mouse overwrote row 0).
        day_means = np.mean(response_amplitude, axis=1)
        population_response[imouse, :day_means.size] = day_means

        # Trials of all days side by side, then trial-by-trial correlation of
        # the population vectors.
        corr_matrix = np.concatenate(response_avg, axis=1)
        corr_matrix = np.corrcoef(corr_matrix.T)
        # Remove diagonal for visualization.
        np.fill_diagonal(corr_matrix, 0)

        plt.figure()
        # vmin=-1
        # vmax=1
        # plt.imshow(corr_matrix, vmin=vmin, vmax=vmax)
        plt.imshow(corr_matrix)
        n_trials = [arr.shape[1] for arr in data]
        # imshow centres pixel k on coordinate k, so the border between the
        # last trial of one day and the first trial of the next is at i - 0.5.
        for i in np.cumsum(n_trials)[:-1]:
            plt.axvline(x=i - 0.5, color='#252525', linestyle='-', lw=0.5)
            plt.axhline(y=i - 0.5, color='#252525', linestyle='-', lw=0.5)
        plt.title(mouse_id)
        plt.colorbar()
        pdf.savefig(dpi=300)
        plt.close()

  for idx, row in parser.parse():


['GF319_24122020_120204', 'GF319_25122020_142951', 'GF319_26122020_144746', 'GF319_27122020_135842', 'GF319_28122020_132438']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF348_29052021_100151', 'GF348_30052021_110107', 'GF348_31052021_102411', 'GF348_01062021_095758', 'GF348_02062021_084344']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF350_29052021_124022', 'GF350_30052021_123155', 'GF350_31052021_135001', 'GF350_01062021_122420', 'GF350_02062021_142138']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['MI062_30092021_091006', 'MI062_01102021_091233', 'MI062_02102021_105027', 'MI062_03102021_103851', 'MI062_04102021_092339']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['MI069_19122021_100830', 'MI069_20122021_095058', 'MI069_21122021_090648', 'MI069_22122021_090212', 'MI069_23122021_085758']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['MI072_19122021_140553', 'MI072_20122021_125805', 'MI072_21122021_132704', 'MI072_22122021_132651', 'MI072_23122021_132111']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['MI075_19122021_152533', 'MI075_20122021_155245', 'MI075_21122021_151949', 'MI075_22122021_152806', 'MI075_23122021_150004']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['MI076_19122021_120004', 'MI076_20122021_113038', 'MI076_21122021_112146', 'MI076_22122021_114039', 'MI076_23122021_113818']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR132_20240424_112338', 'AR132_20240425_102625', 'AR132_20240426_093953', 'AR132_20240427_122605', 'AR132_20240428_122206']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR137_20240424_172627', 'AR137_20240425_170755', 'AR137_20240426_152510', 'AR137_20240427_171535', 'AR137_20240428_163224']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR139_20240424_185913', 'AR139_20240425_181627', 'AR139_20240426_165725', 'AR139_20240427_183701', 'AR139_20240428_180459']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR131_20240301_145952', 'AR131_20240302_123034', 'AR131_20240303_171032', 'AR131_20240304_133332', 'AR131_20240305_140141']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)


Make common matrix for the whole population.

One for each reward group. Also look at the difference between GF and AR.

In [52]:
# Analysis settings used to compare responses before and after learning.

sampling_rate = 30  # presumably imaging frames per second — TODO confirm.

# Windows are written in seconds and converted to sample indices.
# Response window: 1 s to 1.5 s, i.e. 500 ms
# (presumably starting at stimulus onset — TODO confirm).
win = tuple(int(t * sampling_rate) for t in (1, 1.5))
# Baseline window: 0 s to 1 s.
baseline_win = tuple(int(t * sampling_rate) for t in (0, 1))

days = ['-2', '-1', '0', '+1', '+2']
reward_group = 'R+'

# Only the third returned value (the list of mouse ids) is used here.
_, _, mice, _ = io.select_sessions_from_db(
    db_path,
    nwb_dir,
    two_p_imaging='yes',
    reward_group=reward_group,
)
print(mice)


['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF323', 'GF333', 'GF334', 'AR133', 'AR135', 'AR127', 'AR143', 'AR144']


  for idx, row in parser.parse():


In [60]:
# Collect, for every mouse, the trial-wise response of each cell on each day.
output_dir = r'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/sensory_plasticity'
pdf_file = f'global_correlation_matrices_{reward_group}.pdf'

# mouse id -> list with one array per session, each averaged over the
# response window.
average_response = {}

for mouse_id in mice:
    session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(
        db_path,
        nwb_dir,
        two_p_imaging='yes',
        subject_id=mouse_id,
        day=days,
    )
    print(session_list)

    data = []
    for session_id in session_list:
        arr, metadata = imaging_utils.load_session_2p_imaging(
            mouse_id, session_id, processed_dir)
        # Baseline subtraction along axis 3 (the time axis).
        arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
        # Keep only the UM trials (last entry along axis 1).
        data.append(arr[:, -1])

    # Drop trials that are NaN for every cell and every time point.
    data = [session[:, ~np.isnan(session).all(axis=(0, 2))] for session in data]

    # Average over the response window: one value per cell and trial.
    average_response[mouse_id] = []
    for day in data:
        average_response[mouse_id].append(np.nanmean(day[:, :, slice(*win)], axis=2))


  for idx, row in parser.parse():


['GF305_27112020_083119', 'GF305_28112020_103938', 'GF305_29112020_103331', 'GF305_30112020_110255', 'GF305_02122020_132229']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF306_27112020_104436', 'GF306_28112020_125555', 'GF306_29112020_131929', 'GF306_30112020_133249', 'GF306_02122020_161611']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF307_17112020_080325', 'GF307_18112020_075939', 'GF307_19112020_083908', 'GF307_20112020_082942', 'GF307_21112020_102608']


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF308_17112020_105052', 'GF308_18112020_093627', 'GF308_19112020_103527', 'GF308_20112020_122826', 'GF308_21112020_135515']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF310_17112020_132720', 'GF310_18112020_122252', 'GF310_19112020_131953', 'GF310_20112020_150929', 'GF310_21112020_160059']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF311_17112020_155501', 'GF311_18112020_151838', 'GF311_19112020_160412', 'GF311_20112020_171609', 'GF311_21112020_180049']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF313_27112020_141857', 'GF313_28112020_154236', 'GF313_29112020_154625', 'GF313_30112020_154904', 'GF313_03122020_082147']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF314_27112020_160459', 'GF314_28112020_171800', 'GF314_29112020_174831', 'GF314_30112020_171906', 'GF314_03122020_102249']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF317_15122020_081931', 'GF317_16122020_082007', 'GF317_17122020_080715', 'GF317_18122020_104834', 'GF317_20122020_120604']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF318_15122020_095616', 'GF318_16122020_095516', 'GF318_17122020_144100', 'GF318_18122020_132105', 'GF318_19122020_155806']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF323_07012021_092005', 'GF323_08012021_083725', 'GF323_09012021_111716', 'GF323_11012021_084126', 'GF323_12012021_090219']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF333_21012021_125450', 'GF333_22012021_135939', 'GF333_24012021_145617', 'GF333_25012021_141608', 'GF333_26012021_142304']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['GF334_21012021_160130', 'GF334_22012021_153815', 'GF334_24012021_173019', 'GF334_25012021_163843', 'GF334_26012021_171010']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR133_20240424_130306', 'AR133_20240425_115233', 'AR133_20240426_113430', 'AR133_20240427_142253', 'AR133_20240428_134911']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR135_20240424_160805', 'AR135_20240425_151948', 'AR135_20240426_133260', 'AR135_20240427_154417', 'AR135_20240428_150944']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR127_20240221_133407', 'AR127_20240222_152629', 'AR127_20240223_131820', 'AR127_20240224_140853', 'AR127_20240225_142858']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR143_20240518_174556', 'AR143_20240519_141725', 'AR143_20240520_130137', 'AR143_20240521_125833', 'AR143_20240522_172846']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)
  for idx, row in parser.parse():


['AR144_20240518_193553', 'AR144_20240519_151737', 'AR144_20240520_141104', 'AR144_20240521_142259', 'AR144_20240522_190834']


  baseline = np.nanmean(arr[*slices], axis=time_axis, keepdims=True)


In [61]:
# Even out the number of trials per day across mice: for each day, find the
# smallest trial count among all mice (used later to truncate every mouse).
min_trials = []
for iday in range(len(days)):
    # Trial count (axis 1) of this day for every mouse.
    m = [sessions[iday].shape[1] for sessions in average_response.values()]
    print(m)
    min_trials.append(np.min(m))

[49, 50, 50, 49, 49, 50, 50, 50, 46, 50, 50, 50, 48, 48, 46, 49, 50, 50]
[50, 50, 50, 50, 49, 50, 48, 50, 50, 49, 50, 50, 50, 49, 50, 49, 50, 50]
[49, 50, 37, 50, 50, 48, 50, 48, 50, 50, 50, 50, 50, 50, 50, 50, 49, 50]
[48, 49, 49, 50, 33, 50, 50, 50, 50, 48, 50, 43, 50, 47, 45, 49, 50, 48]
[49, 50, 49, 49, 49, 50, 49, 50, 49, 50, 50, 49, 47, 44, 50, 49, 50, 47]


In [62]:
# Display the mouse ids selected for the current reward group.
mice

['GF305',
 'GF306',
 'GF307',
 'GF308',
 'GF310',
 'GF311',
 'GF313',
 'GF314',
 'GF317',
 'GF318',
 'GF323',
 'GF333',
 'GF334',
 'AR133',
 'AR135',
 'AR127',
 'AR143',
 'AR144']

In [None]:

# Truncate every mouse to the same number of trials per day (the minimum across
# mice), so cells from all mice can be stacked on a common trial axis.
# The truncation is applied in place and is safe to re-run.
for mouse, data in average_response.items():
    for iday in range(len(days)):
        average_response[mouse][iday] = data[iday][:, :min_trials[iday]]

# Within each mouse, concatenate days along the trial axis (axis 1); then stack
# all mice along the cell axis (axis 0).
corr_matrix = np.concatenate([np.concatenate(data, axis=1) for mouse, data in average_response.items()], axis=0)
# Trial-by-trial correlation of the pooled population vectors.
corr_matrix = np.corrcoef(corr_matrix.T)
# Remove diagonal for visualization.
np.fill_diagonal(corr_matrix, 0)

# After truncation every mouse has exactly `min_trials[iday]` trials per day,
# so the day boundaries come from `min_trials` rather than from the leftover
# loop variable `data`.
n_trials = [int(n) for n in min_trials]

with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
    plt.figure()
    # vmin=-1
    # vmax=1
    # plt.imshow(corr_matrix, vmin=vmin, vmax=vmax)
    plt.imshow(corr_matrix)
    # imshow centres pixel k on coordinate k, so the border between the last
    # trial of one day and the first trial of the next is at i - 0.5.
    for i in np.cumsum(n_trials)[:-1]:
        plt.axvline(x=i - 0.5, color='#252525', linestyle='-', lw=0.5)
        plt.axhline(y=i - 0.5, color='#252525', linestyle='-', lw=0.5)
    plt.title(reward_group)
    plt.colorbar()
    pdf.savefig(dpi=300)
    # plt.close()

In [59]:
# Sanity check: shape of the first session's response array for mouse GF305
# (axis 1 is the trial axis; axis 0 is presumably cells — TODO confirm).
average_response['GF305'][0].shape

(133, 46)

In [178]:
from sklearn.metrics.pairwise import cosine_similarity

# Trial-by-trial cosine similarity of the population response for one mouse.
# NOTE(review): uses `response_avg` and `mouse_id` left in the kernel by the
# last iteration of a per-mouse cell above — run that cell first.
sim = np.concatenate(response_avg, axis=1)
sim = cosine_similarity(sim.T)


plt.figure()
# vmin=-1
# vmax=1
# plt.imshow(corr_matrix, vmin=vmin, vmax=vmax)
plt.imshow(sim)
# Trial counts come from the arrays that built `sim`, so the day separators
# always match the matrix (the previous `data` could belong to another cell).
n_trials = [day_response.shape[1] for day_response in response_avg]
# imshow centres pixel k on coordinate k, so the border between two days is
# at i - 0.5.
for i in np.cumsum(n_trials)[:-1]:
    plt.axvline(x=i - 0.5, color='#252525', linestyle='-',)
    plt.axhline(y=i - 0.5, color='#252525', linestyle='-',)
plt.title(mouse_id)

# Trailing semicolon hides the Colorbar repr in the cell output.
plt.colorbar();

<matplotlib.colorbar.Colorbar at 0x207f56f5610>

### 1.3. Functional maps across learning days

- amplitude of response
- significance levels (p-value maps)
- LMI

In [131]:
# Analysis settings for the functional maps.

sampling_rate = 30  # presumably imaging frames per second — TODO confirm.

# Windows are written in seconds and converted to sample indices.
# Response window: 1 s to 1.5 s, i.e. 500 ms
# (presumably starting at stimulus onset — TODO confirm).
win = tuple(int(t * sampling_rate) for t in (1, 1.5))
# Baseline window: 0 s to 1 s.
baseline_win = tuple(int(t * sampling_rate) for t in (0, 1))

reward_group = 'R+'

# Only the third returned value (the list of mouse ids) is used here.
_, _, mice, _ = io.select_sessions_from_db(
    db_path,
    nwb_dir,
    two_p_imaging='yes',
    reward_group=reward_group,
)
print(mice)

['GF305', 'GF306', 'GF307', 'GF308', 'GF310', 'GF311', 'GF313', 'GF314', 'GF317', 'GF318', 'GF323', 'GF333', 'GF334', 'AR133', 'AR135', 'AR127', 'AR143', 'AR144']


  for idx, row in parser.parse():


In [None]:
# Functional maps per mouse: response amplitude (top row) and significance
# category (bottom row) for every loaded session, one PDF page per mouse.
output_dir = r'//sv-nas1.rcp.epfl.ch/Petersen-Lab/analysis/Anthony_Renard/analysis_output/functional_maps'
pdf_file = f'functional_maps_{reward_group}.pdf'
with PdfPages(os.path.join(output_dir, pdf_file)) as pdf:
    for mouse_id in mice:
        print(mouse_id)
        session_list, nwb_files, _, db_filtered = io.select_sessions_from_db(db_path,
                                                                            nwb_dir,
                                                                            two_p_imaging='yes',
                                                                            day=days,
                                                                            subject_id=mouse_id)
        print(session_list)
        if not session_list:
            # Nothing to plot for this mouse.
            continue

        data = []
        for session_id in session_list:
            arr, metadata = imaging_utils.load_session_2p_imaging(mouse_id,
                                                                session_id,
                                                                processed_dir)
            # Baseline subtraction along axis 3 (the time axis).
            arr = imaging_utils.substract_baseline(arr, 3, baseline_win)
            data.append(arr)

        # Select UM trials (last entry along axis 1).
        data = [arr[:, -1] for arr in data]
        # Remove trials that are NaN for every cell and every time point.
        data = [arr[:, ~np.isnan(arr).all(axis=(0,2))] for arr in data]

        # Number of sessions actually loaded; all per-day loops and the figure
        # layout follow it instead of a hard-coded 5.
        n_days = len(data)

        # Load image masks (taken from the first NWB file of the mouse).
        roi_masks = nwb_read.get_image_mask(nwb_files[0])
        roi_masks = np.stack(roi_masks, axis=0)

        # Compute significance map.
        # -------------------------

        # Compute average response and baseline for each trial, each day.
        baseline_avg = []
        response_avg = []
        for day in data:
            baseline_avg.append(np.nanmean(day[:, :, baseline_win[0]:baseline_win[1]], axis=2))
            response_avg.append(np.nanmean(day[:, :, win[0]:win[1]], axis=2))

        # Compare response amplitude to baseline (paired Wilcoxon test across
        # trials, one test per cell and day).
        n_cells = data[0].shape[0]
        p_values = np.zeros((n_days, n_cells))
        for iday in range(n_days):
            for icell in range(n_cells):
                _, p_values[iday, icell] = wilcoxon(baseline_avg[iday][icell], response_avg[iday][icell])

        # Categorize p-values: 1 = n.s., 2 = p<=0.05, 3 = p<=0.01, 4 = p<=0.001.
        # Later assignments override earlier ones for smaller p-values.
        p_values_masks = np.copy(p_values)
        p_values_masks[p_values>0.05] = 1
        p_values_masks[p_values<=0.05] = 2
        p_values_masks[p_values<=0.01] = 3
        p_values_masks[p_values<=0.001] = 4

        # Paint each ROI with its category; where ROIs overlap the max wins.
        map_significance = []
        for iday in range(n_days):
            maps = roi_masks * p_values_masks[iday, :, None, None]
            map_significance.append(np.max(maps, axis=0))


        # Compute amplitude map.
        # ----------------------

        # Compute average response amplitude for each cell.
        response_amplitude = []
        for day in response_avg:
            response_amplitude.append(np.nanmean(day, axis=1))
        response_amplitude = np.stack(response_amplitude, axis=0)

        map_amplitude = []
        for iday in range(n_days):
            maps = roi_masks * response_amplitude[iday, :, None, None]
            map_amplitude.append(np.max(maps, axis=0))


        # Plot maps.
        # ----------

        # squeeze=False keeps `axes` two-dimensional even with a single day.
        f, axes = plt.subplots(2, n_days, figsize=(4 * n_days, 8),
                               sharex=True, sharey=True, squeeze=False)

        # Plot amplitude maps.
        cmap = sns.color_palette("viridis", as_cmap=True)
        # vmin = np.nanmin(response_amplitude)
        vmin = 0
        # NaN-aware percentile so a cell without valid data cannot turn the
        # colour limit into NaN.
        vmax = np.nanpercentile(response_amplitude, 98)

        for iday in range(n_days):
            a = axes[0,iday].imshow(map_amplitude[iday],
                                interpolation='nearest',
                                cmap=cmap,
                                vmin=vmin, vmax=vmax)
        cbar_ax = f.add_axes([.91,.124,.04,.754])
        f.colorbar(a, cax=cbar_ax, location='right')


        # Discrete colormap: index 0 (background) is white, 1-4 are the
        # significance categories defined above.
        cmap = ['white', '#d9d9d9', '#fdbb84', '#ef6548', '#990000']
        cmap = colors.ListedColormap(cmap)
        bounds = range(cmap.N+1)
        norm = colors.BoundaryNorm(bounds, cmap.N)
        # Plot responsivity maps.
        for iday in range(n_days):
            axes[1, iday].imshow(map_significance[iday], cmap=cmap, norm=norm, interpolation='nearest')
            # axes[iday].imshow(map_significance[iday])

        plt.suptitle(mouse_id)
        pdf.savefig()
        plt.close()




## Learning modulation index (LMI)

Computing LMI maps.

What else other than maps do we want to look at? 


Check what is quantified in Drieu et al. to conclude no sensory plasticity.
- Number of neurons responsive across days.
- Variability of the response across days. But how to compute that? Correlation of the pop vector across trials could work well or cell by cell?
- Recompute the variability of the response. 
