In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
from os.path import join as pjoin
from tqdm.notebook import tqdm
import plotly.graph_objects as go
import pingouin as pg
from scipy.stats import pearsonr, spearmanr, zscore
from natsort import natsorted
import itertools

sys.path.append('/home/austinbaggetta/csstorage3/CircleTrack/CircleTrackAnalysis')
import circletrack_behavior as ctb
import circletrack_neural as ctn
import place_cells as pc
import plotting_functions as pf

  return warn(


In [7]:
## Settings
## Every path below is relative to the notebook's working directory.
project_folder = ['MultiCon_Imaging']
experiment_folders = ['MultiCon_Imaging4', 'MultiCon_Imaging5']
dpath = f'../../{project_folder[0]}'
fig_path = '../../../Manuscripts/MultiCon/intermediate_plots'

## Plot colours
chance_color = 'darkgrey'
avg_color = 'midnightblue'
subject_color = 'darkgrey'
ce_colors = ['darkgrey', 'midnightblue']
ce_color_dict = {'Control': 'darkgrey', 'Experimental': 'midnightblue'}
mouse_colors = ['midnightblue', 'darkred', 'darkorchid', 'darkturquoise']

## Cohort membership; mice not listed are labelled Female / Experimental by the analysis cells
male_mice = ['mc42', 'mc43', 'mc44', 'mc46']
control_mice = ['mc46', 'mc47', 'mc49', 'mc52']
imaging4 = ['mc42', 'mc43']  ## mice stored under experiment_folders[0]; all others use experiment_folders[1]

## Session and day labels
session_list = [f'{context}{x}' for context in 'ABCD' for x in np.arange(1, 6)]  ## A1..A5, B1..B5, C1..C5, D1..D5
control_list = [f'A{x}' for x in np.arange(1, 16)] + [f'B{x}' for x in np.arange(1, 6)]
day_list = [f'{x}' for x in np.arange(1, 21)]

## Analysis parameters
bin_size = 0.4  ## spatial bin width handed to ctb.calculate_bins (same units as lin_position)
velocity_thresh = 14  ## handed to pc.define_running_epochs; presumably cm/s since positions are converted to cm first - TODO confirm
centroid_distance = 5  ## selects the cross-registration file mappings_{centroid_distance}.pkl
data_of_interest = 'aligned_minian' ## one of behav, aligned_minian, lin_behav

## exist_ok avoids the check-then-create race and makes the cell safe to re-run
os.makedirs(fig_path, exist_ok=True)

xr.set_options(keep_attrs=True)

<xarray.core.options.set_options at 0x7fa0fa913c70>

### Within-session stability: first half vs. second half of the session, and odd vs. even trials (correct-direction trials only). Uses all cells.

In [148]:
## Within-session spatial stability of every cell, for every mouse and session:
##   * first half vs second half of the running frames
##   * alternating correct-direction ("forward") trials (odd vs even)
## Result: cell_df, one row per cell per session.
data_type = 'S'
excluded_mice = ['mc47']
## Zero-based file position after which the day number is pushed back by one for that mouse
## (presumably that mouse is missing one recording day - TODO confirm against the session logs).
day_shift_after = {'mc43': 11, 'mc42': 14, 'mc44': 7, 'mc46': 9, 'mc52': 2}
cell_dict = {'mouse': [], 'group': [], 'group_two': [], 'session': [], 'day': [], 'unit_id': [],
             'first_vs_second_stability': [], 'odd_vs_even_stability': []}


def spatial_tuning(activity, position, edges):
    """Average each cell's activity inside every spatial bin.

    Parameters
    ----------
    activity : np.ndarray, shape (n_cells, n_frames)
        Activity of every cell on every frame.
    position : np.ndarray, shape (n_frames,)
        Linearized position on every frame.
    edges : sequence
        Bin edges; bin i covers [edges[i], edges[i + 1]).

    Returns
    -------
    np.ndarray, shape (n_cells, len(edges) - 1)
        Mean activity per bin; NaN for bins that contain no frames.
    """
    ## One column per bin (edges define len(edges) - 1 bins). Allocating len(edges) columns
    ## would leave a trailing all-zero column that enters every correlation as a shared (0, 0) point.
    tuning = np.full((activity.shape[0], max(len(edges) - 1, 0)), np.nan)
    for idx, (start, end) in enumerate(zip(edges[:-1], edges[1:])):
        in_bin = (position >= start) & (position < end)
        if in_bin.any():
            tuning[:, idx] = activity[:, in_bin].mean(axis=1)
    return tuning


## os.listdir returns entries in arbitrary order; natural sorting makes the run deterministic
for experiment in natsorted(os.listdir(dpath)):
    if experiment not in experiment_folders:
        continue
    exp_path = pjoin(dpath, f'{experiment}/output/{data_of_interest}/')
    for mouse in natsorted(os.listdir(exp_path)):
        if mouse in excluded_mice:
            continue
        mpath = pjoin(exp_path, f'{mouse}/{data_type}')
        sex = 'Male' if mouse in male_mice else 'Female'
        group = 'Control' if mouse in control_mice else 'Experimental'
        ## The position in the listing becomes the day number, so the files must be in
        ## natural order (..._2.nc before ..._10.nc).
        for index, session in enumerate(natsorted(os.listdir(mpath))):
            day = index + 1
            if mouse in day_shift_after and index > day_shift_after[mouse]:
                day += 1

            ## Read everything into memory and release the file handle straight away
            with xr.open_dataset(pjoin(mpath, session)) as dataset:
                sdata = dataset[data_type].load()

            ## Keep only frames classified as running
            x_cm, y_cm = ctb.convert_to_cm(x=sdata['x'].values, y=sdata['y'].values)
            _, running = pc.define_running_epochs(x_cm,
                                                  y_cm,
                                                  sdata['behav_t'].values,
                                                  velocity_thresh=velocity_thresh)
            data = sdata[:, running].copy()
            activity = data.values
            position = data['lin_position'].values
            trials = data['trials'].values
            n_cells, n_frames = activity.shape
            bins = ctb.calculate_bins(x=position, bin_size=bin_size)

            ## First half vs second half stability
            half = n_frames // 2
            first_pop_act = spatial_tuning(activity[:, :half], position[:half], bins)
            second_pop_act = spatial_tuning(activity[:, half:], position[half:], bins)

            ## Odd vs even stability. Forward trials are split by their ordinal position
            ## (1st, 3rd, ... vs 2nd, 4th, ...). Seeding by position and then switching to the parity
            ## of the trial label mixes the two sets whenever the labels do not strictly alternate.
            forward, _ = ctb.forward_reverse_trials(data, data['trials'])
            forward = np.asarray(forward)
            odd_frames = np.isin(trials, forward[0::2])
            even_frames = np.isin(trials, forward[1::2])
            odd_pop_act = spatial_tuning(activity[:, odd_frames], position[odd_frames], bins)
            even_pop_act = spatial_tuning(activity[:, even_frames], position[even_frames], bins)

            ## Per-cell Pearson r; NaN when a tuning curve is constant or contains an unvisited bin
            first_vs_second = [pearsonr(first_pop_act[cell], second_pop_act[cell])[0] for cell in range(n_cells)]
            odd_vs_even = [pearsonr(odd_pop_act[cell], even_pop_act[cell])[0] for cell in range(n_cells)]

            cell_dict['mouse'].extend([mouse] * n_cells)
            cell_dict['group'].extend([group] * n_cells)
            cell_dict['group_two'].extend([sex] * n_cells)
            cell_dict['session'].extend([data.attrs['session_two']] * n_cells)
            cell_dict['day'].extend([day] * n_cells)
            cell_dict['unit_id'].extend(list(data['unit_id'].values))
            cell_dict['first_vs_second_stability'].extend(first_vs_second)
            cell_dict['odd_vs_even_stability'].extend(odd_vs_even)
cell_df = pd.DataFrame(cell_dict)


invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.


invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.


An input array is constant; the correlation coefficient is not defined.


invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.


An input array is constant; the correlation coefficient is not defined.


invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.


An input array is constant; the correlation coefficient is not defined.


invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.


An input array is constant; the correlation coefficient is not defined.


invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.


An input array is constant; the

In [149]:
## Save the per-cell within-session stability table. DataFrame.to_csv does not create missing
## folders, so make sure the intermediate_data folder exists first.
within_session_csv = pjoin(fig_path, 'intermediate_data/within_session_stability.csv')
os.makedirs(os.path.dirname(within_session_csv), exist_ok=True)
cell_df.to_csv(within_session_csv)

In [None]:
## Fisher z-transform each cell's Pearson r, then average across cells for every mouse and day.
## Result: within_df, one row per mouse per day.
fisher_r_limit = 0.99999  ## arctanh(+/-1) is infinite, so |r| is capped just below 1 (covers r = -1 as well as r = 1)
stability_cols = ['first_vs_second_stability', 'odd_vs_even_stability']

within_records = []
## sort=False keeps mice and days in the order they appear in cell_df
for (mouse, day), day_data in cell_df.groupby(['mouse', 'day'], sort=False):
    valid = day_data.dropna(subset=stability_cols)  ## remove any cells with NaN in either measure

    first = np.clip(valid['first_vs_second_stability'].to_numpy(dtype=float), -fisher_r_limit, fisher_r_limit)
    odd = np.clip(valid['odd_vs_even_stability'].to_numpy(dtype=float), -fisher_r_limit, fisher_r_limit)
    zr_first = np.arctanh(first)
    zr_odd = np.arctanh(odd)

    ## A session in which every cell is NaN is kept as an explicit NaN row instead of raising
    has_cells = len(valid) > 0
    within_records.append({'mouse': mouse,
                           'day': day,
                           'group': day_data['group'].iloc[0],
                           'first_vs_second': np.mean(zr_first) if has_cells else np.nan,
                           'odd_vs_even': np.mean(zr_odd) if has_cells else np.nan,
                           'within_sess': np.mean((zr_first + zr_odd) / 2) if has_cells else np.nan})
within_df = pd.DataFrame(within_records,
                         columns=['mouse', 'day', 'group', 'first_vs_second', 'odd_vs_even', 'within_sess'])

In [None]:
## Group-level stability across days: mean +/- SEM over the rows of within_df for each group,
## saved as a PNG. The mixed ANOVA table (day within, group between) is the cell output.
yvar = 'within_sess'
stability_measures = ['first_vs_second', 'odd_vs_even', 'within_sess']
avg_data = within_df.groupby(['group', 'day'], as_index=False).agg(
    {measure: ['mean', 'sem'] for measure in stability_measures}
)

fig = pf.custom_graph_template(x_title='Day', y_title='Stability')
for group_name in avg_data['group'].unique():
    group_stats = avg_data[avg_data['group'] == group_name]
    error_bars = {'type': 'data', 'array': group_stats[yvar]['sem']}
    group_trace = go.Scatter(x=group_stats['day'],
                             y=group_stats[yvar]['mean'],
                             mode='lines+markers',
                             line_color=ce_color_dict[group_name],
                             error_y=error_bars,
                             legendgroup=group_name,
                             name=group_name)
    fig.add_trace(group_trace)

## Dashed vertical guides between days 5|6, 10|11 and 15|16
for boundary in (5.5, 10.5, 15.5):
    fig.add_vline(x=boundary, line_width=1, line_dash='dash', line_color=chance_color, opacity=1)

fig.show()
fig.write_image(pjoin(fig_path, f'{yvar}_stability_across_days_groups.png'))
within_df.mixed_anova(within='day', between='group', dv=yvar, subject='mouse')

In [None]:
## Stability across days for each mouse: one thin line per mouse, coloured by its group.
## Relies on yvar set in the group-average cell.
fig = pf.custom_graph_template(x_title='Day', y_title='Stability')
for mouse_name, mouse_rows in within_df.groupby('mouse', sort=False):
    mouse_group = mouse_rows['group'].iloc[0]
    mouse_trace = go.Scatter(x=mouse_rows['day'],
                             y=mouse_rows[yvar],
                             mode='lines',
                             name=mouse_name,
                             opacity=0.8,
                             line=dict(color=ce_color_dict[mouse_group], width=1))
    fig.add_trace(mouse_trace)

## Dashed vertical guides between days 5|6, 10|11 and 15|16
for boundary in (5.5, 10.5, 15.5):
    fig.add_vline(x=boundary, line_width=1, line_dash='dash', line_color=chance_color, opacity=1)

fig.write_image(pjoin(fig_path, f'{yvar}_stability_across_days_per_mouse.png'))
fig.show()

In [None]:
## Per-day distributions of Fisher z-transformed stability: one panel per day, groups overlaid.
var = 'within_sess'  ## one of within_sess, odd_even, first_second
n_rows, n_cols = 4, 5
n_hist_bins = 40
fisher_r_limit = 0.99999  ## same cap as the per-mouse summary cell; arctanh(+/-1) is infinite
stability_cols = ['first_vs_second_stability', 'odd_vs_even_stability']

if var not in ('within_sess', 'odd_even', 'first_second'):
    raise ValueError(f"var must be one of 'within_sess', 'odd_even', 'first_second'; got {var!r}")

fig = pf.custom_graph_template(x_title='', y_title='', rows=n_rows, columns=n_cols, shared_y=True,
                               shared_x=True, width=900, height=900)

for group in cell_df['group'].unique():
    gdata = cell_df[cell_df['group'] == group]
    for day, day_data in gdata.groupby('day'):
        valid = day_data.dropna(subset=stability_cols)  ## remove any cells with NaN in either measure
        if valid.empty:
            continue  ## nothing to histogram for this group on this day

        first = np.clip(valid['first_vs_second_stability'].to_numpy(dtype=float), -fisher_r_limit, fisher_r_limit)
        odd = np.clip(valid['odd_vs_even_stability'].to_numpy(dtype=float), -fisher_r_limit, fisher_r_limit)
        zr_first = np.arctanh(first)
        zr_odd = np.arctanh(odd)
        plot_options = {'within_sess': (zr_first + zr_odd) / 2,
                        'odd_even': zr_odd,
                        'first_second': zr_first}
        plot_data = plot_options[var]

        ## Panel is chosen from the day number itself (day 1 -> row 1, col 1; day 6 -> row 2, col 1),
        ## so both groups land in the same panel even if a day is missing or rows are unordered.
        row, col = divmod(int(day) - 1, n_cols)

        ## np.histogram returns n_hist_bins + 1 edges; bars are drawn at the bin centres so
        ## x and y have the same length and bars are not shifted onto the left edges.
        counts, xbins = np.histogram(plot_data, bins=n_hist_bins, density=True)
        centers = (xbins[:-1] + xbins[1:]) / 2
        fig.add_trace(go.Bar(x=centers, y=counts, marker_color=ce_color_dict[group],
                             showlegend=False), row=row + 1, col=col + 1)
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.70)
fig.update_xaxes(title='Stability', row=n_rows)
fig.update_yaxes(title='Density', col=1)
fig.show()

### Plot relationship between first_vs_second stability and odd_vs_even stability for control vs experimental.

In [None]:
## Per-cell first-vs-second stability (x) against odd-vs-even stability (y), one panel per day.
num_plots = 20
nrows = 4
ncols = num_plots // nrows
fig = pf.custom_graph_template(x_title='', y_title='', rows=nrows, columns=ncols, titles=day_list,
                               width=1000, height=1000, shared_y=True, shared_x=True)
## NOTE(review): the section header mentions control vs experimental, but only the Experimental
## group is plotted here - add 'Control' to this list if both are wanted.
for group in ['Experimental']:
    gdata = cell_df[cell_df['group'] == group]
    for day, day_data in gdata.groupby('day'):
        ## Panel titles are the day numbers in day_list, so the panel is chosen from the day itself;
        ## placing by enumeration order would mislabel every panel after a missing day.
        row, col = divmod(int(day) - 1, ncols)
        fig.add_trace(go.Scatter(x=day_data['first_vs_second_stability'], y=day_data['odd_vs_even_stability'],
                                 mode='markers', marker_color=ce_color_dict[group], showlegend=False,
                                 opacity=0.5), row=row + 1, col=col + 1)
fig.update_yaxes(title='Pearson r', col=1)
fig.update_xaxes(title='Pearson r', row=nrows)
fig.show()

### Across-session stability

In [142]:
## Across-session stability for one mouse: for every pair of sessions in sess_list, correlate the
## spatial tuning curves of cells that were cross-registered in both sessions.
## Result: stability_df (one row per cell pair), also written to CSV.
data_type = 'S'
cell_dict = {'mouse': [], 'group': [], 'sex': [], 'session_one': [], 'session_two': [], 'unit_id_one': [], 'unit_id_two': [], 'stability': []}
mouse = 'mc42'
sess_list = [f'{mouse}_{data_type}_{x}.nc' for x in np.arange(11, 16)]
# sess_list = [f'{mouse}_{data_type}_{x}.nc' for x in np.arange(11, 13)] + [f'{mouse}_{data_type}_{x}.nc' for x in np.arange(14, 16)]
file_name_suffix = 'contextC'

## Session data and cross-registration results live under the same experiment folder
experiment = experiment_folders[0] if mouse in imaging4 else experiment_folders[1]
exp_path = pjoin(dpath, f'{experiment}/output/{data_of_interest}/')
mpath = pjoin(exp_path, f'{mouse}/{data_type}')
sex = 'Male' if mouse in male_mice else 'Female'
group = 'Control' if mouse in control_mice else 'Experimental'
crossreg_path = pjoin(dpath, f'{experiment}/output/cross_registration_results')
mappings = pd.read_pickle(pjoin(crossreg_path, f'circletrack_data/{mouse}/mappings_{centroid_distance}.pkl'))
mappings.columns = mappings.columns.droplevel(0)  ## keep only the inner column level, which is indexed by session date below


def load_session(file_name):
    """Read one session's data_type array fully into memory and close the file."""
    with xr.open_dataset(pjoin(mpath, file_name)) as dataset:
        return dataset[data_type].load()


def forward_running_frames(session):
    """Keep the frames of correct-direction trials, then only those classified as running."""
    forward, _ = ctb.forward_reverse_trials(session, session['trials'])
    ## Single concat in forward-trial order (same frame order as concatenating trial by trial)
    stitched = xr.concat([session[:, session['trials'] == trial] for trial in forward], dim='frame')
    x_cm, y_cm = ctb.convert_to_cm(x=stitched['x'].values, y=stitched['y'].values)
    _, running = pc.define_running_epochs(x_cm,
                                          y_cm,
                                          stitched['behav_t'].values,
                                          velocity_thresh=velocity_thresh)
    return stitched[:, running]


def binned_activity(session, edges):
    """Mean activity per cell in each spatial bin; NaN for bins with no frames.

    Returns an array of shape (n_cells, len(edges) - 1): one column per bin, so no
    unused trailing column enters the correlation.
    """
    activity = session.values
    position = session['lin_position'].values
    pop_act = np.full((activity.shape[0], max(len(edges) - 1, 0)), np.nan)
    for idx, (start, end) in enumerate(zip(edges[:-1], edges[1:])):
        in_bin = (position >= start) & (position < end)
        if in_bin.any():
            pop_act[:, idx] = activity[:, in_bin].mean(axis=1)
    return pop_act


for d1, d2 in itertools.combinations_with_replacement(sess_list, r=2):
    print((d1, d2))
    sess_one = load_session(d1)
    sess_two = sess_one if d1 == d2 else load_session(d2)

    if d1 == d2:
        ## A session paired with itself is listed with NaN stability (fills the heatmap diagonal)
        unit_ids_one = list(sess_one['unit_id'].values)
        unit_ids_two = list(sess_two['unit_id'].values)
        stability = [np.nan] * len(unit_ids_one)
    else:
        date_one, date_two = sess_one.attrs['date'], sess_two.attrs['date']
        ## Rows with a unit id in both sessions are the cross-registered cells
        shared_cells = mappings[[date_one, date_two]].dropna().reset_index(drop=True)
        first_sess = forward_running_frames(sess_one.sel(unit_id=shared_cells[date_one].values))
        second_sess = forward_running_frames(sess_two.sel(unit_id=shared_cells[date_two].values))

        ## Both sessions must be binned on the SAME edges so that bin i is the same place in each.
        ## Assumes ctb.calculate_bins derives its edges from the positions it is given - TODO confirm.
        bins = ctb.calculate_bins(x=np.concatenate([first_sess['lin_position'].values,
                                                    second_sess['lin_position'].values]),
                                  bin_size=bin_size)
        first_pop_act = binned_activity(first_sess, bins)
        second_pop_act = binned_activity(second_sess, bins)

        unit_ids_one = list(first_sess['unit_id'].values)
        unit_ids_two = list(second_sess['unit_id'].values)
        stability = [pearsonr(first_pop_act[cell], second_pop_act[cell])[0]
                     for cell in range(len(unit_ids_one))]

    n_pairs = len(stability)
    cell_dict['mouse'].extend([mouse] * n_pairs)
    cell_dict['group'].extend([group] * n_pairs)
    cell_dict['sex'].extend([sex] * n_pairs)
    cell_dict['session_one'].extend([sess_one.attrs['session_two']] * n_pairs)
    cell_dict['session_two'].extend([sess_two.attrs['session_two']] * n_pairs)
    cell_dict['unit_id_one'].extend(unit_ids_one)
    cell_dict['unit_id_two'].extend(unit_ids_two)
    cell_dict['stability'].extend(stability)
stability_df = pd.DataFrame(cell_dict)

## DataFrame.to_csv does not create missing folders
stability_csv = pjoin(fig_path, f'intermediate_data/{mouse}_cross_day_stability_{file_name_suffix}.csv')
os.makedirs(os.path.dirname(stability_csv), exist_ok=True)
stability_df.to_csv(stability_csv)

('mc42_S_11.nc', 'mc42_S_11.nc')
('mc42_S_11.nc', 'mc42_S_12.nc')



invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.



('mc42_S_11.nc', 'mc42_S_13.nc')



invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.



('mc42_S_11.nc', 'mc42_S_14.nc')



invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.



('mc42_S_11.nc', 'mc42_S_15.nc')



invalid value encountered in divide



('mc42_S_12.nc', 'mc42_S_12.nc')
('mc42_S_12.nc', 'mc42_S_13.nc')



invalid value encountered in divide



('mc42_S_12.nc', 'mc42_S_14.nc')



invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.



('mc42_S_12.nc', 'mc42_S_15.nc')



invalid value encountered in divide


An input array is constant; the correlation coefficient is not defined.



('mc42_S_13.nc', 'mc42_S_13.nc')
('mc42_S_13.nc', 'mc42_S_14.nc')



invalid value encountered in divide



('mc42_S_13.nc', 'mc42_S_15.nc')



invalid value encountered in divide



('mc42_S_14.nc', 'mc42_S_14.nc')
('mc42_S_14.nc', 'mc42_S_15.nc')
('mc42_S_15.nc', 'mc42_S_15.nc')



invalid value encountered in divide



In [147]:
## Distribution of per-cell stability for the session pair C1 vs C5
is_c1_vs_c5 = stability_df['session_one'].eq('C1') & stability_df['session_two'].eq('C5')
test = stability_df[is_c1_vs_c5]
fig = pf.custom_graph_template(x_title='Pearson r', y_title='Probability')
group_color = ce_color_dict[stability_df['group'].unique()[0]]
stability_hist = go.Histogram(x=test['stability'], marker_color=group_color, histnorm='probability')
fig.add_trace(stability_hist)
fig.show()

In [143]:
## Heatmap of mean per-cell stability for every session pair
pair_keys = ['session_one', 'session_two']
avg_stability = stability_df.groupby(pair_keys, as_index=False).agg({'stability': ['mean', 'sem']})
stability_heatmap = go.Heatmap(x=avg_stability['session_one'],
                               y=avg_stability['session_two'],
                               z=avg_stability['stability']['mean'],
                               colorscale='viridis')
fig = pf.custom_graph_template(x_title='', y_title='', width=600)
fig.add_trace(stability_heatmap)
fig.data[0].colorbar.title="Pearson's r"
fig.show()