# Exploration.ipynb
## Author: Elliot Pallister

Notebook for exploration in the Pareto analysis project

In [1]:
# Collecting necessary imports

# External imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Internal imports
from pareto.data_io import get_cache, get_session, get_unit_channels, get_spike_times, get_stimulus_presentations, get_units_by_area, get_trials
from pareto.preprocessing import get_trial_stimulus_onsets, get_image_trials, arrange_image_onsets_to_trial, group_stims_by_frame_index, trial_number_histogram, spike_counts, spike_rates, time_window, make_psth, make_psth_matrix, trial_frame_psth_matrix, trial_frame_psth_cube, unit_idx_from_id, subtract_baseline, zscore_responses
from pareto.plotting import plot_grid, plot_unit_traces, plot_frame_avg, frame_index_histogram, plot_unit_psths_for_frames
from pareto.stats import visual_selectivity_filter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Importing the cache from AllenSDK
# NOTE(review): `cache` is not passed to any call in this cell — presumably
# get_session() resolves the cache internally; confirm this call is still needed.
cache = get_cache()

# Using session ID 1044385384
session_id = 1044385384
session = get_session(session_id)

# Per-session tables and spike data, all read from the same `session` object.
# Later cells read the 'snr', 'isi_violations' and 'firing_rate' columns of
# `units`, and the 'image_name' and 'start_time' columns of
# `stimulus_presentations`.
units = get_unit_channels(session)
trials = get_trials(session)
stimulus_presentations = get_stimulus_presentations(session)
spike_times = get_spike_times(session)

core - cached version: 2.6.0-alpha, loaded version: 2.7.0
  self.warn_for_ignored_namespaces(ignored_namespaces)


Firstly, I want to filter my units based on:

1. Quality metrics (SNR, interspike interval violations and firing rate)
2. Area (starting with VISp)

In [3]:
# Individual quality criteria, each a boolean mask over the rows of `units`.
high_snr = units['snr'] >= 1
few_isi_violations = units['isi_violations'] < 1
sufficiently_active = units['firing_rate'] > 0.1

# A unit is kept only when it passes all three criteria.
quality_unit_filter = high_snr & few_isi_violations & sufficiently_active

# Work on an independent copy so later edits never write back into `units`.
quality_units = units[quality_unit_filter].copy()

# Narrow the quality-passing units down to the area(s) under study.
area_of_interest = ['VISp']
area_units = get_units_by_area(quality_units, area_of_interest)

print(f'Number of filtered units in {area_of_interest}: {area_units.shape[0]}')


Number of filtered units in ['VISp']: 84


Next, I want to find the stimulus onsets for the image with id: im036_r.

I will use these onsets to:

1. Statistically test the unit responses for selectivity to the image, calculating effect sizes and p values, using a Wilcoxon signed-rank test (a paired test)
2. Update the area units dataframe to contain effect sizes and p values in a column
3. Filter the area units by selectivity for the stimulus

In [4]:
stim_of_interest = 'im036_r'

# Start times of every presentation whose image name matches the stimulus.
is_stim_of_interest = stimulus_presentations['image_name'] == stim_of_interest
stim_onsets = stimulus_presentations.loc[is_stim_of_interest, 'start_time'].values

# Define the time before the image and the duration of the window during
# which spikes are counted.
time_before_stim = 0.25
duration = 0.5

# Statistical testing: one mask entry, effect size and p value per unit.
selectivity_mask, effects, p_values = visual_selectivity_filter(
    area_units,
    spike_times,
    stim_onsets,
    time_before_stim,
    duration,
)

# Attach the test results to a fresh copy of the unit table
# (column order: p_values, then effect_size).
area_units = area_units.assign(p_values=p_values, effect_size=effects)

# Keep only the units flagged by the selectivity mask.
visual_area_units = area_units[selectivity_mask]

print(f'Number of units selective for {stim_of_interest} in {area_of_interest}: {visual_area_units.shape[0]}')

Number of units selective for im036_r in ['VISp']: 54


Since I now have the filtered units, I want to perform some preprocessing to obtain:

1. Each of the trials in which im036_r was shown
2. Image onsets aligned to trial frames 1-12, collecting image onsets for the nth frame in each trial
3. A data cube containing PSTH traces for each unit across each of the onsets in the nth frame (frame x unit x time)

In [5]:
# Step 1: the trials associated with the stimulus of interest.
image_trials = get_image_trials(trials, stim_of_interest, stim_onsets)

# Step 2: arrange the image onsets per trial, then group them by frame index.
trials_with_frames = arrange_image_onsets_to_trial(image_trials)
stim_frame_onsets = group_stims_by_frame_index(trials_with_frames)

# Step 3: build the PSTH data for the selective units, using the same window
# as the selectivity test and 0.01-wide bins. The notebook text describes the
# first output as a frame x unit x time cube.
psth_cube_outputs = trial_frame_psth_cube(
    visual_area_units,
    spike_times,
    stim_frame_onsets,
    time_before_stim,
    duration,
    bin_size=0.01,
)
unit_frame_psth_matrix, frame_ids, unit_ids, bins = psth_cube_outputs

[0.   0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1  0.11 0.12 0.13
 0.14 0.15 0.16 0.17 0.18 0.19 0.2  0.21 0.22 0.23 0.24 0.25 0.26 0.27
 0.28 0.29 0.3  0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.4  0.41
 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.5 ]


In [6]:
# Rank the stimulus-selective units from largest to smallest effect size.
# The copy keeps the new column below out of `visual_area_units`.
visual_units_by_effect = visual_area_units.sort_values(by='effect_size', ascending=False).copy()

# Label every unit with the quartile of its effect size.
labels = ["Q1 (lowest)", "Q2", "Q3", "Q4 (highest)"]
visual_units_by_effect['effect_quartile'] = pd.qcut(visual_units_by_effect['effect_size'], q=4, labels=labels)

# Draw one example unit per quartile, in quartile order (Q1 -> Q4).
# - observed=True iterates only quartiles that contain units and avoids the
#   deprecated implicit observed=False default for categorical groupers.
# - Sampling each group explicitly (instead of groupby.apply) keeps the
#   'effect_quartile' column in the result on every pandas version;
#   groupby.apply operating on the grouping column is deprecated.
# - Each quartile is sampled with the same fixed seed, so the selection is
#   reproducible and matches what the per-group apply produced.
picked_units = pd.concat(
    [
        quartile_units.sample(n=1, random_state=42)
        for _, quartile_units in visual_units_by_effect.groupby("effect_quartile", observed=True)
    ]
)

print(picked_units['effect_size'])

id
1049375405    0.853556
1049375379    1.226946
1049375383    1.734596
1049375318    2.460524
Name: effect_size, dtype: float64


In [None]:
# Plot the per-frame PSTHs of each sampled unit.
# Depends on `picked_units` (quartile-sampling cell) and on the outputs of
# trial_frame_psth_cube; run those cells first in the same kernel, otherwise
# this cell fails with a NameError.
for picked_unit_id in picked_units.index.to_list():
    # Translate the unit id into its position within the PSTH data.
    unit_position = unit_idx_from_id(unit_ids, picked_unit_id)
    # The return value is not needed here; it is deliberately discarded.
    _ = plot_unit_psths_for_frames(
        unit_position,
        unit_frame_psth_matrix,
        frame_ids,
        bins,
        time_before_stim,
    )

plt.show()

NameError: name 'picked_units' is not defined