# Extract and work with stimulated cells

In [1]:
# Imports:
%load_ext autoreload
%autoreload 2

import json
import numpy as np
import math
import psycopg2
import numpy as np
import pandas as pd
from datetime import date
from tqdm import tqdm_notebook

# Plotting:
from matplotlib import pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
import matplotlib as mpl
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

sns.set()

# External functions from subfolder /database_helpers. 
# as soon as you change something in there and press save, it will auto reload on next execution.
from database_helpers.psql_start import *
from database_helpers.create_tables import *
from database_helpers.write2tables import *
from postgres_analysis_helpers.general import *
from postgres_analysis_helpers.get_data import *
from postgres_analysis_helpers.circstat_functions import *
# Register a custom psycopg2 type so binary columns coming back from the
# database are run through `cast_pickle` (not defined in this notebook —
# presumably supplied by one of the star-imported helper modules above).
# NOTE(review): the name suggests the bytes are unpickled; only connect to a trusted database.
psycopg2.extensions.register_type(
    psycopg2.extensions.new_type(
        psycopg2.BINARY.values,
        'BINARY-PICKLE',
        cast_pickle,
    )
)

Loaded analysis helpers: General
Loaded postgres_analysis_helpers -> general
Loaded postgres_analysis_helpers -> get_data
Loaded postgres_analysis_helpers -> circstat_functions


In [2]:
# Database connection check
db_status = test_connect()
# Only a status that compares equal to False is reported as a failure;
# any other return value is treated as a working connection.
print(
    'Grrr... no database connection could be established.'
    if db_status == False
    else 'Woot! Database connection is established!'
)

Connecting to the PostgreSQL database...
Woot! Database connection is established!


### Connect to the database 

In [3]:
# Start with an empty base dataframe; a dataframe produced earlier could be passed in instead.
stimulus_stats = pd.DataFrame() # create empty dataframe or feed in a base dataframe from before!
# Build the retrieval object around that base dataframe (the saved output below
# shows it reporting the number of animals, sessions and clusters it found).
psql = psql_neuroballs(stimulus_stats) # initialize retrieval

78 animals across 5 experimenters found.
Total # of sessions: 8632 (unique clusters: 15361)


**First retrieve all stimulus stats**. That means that only cells for which stimulus sessions were actually run are included in this retrieval step. No baseline sessions are present - only the laser sessions.
This is because the `stimulus_tb` table only contains laser stimulus session information - nothing else! 

In [4]:
# Table to query: `stimulus_tb`, with an empty column specification.
dict_ = {'stimulus_tb': ''}

# Animals to include in this retrieval.
animals_ = [
    '74470', '74471', '74933', '74935',
    '74472', '74473', '74956', '73358',
]

# Additional SQL condition: skip sessions whose name contains 'obj', 'stab' or 'room'.
filter_ = (
    "AND session_name NOT LIKE '%obj%' "
    "AND session_name NOT LIKE '%stab%' "
    "AND session_name NOT LIKE '%room%'"
)

stimulus_stats = psql.retrieve_data(dict_, animals_, filter_)

8772 entries retrieved.


### Set parameters

In [None]:
# Set parameters:

# --- Excitation criteria (SALT statistics) ---
salt_i_thresh = 0.09     # rows need salt_i at or above this value
salt_p_thresh = 0.05     # rows need salt_p strictly below this value
min_number_sessions = 2  # min number of sessions for which the criteria above have to be fulfilled

# --- Unit quality criteria ---
isi_contam_thresh = 1    # ISI contamination (%); cells must stay strictly below this value
pearson_r_thresh = 0.99  # waveform similarity cutoff; cells must be strictly above this value


### Filter for stimulated cells 

The way to do it is as follows: 
- Filter the stimulus stats for **SALT p** and **SALT i** according to the user defined thresholds (see parameters above)
- Sort dataframe by **SALT i**
- Count sessions and filter for **session count** (number of sessions that meet the above criteria) - this gets rid of all duplicate sessions
- Get waveforms (on the same dataframe - meaning the waveforms of the laser sessions with highest SALT i) 
- Drop **NaNs in waveforms** column. This is legitimate because cells with very low responsiveness / firing rate that cross the threshold by chance would have to be filtered out anyway. If a NaN appears in this column, the analysis did not run over this cell in this session because fewer than 50 spikes were recorded. 
- Then drop the "session_name" column to get rid of the laser session label, and query the database again with a filter that excludes session names containing "las" (laser), so that only baseline sessions are retrieved (the filter can be expanded, for example with "object", to exclude other open field sessions as well). In that step retrieve the baseline waveforms and ISI contamination (%).
- Filter for **ISI contamination**
- Filter for **Waveform correlation** (median correlation of all waveform channel pairs > threshold)

In [None]:
# Filter for SALT p and SALT i.
# A row counts as "excited" only if BOTH criteria hold:
# salt_i at/above its threshold AND salt_p below its threshold.
excited_cells = stimulus_stats[(stimulus_stats.salt_i >= salt_i_thresh) & (stimulus_stats.salt_p < salt_p_thresh)].copy()
# Highest SALT i first: the later drop_duplicates step keeps the first row per cell,
# i.e. the session with the strongest response.
excited_cells.sort_values(by='salt_i',ascending=False,inplace=True)

# The rest: rows that fail AT LEAST ONE of the two criteria.
# By De Morgan, not (A and B) == (not A) or (not B), so the negated conditions are
# combined with `|`. Combining them with `&` would silently lose every row that
# passes exactly one of the two tests.
# Rows where both SALT values are NaN compare False everywhere and land in neither frame.
excited_cells_rest = stimulus_stats[(stimulus_stats.salt_i < salt_i_thresh) | (stimulus_stats.salt_p >= salt_p_thresh)].copy()

In [None]:
# Show the three rows with the highest salt_i (the frame was sorted in descending order above).
excited_cells.head(3)

### Count sessions and filter for session number

In [None]:
# Group by and count - and also drop duplicates with sort
# Columns that together identify one cell within one recording.
drop_subgroup = ['animal_id','n_drive_user','session_ts','cluster_no','tetrode_no']

# One row per key combination; the last column holds the number of rows
# (= sessions that passed the SALT filter) for that key.
no_sessions_excited = excited_cells.groupby(drop_subgroup).size().reset_index().copy()

# One representative row per key combination (the first one encountered), sorted by
# the same keys so that it lines up row by row with the counts above.
# reset_index() without drop=True keeps the old index as an 'index' column.
excited_cells_sessions = excited_cells.drop_duplicates(drop_subgroup).sort_values(by=drop_subgroup).reset_index().copy()

# Sanity check: the counts are attached to the rows purely by position in the next
# step, so both frames must have the same length and identical values in EVERY key column.
# The length is checked first because groupby drops NaN keys while drop_duplicates keeps them.
frames_match = len(no_sessions_excited) == len(excited_cells_sessions) and all(
    (no_sessions_excited[key].values == excited_cells_sessions[key].values).all()
    for key in drop_subgroup
)
if not frames_match:
    # Raise instead of calling sys.exit(): `sys` is not imported in this notebook,
    # and an exception reliably stops "Run All" at the offending cell.
    raise RuntimeError('DATAFRAMES DO NOT MATCH!')

In [None]:
# Attach the per-cell session count: last column of the groupby/size result, copied over by position.
excited_cells_sessions['no_sessions'] = no_sessions_excited.iloc[:, -1].values
# Row count before the session-number filter (printed in the summary further down).
len_before_no_session = excited_cells_sessions.shape[0]
# Overwrite the original df: keep only cells that met the SALT criteria in enough sessions.
excited_cells = excited_cells_sessions.loc[
    excited_cells_sessions['no_sessions'] >= min_number_sessions
]

In [None]:
# Inspect the first rows after the session-count filter (now including the `no_sessions` column).
excited_cells.head(5)

### Get waveforms (laser sessions)

In [None]:
# Re-create the retrieval object with the filtered cells as base dataframe.
psql = psql_neuroballs(excited_cells) # initialize retrieval
# Request the `mean_wf` column from `waveforms_tb`.
dict_ = {'waveforms_tb':'mean_wf'}
# The result replaces excited_cells. The base dataframe still carries the laser
# session_name at this point, so these are presumably the laser-session waveforms
# (as the markdown above describes) — the matching logic lives in the helper.
excited_cells = psql.retrieve_data(dict_)

### Drop all NaNs in waveforms - this is valid if we want to include sessions only if there were > 50 spikes

In [None]:
# Remember how many rows there were before dropping (printed in the summary further down).
len_before_drop_na_wf = excited_cells.shape[0]

# Remove, in place, every row that has no laser-session mean waveform.
excited_cells.dropna(axis='index', subset=['mean_wf'], inplace=True)
# Tag the waveform column with "laser" so that a later retrieval of `mean_wf`
# does not collide with it.
excited_cells.rename(columns={'mean_wf': 'mean_wf_laser'}, inplace=True)

### Get the stimulus_mat table data for later 

In [None]:
# Re-create the retrieval object with the current cells as base dataframe.
psql = psql_neuroballs(excited_cells) # initialize retrieval
# Request `stimulus_mat_tb` with an empty column specification.
dict_ = {'stimulus_mat_tb': ''}
# Done here, while the laser session_name is still part of the frame (it is dropped in the next step).
excited_cells = psql.retrieve_data(dict_)

### Now drop the session_name column (to get rid of the laser session label) and retrieve waveforms again, but with a filter for non-laser sessions. Also include ISI contaminations in same go and filter for ISI contamination.

In [None]:
# Remove the laser session label so the next retrieval is no longer tied to it.
# 'index' is presumably the column left behind by reset_index() in the session-count step.
excited_cells.drop(labels=['session_name', 'index'], axis='columns', inplace=True)

In [None]:
# Fresh retrieval object based on the cells that are left.
psql = psql_neuroballs(excited_cells)
# Ask for the mean waveform and the ISI contamination in one go.
dict_ = {'waveforms_tb': 'mean_wf', 'ISI_tb': 'ISI_stats_contam_perc'}
# The SQL filter excludes sessions matching '%las%', i.e. laser sessions.
excited_cells = psql.retrieve_data(
    user_sql_filter="NOT LIKE '%las%'",
    user_sql_tables=dict_,
)

# Row count before the ISI filter (printed in the summary further down).
len_before_isi_filt = excited_cells.shape[0]

# Keep only cells whose ISI contamination is strictly below the threshold.
# The column is accessed in lower case here although it was requested in mixed case.
excited_cells = excited_cells.loc[
    excited_cells['isi_stats_contam_perc'] < isi_contam_thresh
].copy()

### Calculate pearson's R between waveforms and throw away entries that are lower than similarity threshold

In [None]:
# Correlate baseline (`mean_wf`) and laser (`mean_wf_laser`) waveforms for every row;
# the helper returns one r and one p value per row (p is not used below).
wf_r, wf_p = corr_wf_base_laser(excited_cells, 'mean_wf', 'mean_wf_laser', plotting=False)
excited_cells['pearson_r_wf'] = wf_r

# Row count before the correlation filter (printed in the summary further down).
len_before_corr_filt = excited_cells.shape[0]

# Keep only cells whose waveform correlation is strictly above the threshold.
excited_cells = excited_cells.loc[
    excited_cells['pearson_r_wf'] > pearson_r_thresh
].copy()

In [None]:
# Histogram of all waveform correlations computed above
# (wf_r still includes the cells that the threshold removed).
plt.hist(wf_r, bins=200)
# Zoom in on the upper end of the distribution.
plt.xlim(0.96, 1)
# Mark the similarity cutoff.
plt.axvline(pearson_r_thresh, color='k')
sns.despine(bottom=True, left=True)
plt.title('Pearson correlations waveforms base vs. laser')

In [None]:
# Sort again - why not ... 
excited_cells.sort_values(by='salt_i',ascending=False,inplace=True)

### Summary: how many cells were lost at each step

In [None]:
# Summary:
print('Number of cells before ...')
print('No. of session filter: {}'.format(len_before_no_session))
print('Drop na waveforms: {}'.format(len_before_drop_na_wf))
print('ISI filter: {}'.format(len_before_isi_filt))
print('Waveforms correlation filter: {}'.format(len_before_corr_filt))
print('Remaining: {}'.format(len(excited_cells)))

### Draw some stuff

In [None]:
# Draw some examples of baseline and laser mean waveforms, and then PSTHs
draw_waveforms(excited_cells,'mean_wf',15,0)
draw_waveforms(excited_cells,'mean_wf_laser',15,0)
create_spike_plots_stimulus(excited_cells,15,0,True)

In [None]:
# Display a subset of the dataframe to check if everything went well
excited_cells[['animal_id','filename','tetrode_no','cluster_no','session_name','salt_i','salt_p']].head()

In [None]:
# Excitation latency distributions: one panel for the median, one for the mean.
# Each panel shows a kernel density estimate (black) plus a rug plot of the raw values (red).
# (A plain histogram alternative would be excited_cells.ex_latency_median.hist(bins=20).)
figure = plt.figure(figsize=(10, 4))
latency_panels = (
    (121, 'ex_latency_median', "Exc lat median"),
    (122, 'ex_latency_mean', "Exc lat mean"),
)
for subplot_code, latency_column, kde_label in latency_panels:
    ax = figure.add_subplot(subplot_code)
    sns.kdeplot(excited_cells[latency_column], bw=.5, label=kde_label, color='k', ax=ax)
    sns.rugplot(excited_cells[latency_column], color='r', ax=ax)
sns.despine(left=True, bottom=True)

### Save dataframe to pickle

In [None]:
# Write the final table of excited cells to disk as a pickle file.
# The path is relative to the current working directory; `to_pickle` does not create
# the `dataframe_export/` folder, so it has to exist already.
# NOTE: only load this file back from a trusted location — unpickling can execute arbitrary code.
excited_cells.to_pickle('dataframe_export/excited_cells.pkl')

In [None]:
# Number of rows in the exported dataframe (same value as "Remaining" in the summary above).
len(excited_cells)

### To retrieve more data ... 

In [None]:
#psql = psql_neuroballs(excited_cells) # initialize retrieval
#psql.retrieve_data()

In [None]:
#data = psql.data()

In [None]:
# for example ... 
#draw_ratemaps(data,'masked_ratemap',15,0)

In [None]:
#data.columns