<a href="https://colab.research.google.com/github/grace3999/wireless_fiber_photometry/blob/master/FP_PDsingle_TTL_trial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#getting and working with data
import pandas as pd
import numpy as np
import re
import os
import datetime as dt
import string
from numpy import trapz

from scipy import ndimage
from scipy import signal as ss
from scipy.optimize import curve_fit

#visualizing results
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_context('poster', rc={'font.size':35,
                              'axes.titlesize':50,
                              'axes.labelsize':35})

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 15000)
pd.set_option('display.max_colwidth', -1)

import warnings; warnings.simplefilter('ignore')
np.set_printoptions(suppress=True)

  pd.set_option('display.max_colwidth', -1)


#### Get paths - separate folders for each day/animal

In [75]:
outer_path = '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced'

outer_file_list = os.listdir(outer_path)

outer_path_list = []

for name in outer_file_list:
    int_path = outer_path + '/' + name
    outer_path_list.append(int_path)

print(len(outer_path_list))
outer_path_list

44


['/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1610_211129',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1609_211129',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1597_211130',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1600_211129',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1608_211129',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1607_211129',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1596_211130',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_75.1_1605_211129',
 '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/F

### Game plan:

- read in fiber photometry and TTL txt files and combine into dataframe
- convert TTL logic into event info
- create list of trial times  based on event info
- use trial times to make 1 trace per trial containing:
  - 10 seconds baseline (previous ITI)
  - 5 seconds lever extend -> lever press
  - 5 seconds lever press -> head entry for pellet
  - 10 seconds postline (start of ITI)

In [None]:
# ITI ends                        10 seconds prior to HE trial start
# HE/levers extend                times_lever_extend = []
# lever press/reward/ITI start    times_ITI_start = []
# reward HE                       times_HE_pellet = []
# ITI ends                        20 seconds after reward HE

# ITI start -> levers extend (head entry) -> lever press (reward) -> ITI start

#make_FP_TTL_list(path)                                                                return FP_TTL_list
#make_df_from_file_list(FP_TTL_list)                                                   return TTL_dataframe
#make_event_from_TTL(TTL_dataframe)                                                    return TTL_dataframe_session
#get_trial_times(TTL_dataframe_session)                                                return times_lever_extend, times_lever_press, times_pellet, times_HE_pellet
#make_times_df(times_lever_extend, times_lever_press, times_pellet, times_HE_pellet)   return times_df
#make_trials_df(times_df, TTL_dataframe_session)                                       return trials_df

### Functions

In [4]:
def make_FP_TTL_list(path):
  TTL_list = []
  FP_list = []

  inner_file_list = os.listdir(outer_path)
    
  for inner_file in inner_file_list:
        
      if inner_file.split('.')[-1]=='TXT':
          int_path = outer_path + '/' + inner_file
          TTL_list.append(int_path)
      else:
          int_path = outer_path + '/' + inner_file
          FP_list.append(int_path)
            
  FP_TTL_list = list(zip(sorted(FP_list), sorted(TTL_list)))

  return FP_TTL_list

In [5]:
def make_df_from_file_list(FP_TTL_list):

  TTL_dataframe = pd.DataFrame(columns=['FP_signal', 'TTL'])
  
  i=0

  for file_pair in FP_TTL_list:

    #get meta data from file path
    task = file_pair[0].split('/')[-2].split('_')[0]
    session = file_pair[0].split('/')[-2].split('_')[1]
    animal = file_pair[0].split('/')[-2].split('_')[2]
    date = file_pair[0].split('/')[-2].split('_')[3]

    #get FP and TTL data and convert to list and put in dataframe
    data_int_file = pd.DataFrame(columns=['FP_signal', 'TTL'])
    
    #FP data
    data_FP = pd.melt(pd.read_table(file_pair[0], header=None).T)
    #TTL data
    data_TTL = pd.melt(pd.read_table(file_pair[1], header=None).T)

    data_int_file['FP_signal'] = data_FP['value']
    data_int_file['TTL'] = data_TTL['value']
    data_int_file['file'] = i

    TTL_dataframe = pd.concat([TTL_dataframe, data_int_file], ignore_index=True)
    
    TTL_dataframe['date'] = date
    TTL_dataframe['task'] = task
    TTL_dataframe['session'] = session
    TTL_dataframe['animal'] = animal
      
    i+=1

  return TTL_dataframe

In [6]:
def make_event_from_TTL(TTL_dataframe):
  
  TTL_dataframe = TTL_dataframe.reset_index().sort_values('index')
  TTL_dataframe['binary'] = [format(x, "08b") for x in TTL_dataframe['TTL']]

  TTL_prior = list(TTL_dataframe['binary'][0:-1])
  TTL_prior.insert(0,np.nan)
  TTL_dataframe['binary_prior'] = TTL_prior

  TTL_dataframe['binary_start'] = TTL_dataframe.apply(lambda x : 0 if x['binary'] == x['binary_prior'] else x['binary'], axis=1)

  #reset index to get time column

  TTL_dataframe['head_entry'] = [x[7] for x in TTL_dataframe['binary']]
  TTL_dataframe['left_extend'] = [x[6] for x in TTL_dataframe['binary']]
  TTL_dataframe['right_extend'] = [x[5] for x in TTL_dataframe['binary']]
  TTL_dataframe['left_press'] = [x[4] for x in TTL_dataframe['binary']]
  TTL_dataframe['right_press'] = [x[3] for x in TTL_dataframe['binary']]
  TTL_dataframe['pellet'] = [x[2] for x in TTL_dataframe['binary']]
  TTL_dataframe['session_start'] = [x[1] for x in TTL_dataframe['binary']]
  TTL_dataframe['ITI_start'] = [x[0] for x in TTL_dataframe['binary']]

  #remove start of data 
  start = TTL_dataframe[(TTL_dataframe['binary_start']!=0)&(TTL_dataframe['session_start']=='0')]['index'].values[0]
  TTL_dataframe_session = TTL_dataframe[(TTL_dataframe['index']>=start)]
  TTL_dataframe_session['session_time'] = np.arange(0, TTL_dataframe_session.shape[0])

  return TTL_dataframe_session

In [52]:
def get_trial_times(TTL_dataframe_session):

  times_lever_extend = []
  times_lever_press = []
  times_pellet = []
  times_HE_pellet = []
  left_right_extend = []

  times = TTL_dataframe_session[(TTL_dataframe_session['binary_start']!=0)&((TTL_dataframe_session['right_extend']=='0')|(TTL_dataframe_session['left_extend']=='0'))]['session_time'].values
  prev_time = -5000
  for time in times:
  
    if time - prev_time < 5000:
      print('skip')
      continue

    if time - prev_time > 5000:
      times_lever_extend.append((time))

      #trial type
      if TTL_dataframe_session[(TTL_dataframe_session['session_time']==time)]['left_extend'].values[0]=='0':
        left_right_extend.append('left')
      else:
        left_right_extend.append('right')

      #find first lever press (ITI start) after lever extend
      df_after_extend = TTL_dataframe_session[TTL_dataframe_session['session_time']>=time]
      try:
        extend_press_time = df_after_extend[(df_after_extend['binary_start']!=0)&((df_after_extend['right_press']=='0')|(df_after_extend['left_press']=='0'))]['session_time'].values[0]
        #must press in 10 seconds
        if extend_press_time < (time+10000):
          times_lever_press.append(extend_press_time)

          #find first pellet after lever press (ITI start)
          df_after_press = df_after_extend[df_after_extend['session_time']>=extend_press_time]
          try:
            #should happen within seconds
            press_pellet_time = df_after_press[(df_after_press['binary_start']!=0)&(df_after_press['pellet']=='0')]['session_time'].values[0]
            if press_pellet_time < (extend_press_time+5000):
              times_pellet.append(press_pellet_time)

              #find first HE after lever press (ITI start) 
              df_after_pellet = df_after_press[df_after_press['session_time']>=press_pellet_time]
              try:
              #should happen within seconds
                pellet_HE_time = df_after_pellet[(df_after_pellet['binary_start']!=0)&(df_after_pellet['head_entry']=='0')]['session_time'].values[0]
                if pellet_HE_time < (press_pellet_time+5000):
                  times_HE_pellet.append(pellet_HE_time)

                else:
                  times_HE_pellet.append(np.nan)
              except: 
                times_HE_pellet.append(np.nan)

            else:
              times_pellet.append(np.nan)
              times_HE_pellet.append(np.nan)
          except: 
            times_pellet.append(np.nan)
            times_HE_pellet.append(np.nan)

        else:
          times_lever_press.append(np.nan)
          times_HE_pellet.append(np.nan)
          times_pellet.append(np.nan)
      except: 
        times_lever_press.append(np.nan)
        times_HE_pellet.append(np.nan)
        times_pellet.append(np.nan)

    prev_time = time

  return times_lever_extend, times_lever_press, times_pellet, times_HE_pellet, left_right_extend

In [122]:
def make_times_df(times_lever_extend, times_lever_press, times_pellet, times_HE_pellet, left_right_extend):

  times_df = pd.DataFrame(data=[times_lever_extend, times_lever_press, times_pellet, times_HE_pellet, left_right_extend]).T
  times_df.columns = ['times_lever_extend', 'times_lever_press', 'times_pellet', 'times_HE_pellet', 'left_right_extend']

  times_df['times_trial_start'] = times_df['times_lever_extend'] - 20000
  times_df.loc[times_df['times_trial_start']<0, 'times_trial_start'] = 0

  times_df['times_trial_end'] = times_df['times_lever_press'] + 20000
  times_df['trial'] = np.arange(0, times_df.shape[0])

  times_df['diff_press'] = times_df['times_lever_press'] - times_df['times_lever_extend']
  times_df['diff_pellet'] = times_df['times_pellet'] - times_df['times_lever_press']
  times_df['diff_HE'] = times_df['times_HE_pellet'] - times_df['times_pellet']
  times_df['diff_trial'] = times_df['times_trial_end'] - times_df['times_trial_start']

  times_df.loc[times_df['times_lever_press'].isna(), 'no_press'] = '1'
  times_df.loc[times_df['times_pellet'].isna(), 'no_pellet'] = 1

  times_df = times_df[['trial', 'left_right_extend', 'no_press', 'no_pellet', 
           'times_trial_start', 'times_lever_extend', 'times_lever_press', 'times_pellet', 'times_HE_pellet', 'times_trial_end',
           'diff_press', 'diff_pellet', 'diff_HE', 'diff_trial']]

  return times_df

In [123]:
def process_trace(data):
    
    data_int = data
    
    if data_int['FP_signal'].min() > 0:
      data_int['disconnect'] = 'no'
    else:
      data_int['disconnect'] = 'yes'

    #apply 5hz lowpass filter 
    b, a = ss.butter(4, 20, 'low', fs=1000) 
    FP_signal_5hz = ss.filtfilt(b, a, data_int['FP_signal'])
    data_int['FP_signal_5hz'] = FP_signal_5hz

    #detrend
    FP_signal_detrend = ss.detrend(data_int['FP_signal']) 
    data_int['FP_signal_detrend'] = FP_signal_detrend

    FP_signal_5hz_detrend = ss.detrend(data_int['FP_signal_5hz']) 
    data_int['FP_signal_5hz_detrend'] = FP_signal_5hz_detrend
    FP_signal_detrend_5hz = ss.filtfilt(b, a, data_int['FP_signal_detrend'])
    data_int['FP_signal_detrend_5hz'] = FP_signal_detrend_5hz

    #median filter to remove disconnects
    result = ndimage.median_filter(data_int['FP_signal_detrend_5hz'].values, size=500)
    data_int['FP_signal_medfilt'] = result

    #z score across trace
    numerator = np.subtract(data_int['FP_signal_5hz_detrend'], np.nanmean(data_int['FP_signal_5hz_detrend']))
    zscore = np.divide(numerator, np.nanstd(data_int['FP_signal_5hz_detrend']))
    data_int['zscore'] = zscore

    numerator = np.subtract(data_int['FP_signal_medfilt'], np.nanmean(data_int['FP_signal_medfilt']))
    zscore = np.divide(numerator, np.nanstd(data_int['FP_signal_medfilt']))
    data_int['zscore_medfilt'] = zscore

    return data_int

In [124]:
def make_trials_df(times_df, TTL_dataframe_session):

  trials_df = pd.DataFrame()

  i=0
  for time in times_df['times_lever_extend'].unique():

    print(time)
    
    start = times_df['times_trial_start'][i]
    extend = times_df['times_lever_extend'][i]
    press = times_df['times_lever_press'][i]
    HE = times_df['times_HE_pellet'][i]
    end = times_df['times_trial_end'][i]

    #4 phases of trial to normalize to same time line 
    # rest -> levers extend -> lever press (reward) -> HE for reward -> rest 
    df_start_extend = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=start) & (TTL_dataframe_session['session_time']<extend)].sort_values('session_time')
    df_extend_press = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=extend) & (TTL_dataframe_session['session_time']<press)].sort_values('session_time')
    df_press_HE = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=press) & (TTL_dataframe_session['session_time']<HE)].sort_values('session_time')
    df_HE_end = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=HE) & (TTL_dataframe_session['session_time']<end)].sort_values('session_time')

    #each trial will have a lever extension
    df_start_extend_resample = ss.resample(x=df_start_extend['FP_signal'].values, num=20000)

    #mouse doesn't always press (missed trials with time out)
    if press > 0:
      df_extend_press_resample = ss.resample(x=df_extend_press['FP_signal'].values, num=3000)
      #even if mouse presses, some trials there is no pellet (so no head entry for pellet)
      if HE > 0:
        df_press_HE_resample = ss.resample(x=df_press_HE['FP_signal'].values, num=1000)
        df_HE_end_resample = ss.resample(x=df_HE_end['FP_signal'].values, num=20000)
      else:
        df_press_noHE = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=press) & (TTL_dataframe_session['session_time']<press+1000)].sort_values('session_time')
        df_press_HE_resample = ss.resample(x=df_press_noHE['FP_signal'].values, num=1000)
        #setup end also 
        df_noHE_end = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=press+1000) & (TTL_dataframe_session['session_time']<press+21000)].sort_values('session_time')
        df_HE_end_resample = ss.resample(x=df_noHE_end['FP_signal'].values, num=20000)
    else:
      df_extend_nopress = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=extend) & (TTL_dataframe_session['session_time']<extend+3000)].sort_values('session_time')
      df_extend_press_resample = ss.resample(x=df_extend_nopress['FP_signal'].values, num=3000)
      #mouse also won't have a HE for a pellet also 
      df_nopress_noHE = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=extend+3000) & (TTL_dataframe_session['session_time']<extend+4000)].sort_values('session_time')
      df_press_HE_resample = ss.resample(x=df_nopress_noHE['FP_signal'].values, num=1000)
      df_noHE_end = TTL_dataframe_session[(TTL_dataframe_session['session_time']>=extend+4000) & (TTL_dataframe_session['session_time']<extend+24000)].sort_values('session_time')
      df_HE_end_resample = ss.resample(x=df_noHE_end['FP_signal'].values, num=20000)

    trace_final = np.concatenate((df_start_extend_resample, df_extend_press_resample, df_press_HE_resample, df_HE_end_resample), axis=0)

    trials_df_int = pd.DataFrame(columns=['FP_signal'])
    trials_df_int['FP_signal'] = trace_final
    trials_df_int['index'] = np.arange(0, trials_df_int.shape[0])/1000
    trials_df_int['trial'] = i
    trials_df_int['no_press'] = times_df[times_df['times_lever_extend']==time]['no_press'].values[0]
    trials_df_int['no_pellet'] = times_df[times_df['times_lever_extend']==time]['no_pellet'].values[0]
    trials_df_int['left_right_extend'] = times_df[times_df['times_lever_extend']==time]['left_right_extend'].values[0]
    trials_df_int['trial_start'] = start

    #process trace
    trials_df_int = process_trace(trials_df_int)

    i+=1

    if trials_df.shape[0] == 0:
      trials_df = trials_df_int
    else:
      trials_df = pd.concat([trials_df, trials_df_int], ignore_index=True)

  return trials_df

In [None]:
times_dir_dataframe = pd.DataFrame()
trials_dir_dataframe = pd.DataFrame()

for outer_path in outer_path_list[-16:-8]:

    print(outer_path, '\n')

    #make list of file paths and zip TTL and FP files
    print('Making file list...')

    if outer_path.split('/') == '.DS_Store':
        continue

    FP_TTL_list = make_FP_TTL_list(outer_path)

    #Combine data from all files and create df
    print('Creating data frame from files...')
    TTL_dataframe = make_df_from_file_list(FP_TTL_list)

    #create TTL_prior and then use to mark start of new TTLs
    print('Expanding TTLs...')
    TTL_dataframe_session = make_event_from_TTL(TTL_dataframe)

    #get trial times for each event of interest
    print('Collecting event times...')
    times_lever_extend, times_lever_press, times_pellet, times_HE_pellet, left_right_extend = get_trial_times(TTL_dataframe_session)   
    times_df = make_times_df(times_lever_extend, times_lever_press, times_pellet, times_HE_pellet, left_right_extend)
    print('Trial count: ', times_df['trial'].max())

    if times_dir_dataframe.shape[0] == 0:
      times_dir_dataframe = times_df
    else:
      times_dir_dataframe = pd.concat([times_dir_dataframe, times_df], ignore_index=True)

    #upsample/downsample to put all trials on same time scale
    print('Creating normalized trials...')
    trials_df = make_trials_df(times_df, TTL_dataframe_session)  

    trials_df['date'] = TTL_dataframe_session['date'].unique()[0]
    trials_df['task'] = TTL_dataframe_session['task'].unique()[0]
    trials_df['session'] = TTL_dataframe_session['session'].unique()[0]
    trials_df['animal'] = TTL_dataframe_session['animal'].unique()[0]
    print('Trial count: ', trials_df['trial'].max())

    if trials_dir_dataframe.shape[0] == 0:
      trials_dir_dataframe = trials_df
    else:
      trials_dir_dataframe = pd.concat([trials_dir_dataframe, trials_df], ignore_index=True)

    print('\n')

print(times_dir_dataframe.shape)
print(trials_dir_dataframe.shape)

times_dir_dataframe.head()

/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_100.1_1596_211129 

Making file list...
Creating data frame from files...
Expanding TTLs...
Collecting event times...
Trial count:  22
Creating normalized trials...
1276
47800
101300
147000
192500
242800
292333
305100
352252
399700
449300
497800
506600
560300
605600
665594
742300
795900
854504
904239
952300
1005700
1058700
Trial count:  22


/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/cohort1/PDT/forced/PDT_100.1_1597_211129 

Making file list...
Creating data frame from files...
Expanding TTLs...


In [None]:
times_dir_dataframe.median()

In [None]:
trials_dir_dataframe.head()

In [None]:
path_meta = '/content/drive/Shareddrives/Schindler Iterative Translation Lab/data/FP/meta.xlsx'
meta = pd.DataFrame(data=pd.read_excel(path_meta))

print(meta.shape)
meta.tail()

In [None]:
trials_dir_dataframe['animal'] = [int(x) for x in trials_dir_dataframe['animal']]

trials_dir_dataframe = meta.merge(trials_dir_dataframe, on='animal')

trials_dir_dataframe.head()

In [None]:
trials_dir_dataframe.loc[((trials_dir_dataframe['lever']=='right')&(trials_dir_dataframe['left_right_extend']=='left')), 'prob_lever'] = 'safe'
trials_dir_dataframe.loc[((trials_dir_dataframe['lever']=='right')&(trials_dir_dataframe['left_right_extend']=='right')), 'prob_lever'] = 'risky'

trials_dir_dataframe.loc[((trials_dir_dataframe['lever']=='left')&(trials_dir_dataframe['left_right_extend']=='right')), 'prob_lever'] = 'safe'
trials_dir_dataframe.loc[((trials_dir_dataframe['lever']=='left')&(trials_dir_dataframe['left_right_extend']=='left')), 'prob_lever'] = 'risky'

trials_dir_dataframe.head()

In [None]:
plt.figure(figsize=(20,7))

d = trials_dir_dataframe[trials_dir_dataframe['disconnect']=='no']
d = d[d['trial']>5]
sns.lineplot(x='index', y='zscore_medfilt', data=d, ci=None, n_boot=1, markers=True, hue='prob_lever', size='group')

plt.show()

In [None]:
groupby = trials_dir_dataframe[trials_dir_dataframe['prob_lever']=='safe'].groupby(['trial', 'index'])['zscore'].mean().reset_index()
groupby = groupby.pivot('trial', 'index', "zscore")
plt.figure(figsize=(30,9))
ax = sns.heatmap(groupby, cmap="coolwarm", vmin=-2, vmax=2)
plt.show()