In [1]:
#getting and working with data
import pandas as pd
import numpy as np
import re
import os
from itertools import groupby
import datetime as dt

#visualizing results
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#import yellowbrick as yb

#display settings: let pandas show up to 500 rows/columns and use a wide line width when printing dataframes
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

#NOTE(review): this silences every warning for the rest of the session (pandas deprecation and
#SettingWithCopy warnings included); consider narrowing the filter to specific categories
import warnings; warnings.simplefilter('ignore')
#print numpy floats in fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

In [2]:
def get_file_info(file_path_name):
    """Parse the session metadata encoded in a Med PC file name.

    The file name (the text after the last '/') is split on '_' and every token is classified
    by its shape:
        six digits   -> date
        three digits -> animal number
        one digit    -> session number
        contains 'force'/'Force'/'choice'/'Choice' -> sub-session

    Parameters
    ----------
    file_path_name : str
        Path to the data file using '/' as separator, e.g. '.../PR/PR_1_683_190807'.

    Returns
    -------
    tuple
        (date, animal_number, session, sub_session). `session` is the first token of the file
        name joined to the session number with '_' (only the first token when the name holds no
        session number). Any field that is not present in the name is returned as np.nan.
    """
    file_name = file_path_name.split('/')[-1]
    tokens = file_name.split('_')

    # defaults for fields that the file name does not contain
    date = np.nan
    animal_number = np.nan
    session_num = None
    sub_session = np.nan

    for token in tokens:
        if re.search(r'^\d{6}$', token):
            date = token
        if re.search(r'^\d{3}$', token):
            animal_number = token
        if re.search(r'^\d$', token):
            session_num = token
        # only overwrite on a match, so a later non-matching token cannot reset the value to NaN
        if re.search(r'((F|f)orce)|((C|c)hoice)', token):
            sub_session = token

    if session_num is None:
        session = tokens[0]
    else:
        session = tokens[0] + '_' + session_num

    return date, animal_number, session, sub_session

In [3]:
def make_array_from_path(file_path_name):
    """Read a Med PC output file and return its contents as a numpy array.

    The file is parsed by pandas.read_table with its default settings and the resulting table is
    handed back as a 2-D array, so each array row corresponds to one parsed row of the file.

    Parameters
    ----------
    file_path_name : str
        Path of the file to read.

    Returns
    -------
    numpy.ndarray
        The `.values` of the parsed table.
    """
    return pd.DataFrame(data=pd.read_table(file_path_name)).values

In [4]:
def get_events_and_times(array, letter_1, letter_2, order):
    """Collect the event-stamp and time-stamp arrays of a Med PC file into one dataframe.

    A row whose text contains '<letter>:' is treated as the header of that letter's array. The
    rows that directly follow it and whose first cell starts with a space are its data rows; on
    each data row the first whitespace-separated token (the row index) is dropped and the
    remaining tokens are read as floats.

    Parameters
    ----------
    array : sequence of rows
        Rows of the file as produced by make_array_from_path; the text is read from cell 0.
    letter_1, letter_2 : str
        Letters the Med PC program uses for the two arrays.
    order : {'event', 'time'}
        'event' -> letter_1 holds the event stamps and letter_2 the time stamps.
        'time'  -> letter_1 holds the time stamps and letter_2 the event stamps.

    Returns
    -------
    pandas.DataFrame
        Columns 'event_stamp' and 'time_stamp', one row per recorded event.

    Raises
    ------
    ValueError
        If `order` is neither 'event' nor 'time', or if the two arrays differ in length.
    """
    if order not in ('event', 'time'):
        raise ValueError("order must be 'event' or 'time', got {!r}".format(order))

    # str() of a one-cell row looks like "['     0:   1.000 ...']"; data rows start with a space
    data_row = re.compile(r"\['\ ")
    values_1 = []
    values_2 = []
    n_rows = len(array)

    i = 0
    while i < n_rows:
        row_text = str(array[i])
        if re.search(letter_1 + ':', row_text):
            target = values_1
        elif re.search(letter_2 + ':', row_text):
            target = values_2
        else:
            target = None
        i += 1
        if target is None:
            continue
        # consume the data rows of this array; the bounds check covers a block that ends the file
        while i < n_rows and data_row.search(str(array[i])):
            target.extend(float(token) for token in array[i][0].split()[1:])
            i += 1
        # i now sits on the first row after the block, which the outer loop examines as a
        # possible header, so the two arrays may appear in either order in the file

    if order == 'event':
        events, times = values_1, values_2
    else:
        events, times = values_2, values_1

    es_et = pd.DataFrame(data=np.column_stack((events, times)))
    es_et.columns = ['event_stamp', 'time_stamp']

    return es_et

In [5]:
def get_trial_times_PR(events_and_times, right_extends, left_extends, ITI_start, ITI_end):
    """Build a table with the start/end time stamps of every trial and every ITI.

    Parameters
    ----------
    events_and_times : pandas.DataFrame
        Needs the columns 'event_stamp' and 'time_stamp'.
    right_extends, left_extends : number
        Event stamps that mark a trial start (either one counts).
    ITI_start : number
        Event stamp that marks the ITI onset; its time stamps are also used as the trial ends.
    ITI_end : number
        Event stamp that marks the ITI offset.

    Returns
    -------
    pandas.DataFrame
        Columns 'trial_start', 'trial_end', 'ITI_start', 'ITI_end', one row per trial.
    """
    stamps = events_and_times['event_stamp']
    clock = events_and_times['time_stamp']

    is_trial_start = (stamps == right_extends) | (stamps == left_extends)
    trial_starts = clock[is_trial_start].values
    iti_onsets = clock[stamps == ITI_start].values
    iti_offsets = clock[stamps == ITI_end].values

    #the two ITI columns are cut to the number of trial starts; the trial_end column is not
    n_starts = len(trial_starts)
    rows = [trial_starts, iti_onsets, iti_onsets[0:n_starts], iti_offsets[0:n_starts]]

    times = pd.DataFrame(data=rows).T
    times.columns = ['trial_start', 'trial_end', 'ITI_start', 'ITI_end']

    return times

In [6]:
def create_trials_df(events_and_times, trial_times):
    """Assign every event to the trial whose time window contains it.

    For each row of `trial_times` (taken in positional order), the events whose time stamp lies
    within [trial_start, trial_end] (both ends included) are selected and labelled with the trial
    number and the trial's start and end time.

    Parameters
    ----------
    events_and_times : pandas.DataFrame
        Needs a 'time_stamp' column; it is not modified.
    trial_times : pandas.DataFrame
        Needs the columns 'trial_start' and 'trial_end'.

    Returns
    -------
    pandas.DataFrame
        The selected events of all trials stacked in trial order with a fresh 0..n-1 index and
        the extra columns 'trial_#', 'trial_start' and 'trial_end'. An empty dataframe without
        columns is returned when `trial_times` has no rows.
    """
    time_stamps = events_and_times['time_stamp']
    bounds = zip(trial_times['trial_start'].values, trial_times['trial_end'].values)

    trial_frames = []
    for trial_number, (start, end) in enumerate(bounds):
        in_window = (time_stamps >= start) & (time_stamps <= end)
        #assign() returns a new frame, so the caller's dataframe is never written to
        labelled = events_and_times[in_window].assign(
            **{'trial_#': trial_number, 'trial_start': start, 'trial_end': end}
        )
        trial_frames.append(labelled)

    #pd.concat raises on an empty list
    if not trial_frames:
        return pd.DataFrame()

    #one concat after the loop instead of re-copying a growing dataframe on every iteration
    return pd.concat(trial_frames, axis=0, ignore_index=True)

In [7]:
def count(trial_data, event_num):
    """Count, trial by trial, how many rows carry the event stamp `event_num`.

    Trials are addressed as 0, 1, ..., n-1 where n is the number of distinct 'trial_#' values,
    so the labels are expected to be consecutive integers starting at 0.

    Parameters
    ----------
    trial_data : pandas.DataFrame
        Needs the columns 'trial_#' and 'event_stamp'.
    event_num : number
        Event stamp to count.

    Returns
    -------
    list of int
        One count per trial, in trial order.
    """
    n_trials = len(trial_data['trial_#'].unique())
    trial_ids = trial_data['trial_#']
    is_event = trial_data['event_stamp'] == event_num

    return [int(((trial_ids == trial) & is_event).sum()) for trial in range(n_trials)]

In [18]:
def latency(trial_data, event_num):
    """Measure, trial by trial, the delay between trial start and the event `event_num`.

    Every delay is (time_stamp - trial_start) / 100.
    NOTE(review): the division by 100 presumably converts the Med PC time unit to seconds - confirm.

    Trials are addressed as 0, 1, ..., n-1 where n is the number of distinct 'trial_#' values.
    A trial in which the event never occurs gets 0.0 in both lists, so a missing event cannot be
    told apart from a true zero latency.

    Parameters
    ----------
    trial_data : pandas.DataFrame
        Needs the columns 'trial_#', 'event_stamp', 'time_stamp' and 'trial_start'.
    event_num : number
        Event stamp to look for.

    Returns
    -------
    tuple of two lists
        (latency of the first occurrence per trial, mean latency over all occurrences per trial).
    """
    n_trials = len(trial_data['trial_#'].unique())
    is_event = trial_data['event_stamp'] == event_num

    first_latencies = []
    mean_latencies = []
    for trial in range(n_trials):
        hits = trial_data[(trial_data['trial_#'] == trial) & is_event]
        if len(hits) == 0:
            first_latencies.append(0.0)
            mean_latencies.append(0.0)
            continue
        elapsed = (hits['time_stamp'] - hits['trial_start']).values
        first_latencies.append(elapsed[0]/100)
        mean_latencies.append(elapsed.mean()/100)

    return first_latencies, mean_latencies

In [19]:
def create_session_df(trial_data, event_dic):
    """Summarise one session as a table with one row per trial.

    For every entry of `event_dic` three columns are added: '<name>_count' (from count()),
    '<name>_latency_1st' and '<name>_latency_ave_bit' (from latency()). 'trial_num' numbers the
    rows from 0 and 'trial_times' holds (trial_end - trial_start) / 100 for each trial.

    Parameters
    ----------
    trial_data : pandas.DataFrame
        Events labelled per trial, with the columns 'trial_#', 'trial_start' and 'trial_end' plus
        whatever count() and latency() read.
    event_dic : dict
        Maps a column-name prefix to the event stamp to analyse.

    Returns
    -------
    pandas.DataFrame
        One row per trial, indexed 0..n-1.
    """
    n_trials = len(trial_data['trial_#'].unique())

    #trial durations, taken from the distinct start/end values of each trial group
    durations = []
    for _, trial_rows in trial_data.groupby('trial_#'):
        span = trial_rows['trial_end'].unique() - trial_rows['trial_start'].unique()
        durations.append((span/100)[0])

    session_data = pd.DataFrame(index=np.arange(0, n_trials))

    for name, event in event_dic.items():
        session_data[name + '_count'] = count(trial_data, event)
        first_latencies, mean_latencies = latency(trial_data, event)
        session_data[name + '_latency_1st'] = first_latencies
        session_data[name + '_latency_ave_bit'] = mean_latencies

    session_data['trial_num'] = np.arange(session_data.shape[0])
    session_data['trial_times'] = durations

    return session_data

### Variable Definitions

- A   = active lever (1 = Left, any other number = Right)
- B   = number of right nose pokes
- C   = number of left nose pokes
#### D   = event time stamp
#### E   = event identity stamp

### Event Stamps

####      46 - Right lever extends 
- trial starts

####      47 - Left lever extends
- trial starts

####      12 - Intertrial interval begins
####      13 - Intertrial interval over and reinforcement available again

####      10 - ITI Right lever press
####      11 - ITI Left lever press
####      14 - ITI Head entry

####      40 - PR reset lever pressed (right)
####      41 - PR reset lever pressed (left)
####      3  - Non-ITI Left lever press
####       4  - Non-ITI Right lever press
####      7  - Non-ITI Head entry
####      17 - Pellet delivery

-      5  - Reinforced left lever press
-      6  - Reinforced right lever press
-      8  - Non-ITI Right nose poke
-      9  - Non-ITI Left nose poke
-      15 - ITI Right nose-poke
-      16 - ITI Left nose-poke
-      18 - Tone delivery
-      42 - Right cue light active
-      43 - Left cue light active
-      44 - Right cue light off
-      45 - Left cue light off
-      48 - Right lever retracts
-      49 - Left lever retracts
-      50 - Head entry to initiate trial
-     100 - End of session marker

In [10]:
#folder that holds the PR session files
path_1 = 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR'

#os.listdir returns names in arbitrary order; sorting keeps the processing order (and therefore
#the row order of the final table) the same on every run
session_list_1 = sorted(os.listdir(path_1))
print(len(session_list_1))
session_list_1[0:5]

34


['PR_1_683_190807',
 'PR_1_684_190807',
 'PR_1_685_190807',
 'PR_1_686_190807',
 'PR_1_687_190807']

In [11]:
#full path of every session file in the folder
file_paths = [path_1 + '/' + file_name for file_name in session_list_1]

print(len(file_paths))
file_paths[0:5]

34


['C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_683_190807',
 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_684_190807',
 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_685_190807',
 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_686_190807',
 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_687_190807']

In [15]:
#load the per-animal assignments (lever, cage, group) from the Excel sheet into a dataframe
path_animal_info = 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Analysis/Excel files/Med boxes/assignments/PR_round1.xlsx'

animal_info = pd.DataFrame(data=pd.read_excel(path_animal_info))

animal_info.head()

Unnamed: 0,animal,lever,cage,group
0,683,2,1,2
1,684,2,2,1
2,685,2,2,1
3,686,1,2,1
4,687,2,3,1


In [20]:
%%time
#these are the event_stamps we want analyzed
event_dic = {'reinforcement': 17, 'right_lever_press': 4, 'left_lever_press': 3, 'HE_trial': 7}

#one per-trial dataframe per file; they are concatenated once after the loop instead of
#re-copying a growing dataframe on every iteration
session_frames = []

for file in file_paths:
    print(file)

    #process file
    date, animal_number, session, sub_session = get_file_info(file)
    array = make_array_from_path(file)
    #D = event time stamps, E = event identity stamps, so letter_1 ("D") is the 'time' array
    events_and_times = get_events_and_times(array, "D", "E", 'time')
    #46/47 = right/left lever extends (trial starts), 12 = ITI begins, 13 = ITI over
    trial_times = get_trial_times_PR(events_and_times, 46, 47, 12, 13)
    trial_data = create_trials_df(events_and_times, trial_times)
    session_data = create_session_df(trial_data, event_dic)

    #tag every trial row with the session it came from
    session_data['date'] = date
    session_data['animal_number'] = animal_number
    session_data['session'] = session

    session_frames.append(session_data)

#pd.concat raises on an empty list, so fall back to an empty dataframe when there are no files
if session_frames:
    final_data = pd.concat(session_frames, axis = 0, ignore_index=True)
else:
    final_data = pd.DataFrame()

#column order of the tidy output table
final_data = final_data[['date', 
                         'animal_number', 
                         'session', 
                         'trial_num', 
                         'reinforcement_count', 
                         'right_lever_press_count',
                         'right_lever_press_latency_1st',
                         'right_lever_press_latency_ave_bit', 
                         'left_lever_press_count',
                         'left_lever_press_latency_1st', 
                         'left_lever_press_latency_ave_bit',
                         'HE_trial_count', 
                         'HE_trial_latency_1st',
                         'HE_trial_latency_ave_bit', 
                         'trial_times']]

final_data.head()

C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_683_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_684_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_685_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_686_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_687_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_688_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_689_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_690_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/progressive_ratio/PR/PR_1_691_190807
C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med box

In [22]:
#save the per-trial table as a csv in the current working directory; the dataframe index is
#written as the first (unnamed) column
final_data.to_csv('PR_task_prepro.csv')

In [21]:
#preview the first rows of the combined per-trial table
final_data.head()

Unnamed: 0,date,animal_number,session,trial_num,reinforcement_count,right_lever_press_count,right_lever_press_latency_1st,right_lever_press_latency_ave_bit,left_lever_press_count,left_lever_press_latency_1st,left_lever_press_latency_ave_bit,HE_trial_count,HE_trial_latency_1st,HE_trial_latency_ave_bit,trial_times
0,190807,683,PR_1,0,1,0,0.0,0.0,1,2.26,2.26,1,0.07,0.07,2.27
1,190807,683,PR_1,1,1,0,0.0,0.0,2,19.56,20.55,3,2.2,5.356667,21.55
2,190807,683,PR_1,2,1,0,0.0,0.0,2,14.75,15.055,4,5.25,8.7475,15.37
3,190807,683,PR_1,3,1,0,0.0,0.0,3,8.77,9.906667,1,10.77,10.77,11.66
4,190807,683,PR_1,4,1,0,0.0,0.0,4,52.41,55.61,6,2.29,36.99,59.47


In [None]:
#single file option for trouble shooting
#runs the same steps as the batch loop on one file and prints every intermediate result;
#only helpers defined in this notebook are used

#set file_path to any entry of file_paths (or to a full path) to inspect that file
file_path = file_paths[0]
print('file_path:', '\n', file_path, '\n')

date, animal_number, session, sub_session = get_file_info(file_path)
print('date:', date, 'animal_number:', animal_number, 'session:', session, 'sub_session:', sub_session, '\n')

array = make_array_from_path(file_path)
print(array)

#D = event time stamps, E = event identity stamps, so letter_1 ("D") is the 'time' array
events_and_times = get_events_and_times(array, "D", "E", 'time')
print(events_and_times.head(), '\n')

#46/47 = right/left lever extends (trial starts), 12 = ITI begins, 13 = ITI over
trial_times = get_trial_times_PR(events_and_times, 46, 47, 12, 13)
print(trial_times.head(), '\n')

trial_data = create_trials_df(events_and_times, trial_times)
print(trial_data.head(), '\n')

#same event_stamps as in the batch loop
event_dic = {'reinforcement': 17, 'right_lever_press': 4, 'left_lever_press': 3, 'HE_trial': 7}
session_data = create_session_df(trial_data, event_dic)

session_data['date'] = [date]*(session_data.shape[0])
session_data['animal_number'] = [animal_number]*(session_data.shape[0])
session_data['session'] = [session]*(session_data.shape[0])
session_data['sub_session'] = [sub_session]*(session_data.shape[0])

#NOTE: running this cell replaces the batch result held in final_data with the single-file table
final_data = session_data[['date', 'animal_number', 'session', 'sub_session', 'trial_num',
                           'reinforcement_count',
                           'right_lever_press_count', 'right_lever_press_latency_1st', 'right_lever_press_latency_ave_bit',
                           'left_lever_press_count', 'left_lever_press_latency_1st', 'left_lever_press_latency_ave_bit',
                           'HE_trial_count', 'HE_trial_latency_1st', 'HE_trial_latency_ave_bit',
                           'trial_times']]