In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import re
import os

# Raise pandas' display limits so wide/long frames are shown with less truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# NOTE(review): this suppresses every warning for the rest of the session, so any
# deprecation or pandas warnings that later cells might emit will not be shown.
import warnings; warnings.simplefilter('ignore')

Use the `os` module to list the session files in each round's data folder

In [2]:
# Folders that are listed below, one per round (round 2 and round 1).
# NOTE(review): these are absolute paths on one specific machine; the notebook
# cannot run elsewhere without editing them.
path_2 = 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/Pavlovian/round 2 9.2018/Pav/Pav7'
path_1 = 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Behavior/Med boxes/Pavlovian/round 1 6.2018/Pav/Pav7'
# os.listdir returns the bare entry names inside each folder (not full paths).
session_list_1 = os.listdir(path_1)
session_list_2 = os.listdir(path_2)

In [3]:
# Build the full path of every session file.
# The pieces are joined with '/' on purpose (not os.path.join): get_file_info()
# splits the path on '/', so a backslash separator would break it.
#
# `file_paths_2` used to be created and then left empty because the second loop
# appended to `file_paths`; it now holds the round-2 paths on their own.
# `file_paths` is unchanged: round-1 paths followed by round-2 paths.
file_paths_2 = [path_2 + '/' + file_name for file_name in session_list_2]
file_paths = [path_1 + '/' + file_name for file_name in session_list_1] + file_paths_2

In [4]:
def get_file_info(file_path_name):
    """Extract the date, animal number and session label from a session file path.

    The file name (everything after the last '/') is split on '_' and each
    piece is classified purely by its length: six characters -> date, three
    characters -> animal number, four characters -> session.  When several
    pieces share a length, the last one wins.

    Parameters
    ----------
    file_path_name : str
        Path to a session file, using '/' as the separator.

    Returns
    -------
    tuple of (str, str, str)
        ``(date, animal_number, session)``.

    Raises
    ------
    ValueError
        If the file name has no piece of one of the three expected lengths.
        (This used to surface as an UnboundLocalError at the return statement.)
    """
    file_name = file_path_name.split('/')[-1]

    # Field name -> pattern its piece must match (exactly N arbitrary characters).
    patterns = {
        'date': r'^......$',
        'animal_number': r'^...$',
        'session': r'^....$',
    }

    found = {}
    for piece in file_name.split('_'):
        for field, pattern in patterns.items():
            if re.search(pattern, piece):
                found[field] = piece

    missing = [field for field in patterns if field not in found]
    if missing:
        raise ValueError(
            "could not find " + ", ".join(missing) + " in file name '" + file_name + "'"
        )

    return found['date'], found['animal_number'], found['session']

In [5]:
def make_array_from_path(file_path_name):
    """Read a Med file from disk and return its contents as a 2-D array.

    The file is parsed by ``pd.read_table`` with pandas' default settings, and
    the resulting table is handed back as a plain array in which each row
    corresponds to one parsed row of the file.

    NOTE(review): with the default settings pandas treats the first line of the
    file as the column header, so that line is not part of the returned array.
    """
    return pd.read_table(file_path_name).values

In [6]:
def get_events_and_times(array, event_letter, time_letter):
    """Collect the event stamps and time stamps stored in a Med PC data array.

    ``array`` is scanned row by row.  A row whose text contains
    ``event_letter + ':'`` (or ``time_letter + ':'``) opens the event (or time)
    section; every directly following data row is split on whitespace, its
    first token (the row label) is dropped and the remaining tokens are read as
    floats.  The section ends at the first row that is not a data row.

    Changes from the previous implementation:
    * a section that runs to the last row of the array no longer raises
      IndexError;
    * the row that ends a section is itself checked for both headers instead of
      sometimes being skipped;
    * the data-row pattern is a raw string (same pattern, no invalid escapes).

    Parameters
    ----------
    array : 2-D array-like
        Rows as produced by make_array_from_path: the first cell of each row
        holds the text of one line.
    event_letter, time_letter : str
        Letters the Med PC program uses for the event-stamp and time-stamp
        arrays.  They are used as regular-expression text, unescaped.

    Returns
    -------
    pandas.DataFrame
        Columns ``event_stamp`` and ``time_stamp``, one row per recorded event.

    Raises
    ------
    ValueError
        If the two sections do not contain the same number of values.
    """
    # str() of a data row looks like "['     0:   22.000 ...']", i.e. the
    # cell text starts with a space right after the opening "['".
    data_row = re.compile(r"\['\ ")
    event_header = re.compile(str(event_letter + ':'))
    time_header = re.compile(str(time_letter + ':'))

    event_data = []
    time_data = []
    target = None  # list currently being filled, or None outside a section

    for row in array:
        text = str(row)
        if target is not None and data_row.search(text):
            target.extend(float(value) for value in row[0].split()[1:])
            continue
        # Not a data row of the open section: it may open a new section.
        if event_header.search(text):
            target = event_data
        elif time_header.search(text):
            target = time_data
        else:
            target = None

    if len(event_data) != len(time_data):
        raise ValueError(
            'found %d event stamps but %d time stamps' % (len(event_data), len(time_data))
        )

    es_et = pd.DataFrame(data=np.column_stack((event_data, time_data)))
    es_et.columns = ['event_stamp', 'time_stamp']

    return es_et

In [7]:
def get_trial_times(events_and_times, trial_start, trial_end, ITI_start, ITI_end):
    """Tabulate the time stamps that mark each trial segment.

    ``events_and_times`` is the event/time table; the four remaining arguments
    are the Med PC event codes for the start and end of the trial segment
    (e.g. conditioned stimulus onset/offset) and of the ITI.

    Returns a DataFrame with columns ``trial_start``, ``trial_end``,
    ``ITI_start`` and ``ITI_end``.  The ITI columns are cut to the number of
    trial-start stamps that were found.
    """
    def stamps_for(code):
        # Time stamps of every event carrying the given code, in table order.
        matches = events_and_times[events_and_times.event_stamp == code]
        return matches['time_stamp'].values

    onsets = stamps_for(trial_start)
    n_onsets = len(onsets)

    rows = [
        onsets,
        stamps_for(trial_end),
        stamps_for(ITI_start)[0:n_onsets],
        stamps_for(ITI_end)[0:n_onsets],
    ]

    # Each stamp series was laid out as a row; transpose so it becomes a column.
    times = pd.DataFrame(data = rows).T
    times.columns = ['trial_start', 'trial_end', 'ITI_start', 'ITI_end']

    return times

In [8]:
def create_trials_df(events_and_times, trial_times):
    """Gather the events that fall inside each trial into one table.

    For every row of ``trial_times`` the events whose ``time_stamp`` lies
    between that row's ``trial_start`` and ``trial_end`` (both inclusive) are
    selected and tagged with two extra columns: ``trial_#`` (1 for the first
    trial, 2 for the second, ...) and ``trial_start``.

    Changes from the previous implementation:
    * the tags are added with ``assign`` on a new frame instead of being
      written onto a filtered slice of ``events_and_times``;
    * the per-trial frames are concatenated once instead of inside the loop;
    * trials are taken in row order, regardless of the index labels of
      ``trial_times``;
    * with no trials the result still carries the expected columns.

    Parameters
    ----------
    events_and_times : pandas.DataFrame
        Must contain a ``time_stamp`` column.
    trial_times : pandas.DataFrame
        Must contain ``trial_start`` and ``trial_end`` columns.

    Returns
    -------
    pandas.DataFrame
        The columns of ``events_and_times`` followed by ``trial_#`` and
        ``trial_start``, with a fresh 0..n-1 index.
    """
    stamps = events_and_times['time_stamp']
    starts = trial_times['trial_start'].values
    ends = trial_times['trial_end'].values

    per_trial = []
    for trial_number, (start, end) in enumerate(zip(starts, ends), start=1):
        in_trial = (stamps >= start) & (stamps <= end)
        tags = {'trial_#': trial_number, 'trial_start': start}
        per_trial.append(events_and_times[in_trial].assign(**tags))

    if not per_trial:
        columns = list(events_and_times.columns) + ['trial_#', 'trial_start']
        return pd.DataFrame(columns=columns)

    return pd.concat(per_trial, axis=0, ignore_index=True)

In [9]:
def count(trial_data, bit, trials=25):
    """Count, for each trial, how many events carry the code ``bit``.

    Trials are numbered from 1 in the ``trial_#`` column, so trials
    1..``trials`` are scanned.  (The previous loop scanned 0..trials-1, which
    always reported 0 for a non-existent trial 0 and never reached the last
    trial.)

    Parameters
    ----------
    trial_data : pandas.DataFrame
        Must contain ``trial_#`` and ``event_stamp`` columns.
    bit : number
        Event code to count.
    trials : int, default 25
        Number of trials to report.

    Returns
    -------
    list of int
        ``trials`` entries; entry ``k`` is the count for trial ``k + 1``.
    """
    matching_trials = trial_data.loc[trial_data['event_stamp'] == bit, 'trial_#']
    return [
        int((matching_trials == trial_number).sum())
        for trial_number in range(1, trials + 1)
    ]

In [10]:
def latency(trial_data, bit, trials=25):
    """Compute per-trial latencies of the events that carry the code ``bit``.

    A latency is ``time_stamp - trial_start`` of a matching event.  Trials are
    numbered from 1 in the ``trial_#`` column, so trials 1..``trials`` are
    scanned.  (The previous loop scanned 0..trials-1, which shifted every
    result by one slot and never reached the last trial.)

    Parameters
    ----------
    trial_data : pandas.DataFrame
        Must contain ``trial_#``, ``event_stamp``, ``time_stamp`` and
        ``trial_start`` columns.
    bit : number
        Event code of interest.
    trials : int, default 25
        Number of trials to report.

    Returns
    -------
    (list, list)
        ``latency_1st`` - latency of the first matching row of each trial, and
        ``latency_ave`` - mean latency over all matching rows of each trial.
        Entry ``k`` of each list belongs to trial ``k + 1``.

    NOTE(review): a trial without a matching event is reported as 0.0, which
    cannot be told apart from a genuine zero latency.
    """
    hits = trial_data[trial_data['event_stamp'] == bit]
    elapsed = hits['time_stamp'] - hits['trial_start']

    latency_1st = []
    latency_ave = []
    for trial_number in range(1, trials + 1):
        in_trial = elapsed[hits['trial_#'] == trial_number].values
        if len(in_trial) < 1:
            latency_1st.append(0.0)
            latency_ave.append(0.0)
        else:
            latency_1st.append(in_trial[0])
            latency_ave.append(in_trial.mean())

    return latency_1st, latency_ave

In [11]:
def create_session_df(trial_data, bit_list, trials = 25):
    """Build a per-trial summary table for the requested event codes.

    For every code in ``bit_list`` three columns are added: ``count_bit<code>``
    (from count()), ``latency_1st_bit<code>`` and ``latency_ave_bit<code>``
    (from latency()).

    ``trials`` is now forwarded to count() and latency(); previously they
    always ran with their own default of 25, so any other value produced
    columns whose length did not match the index.

    Parameters
    ----------
    trial_data : pandas.DataFrame
        Per-trial event table as produced by create_trials_df.
    bit_list : iterable
        Event codes to summarise.
    trials : int, default 25
        Number of trials, i.e. number of rows in the result.

    Returns
    -------
    pandas.DataFrame
        Indexed 0..trials-1; row ``k`` holds entry ``k`` of the lists returned
        by count() and latency().
    """
    session_data = pd.DataFrame(index=np.arange(0, trials))

    for bit in bit_list:
        label = str(bit)
        session_data['count_bit' + label] = count(trial_data, bit, trials)
        first, average = latency(trial_data, bit, trials)
        session_data['latency_1st_bit' + label] = first
        session_data['latency_ave_bit' + label] = average

    return session_data

In [18]:
# Run the whole pipeline on the first session file in the list.
file_path = file_paths[0]
# NOTE(review): get_file_info returns (date, animal_number, session), so the
# value bound to `name` here is the date piece of the file name.
name, animal_number, session = get_file_info(file_path)
array = make_array_from_path(file_path)
# "E" and "T" are passed as the event-stamp and time-stamp letters.
events_and_times = get_events_and_times(array, "E", "T")
# Event codes: 22/23 are passed as trial start/end, 15/16 as ITI start/end.
trial_times = get_trial_times(events_and_times, 22, 23, 15, 16)
trial_data = create_trials_df(events_and_times, trial_times)
# Summarise event code 6 per trial (PCA_analysis treats code 6 as a food-cup head entry).
session_df = create_session_df(trial_data, [6])

In [None]:
def PCA_analysis(cs_iti, es_et, trials=25):
    """Compute session-level lever/food-cup response measures and a PCA score.

    NOTE(review): "PCA" presumably stands for Pavlovian conditioned approach
    here, not principal component analysis - confirm.

    For each of the first ``trials`` trials, the events whose time stamp lies
    between ``CS_start`` and ``CS_end`` (inclusive) are examined.  Event codes
    1 and 2 are lever events and code 6 is a head entry into the food cup.
    The lever code in use is detected from the data (2 wins over 1 when both
    occur in a trial) and is carried over to later trials.

    The number of trials used to be hard-coded as 25, both in the loop and in
    the probabilities; it is now the ``trials`` parameter, whose default keeps
    the old behaviour.

    Parameters
    ----------
    cs_iti : pandas.DataFrame
        Must contain ``CS_start`` and ``CS_end`` columns with index labels
        0..trials-1.
    es_et : pandas.DataFrame
        Must contain ``event_stamp`` and ``time_stamp`` columns.
    trials : int, default 25
        Number of trials in the session; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        One row with the columns ``lever_press``, ``head_entries``,
        ``lever_prob``, ``head_prob``, ``ave_lat_lever``, ``ave_lat_head``,
        ``response_bias``, ``prob_diff`` and ``PCA_score``.
        NOTE(review): the latency score enters ``PCA_score`` but is not stored
        as a column of its own.

    Raises
    ------
    ValueError
        If ``trials`` is smaller than 1.
    """
    if trials < 1:
        raise ValueError('trials must be at least 1')

    lever = -1          # lever event code in use; -1 until a lever event is seen
    lever_press = 0     # lever events during the CS, summed over trials
    head_entries = 0    # head entries during the CS, summed over trials
    count_lever = 0     # trials with at least one lever event
    count_head = 0      # trials with at least one head entry
    lat_lever_sum = 0   # sum of first-lever-event latencies
    lat_head_sum = 0    # sum of first-head-entry latencies

    for i in range(trials):
        cs_start = cs_iti.CS_start[i]
        during_cs = (es_et.time_stamp >= cs_start) & (es_et.time_stamp <= cs_iti.CS_end[i])
        data = es_et[during_cs]

        if len(data[data.event_stamp == 1]) > 0:
            lever = 1
        if len(data[data.event_stamp == 2]) > 0:
            lever = 2

        lever_times = data[data.event_stamp == lever].time_stamp
        head_times = data[data.event_stamp == 6].time_stamp

        if lever > 0:
            lever_press = lever_press + len(lever_times)
        head_entries = head_entries + len(head_times)

        if len(lever_times) > 0:
            count_lever = count_lever + 1
            lat_lever_sum = lat_lever_sum + (lever_times.iloc[0] - cs_start)

        if len(head_times) > 0:
            count_head = count_head + 1
            lat_head_sum = lat_head_sum + (head_times.iloc[0] - cs_start)

    # Share of trials with at least one response of each kind.
    lever_prob = count_lever / trials
    head_prob = count_head / trials

    # Mean first-response latency over the trials that had such a response;
    # 0 when there was none.  Guarding on the count (not the sum) states the
    # division precondition directly; latencies are never negative because the
    # events were selected with time_stamp >= CS_start.
    ave_lat_lever = lat_lever_sum / count_lever if count_lever > 0 else 0
    ave_lat_head = lat_head_sum / count_head if count_head > 0 else 0

    # Ratio of lever to food-cup responses relative to all responses.
    response_bias = 0
    if (lever_press > 0) or (head_entries > 0):
        response_bias = (lever_press - head_entries) / (lever_press + head_entries)

    prob_diff = lever_prob - head_prob
    lat_score = (ave_lat_head - ave_lat_lever) / 10

    # Average of the three scores (-1 is completely goal-tracking;
    # +1 is completely sign-tracking).
    PCA_score = (response_bias + prob_diff + lat_score) / 3

    PCA_values = pd.DataFrame({
        'lever_press': [lever_press],
        'head_entries': [head_entries],
        'lever_prob': [lever_prob],
        'head_prob': [head_prob],
        'ave_lat_lever': [ave_lat_lever],
        'ave_lat_head': [ave_lat_head],
        'response_bias': [response_bias],
        'prob_diff': [prob_diff],
        'PCA_score': [PCA_score],
    })

    return PCA_values

In [None]:
# NOTE(review): `cs_iti` and `es_et` are not defined in any cell visible above.
# The earlier cells produce `trial_times` and `events_and_times` instead, and
# PCA_analysis reads CS_start/CS_end columns, whereas get_trial_times names its
# columns trial_start/trial_end. Define both inputs before running this cell.
PCA_df = PCA_analysis(cs_iti, es_et)

In [None]:
# Display the one-row table returned by PCA_analysis.
PCA_df