In [2]:
import time
import os
import math
import numpy as np
import cv2
import pandas as pd
import pyautogui as pa

from lib.auto_GUI.auto_GUI_base import AutoGUIBase
from lib.auto_GUI.auto_PhotoZ import AutoPhotoZ
from lib.file.ROI_reader import ROIFileReader

####################################################################################
# Input (organized into subfolders by date): 
#     ROI files (single cell responses) for each slice and location
#     Stimulation times in stim_times.csv
#     ZDA files
#     paired_pulse_summary.csv
# Output:
#     paired_pulse_summary_edited.csv (values for each ROI, ROI pixel count)
####################################################################################

In [13]:
# Load data, not from Drive for Desktop since PhotoZ complains about loading zda from Drive
data_dir = "C:/Users/jjudge3/Desktop/Data/mm_full_pipeline_targets/Anna's/"
date_format = 'yyyy-mm-dd'
# measure window settings ( in frames )
pre_window_offset = 0
window_width = 40

input_csv = data_dir + 'paired_pulse_summary.csv'
output_csv = data_dir + 'paired_pulse_summary_edited.csv'
overwrite_intermed_dat_files = False  # whether to re-save value files

enable_photoZ_interaction = True
initialize_photoZ = False

# load the all-data CSV dir
df = pd.read_csv(input_csv)
df = pd.DataFrame(df)

In [3]:
# set up PhotoZ (open it manually)
if enable_photoZ_interaction:
    aPhz = AutoPhotoZ(data_dir=data_dir)
    if initialize_photoZ:
        aPhz.prepare_photoZ()

In [18]:
is_measure_window_width_set = window_width
for subdir, dirs, files in os.walk(data_dir):
    zda_files = []
    roi_files = {}  # map slice number to ROIs file
    
    if 'Usable' not in subdir:
        continue
    
    # locate important file names
    for f in files:
        if f.endswith(".zda"):
            zda_files.append(f)
        if 'ROIs' in f[-22:] and f.endswith('.dat'):
            metadata = f.replace('.dat','').split("_ROIs_")
            slice_no, loc_no = [int(x) for x in metadata[0].split("-")]
            if slice_no not in roi_files:
                roi_files[slice_no] = {}
            if loc_no not in roi_files[slice_no]:
                roi_files[slice_no][loc_no] = []
            start, end = [int(x) for x in metadata[1].split("_to_")]
            roi_files[slice_no][loc_no].append(
                {"start": start,
                "end": end,
                "filename": f}
            )
    
    # open stimtimes.csv file for this subdir
    stim_times_df = pd.read_csv(subdir + "/stimtimes.csv",
                                header=None,
                                names=['Rec_ID', 'Stim_Time_1', 'Stim_Time_2'])
   
    if len(roi_files) < 1 or len(zda_files) < 1:
        continue
    
    for zda_file in zda_files:
        rec_id = zda_file.split('.')[0]
            
        # determine date and slice, loc, rec
        aPhz = AutoPhotoZ(data_dir=subdir)
        trace_values_to_save = {
            'Max Amp': {'method': aPhz.select_maxamp_trace_value, 
                         'filename_id': "maxAmp_"},
            'Max Amp / SD': {'method': aPhz.select_SNR_trace_value, 
                         'filename_id': 'SNR_'},
            'Half Amp Latency': {'method': aPhz.select_latency_trace_value, 
                         'filename_id': "latency_"},
            'Half Width': {'method': aPhz.select_half_width_trace_value, 
                         'filename_id': 'halfWidth_'},
            'Max Amp Latency': {'method': aPhz.select_peaktime_trace_value, 
                         'filename_id': 'maxAmpLatency_'},
            'Half Rise Time': {'method': aPhz.select_half_rise_time_trace_value, 
                         'filename_id': 'riseTime_'},
            'Half Decay Time': {'method': aPhz.select_half_decay_time_trace_value, 
                         'filename_id': 'decayTime_'}
        }

        is_zda_file_open = False
        
        date = subdir.split("_Usable")[0][-10:]
        date = [int(x) for x in date.split("-")]
        if date_format != 'yyyy-mm-dd':
            date[2] += 2000  # full year format
        else:
            date = [date[1], date[2], date[0]]
        date = "/".join([str(d) for d in date])
        
        slice_no, loc_no, rec_no = [int(x) for x in rec_id.split("_")]
        
        # find stim times for this recording
        stim_times_rec = stim_times_df[stim_times_df['Rec_ID'] == rec_id]
        stim_time_1 = int(stim_times_rec['Stim_Time_1'])
        stim_time_2 = int(stim_times_rec['Stim_Time_2'])
        
        for roi_grp_idx in range(len(roi_files[slice_no][loc_no])):
            roi_file = roi_files[slice_no][loc_no][roi_grp_idx]['filename']
            roi_id_start = roi_files[slice_no][loc_no][roi_grp_idx]['start']
            roi_id_end = roi_files[slice_no][loc_no][roi_grp_idx]['end']
            print("\n\nscraping data from ", zda_file, 
                  "using", roi_file, "on date:", date)

            filename_end = rec_id + '.dat'
            
            # find # pixels in ROI 
            rfr = ROIFileReader(subdir + "/" + roi_file)
            n_pixels = [len(r) for r in rfr.get_roi_list()]
            
            # see if we can skip this entire ROI group (if files already exist)
            if not overwrite_intermed_dat_files:
                are_files_complete = True
                for trace_val_type in trace_values_to_save:
                    for stim_time_idx in range(1,3):
                        value_filename = subdir + "/" + trace_values_to_save[trace_val_type]['filename_id'] \
                                                + str(stim_time_idx) + "_" + filename_end
                        # may be able to skip saving file
                        if not os.path.exists(value_filename):
                            are_files_complete = False
                            break
                    if not are_files_complete:
                        break
            if are_files_complete:
                print("We already have all the value files for this setup. Skipping...")
                
            # open the PhotoZ file
            if enable_photoZ_interaction and not is_zda_file_open and not are_files_complete:
                aPhz.select_PhotoZ()
                aPhz.open_zda_file(subdir + "/" + zda_file)
                is_zda_file_open = True

            # open roi file in photoZ
            if enable_photoZ_interaction and not are_files_complete:
                aPhz.select_roi_tab()
                aPhz.open_roi_file(subdir + "/" + roi_file)

                
            for stim_time_idx in range(1,3):
                stim_time = [stim_time_1, stim_time_2][stim_time_idx-1]
        
                # set the stim window
                if enable_photoZ_interaction and not are_files_complete:
                    # is_measure_window_width_set is equal to window_width if window not yet set.
                    aPhz.set_measure_window(stim_time - pre_window_offset, 
                                            is_measure_window_width_set)
                    is_measure_window_width_set = None  # no need to set in future


                # save trace values from PhotoZ
                for trace_val_type in trace_values_to_save:
                    value_filename = subdir + "/" + trace_values_to_save[trace_val_type]['filename_id'] \
                                            + str(stim_time_idx) + "_" + filename_end
                    # may be able to skip saving file
                    if not (os.path.exists(value_filename) and not overwrite_intermed_dat_files):
                        value_method = trace_values_to_save[trace_val_type]['method']
                        if enable_photoZ_interaction and not are_files_complete:
                            value_method()
                            aPhz.save_trace_values(value_filename)
                    trace_values_to_save[trace_val_type]['full_path_filename_' + str(stim_time_idx)] = value_filename

            ################################
            new_rows = {"Date" : [date for i in range(roi_id_start, roi_id_end+1)], 
                        "Slice_Loc_Rec": [date for i in range(roi_id_start, roi_id_end+1)],
                        "ROI ID":  [i for i in range(roi_id_start, roi_id_end+1)],
                        "Stim Time #1":  [stim_time_1 for i in range(roi_id_start, roi_id_end+1)],
                        "Stim Time #2": [stim_time_2 for i in range(roi_id_start, roi_id_end+1)],
                        "Pixel Count": n_pixels[:roi_id_end+1- roi_id_start]}
            
            # sanity check on ROI file lengths
            if len(n_pixels) > roi_id_end+1- roi_id_start:
                print(n_pixels, len(n_pixels))

            for trace_val_type in trace_values_to_save:
                for stim_time_idx in range(1,3):
                    trace_stim_idx = trace_val_type + " #" + str(stim_time_idx)
                    if trace_stim_idx not in new_rows:
                        new_rows[trace_stim_idx] = []
                    if trace_stim_idx not in df.columns:
                        df[trace_stim_idx] = []
                    intermed_df = pd.read_csv(trace_values_to_save[trace_val_type]['full_path_filename_' + str(stim_time_idx)],
                                              sep='\t',
                                              header=None,
                                              names=['Index',  'Values'])
                    for i in range(roi_id_end+1-roi_id_start):
                        v = intermed_df['Values'][i]
                        new_rows[trace_stim_idx].append(v)
            
            for key in new_rows:
                print(key, len(new_rows[key]))
            print(df.columns)
                
            nrs = pd.DataFrame.from_dict(new_rows)
            print(nrs.shape, df.shape)
            df = pd.concat([nrs, df])




scraping data from  02_01_02.zda using 02-01_ROIs_1_to_26.dat on date: 9/2/2022
We already have all the value files for this setup. Skipping...
Date 26
Slice_Loc_Rec 26
ROI ID 26
Stim Time #1 26
Stim Time #2 26
Pixel Count 26
Max Amp #1 26
Max Amp #2 26
Max Amp / SD #1 26
Max Amp / SD #2 26
Half Amp Latency #1 26
Half Amp Latency #2 26
Half Width #1 26
Half Width #2 26
Max Amp Latency #1 26
Max Amp Latency #2 26
Half Rise Time #1 26
Half Rise Time #2 26
Half Decay Time #1 26
Half Decay Time #2 26
Index(['Date', 'Slice_Loc_Rec', 'ROI ID', 'Stim Time #1', 'Stim Time #2',
       'Pixel Count', 'Max Amp #1', 'Max Amp #2', 'Max Amp / SD #1',
       'Max Amp / SD #2', 'Half Amp Latency #1', 'Half Amp Latency #2',
       'Half Width #1', 'Half Width #2', 'Max Amp Latency #1',
       'Max Amp Latency #2', 'Half Rise Time #1', 'Half Rise Time #2',
       'Half Decay Time #1', 'Half Decay Time #2', 'Interpulse Interval',
       'Paired Pulse Ratio'],
      dtype='object')
(26, 20) (1842, 22)



Date 33
Slice_Loc_Rec 33
ROI ID 33
Stim Time #1 33
Stim Time #2 33
Pixel Count 33
Max Amp #1 33
Max Amp #2 33
Max Amp / SD #1 33
Max Amp / SD #2 33
Half Amp Latency #1 33
Half Amp Latency #2 33
Half Width #1 33
Half Width #2 33
Max Amp Latency #1 33
Max Amp Latency #2 33
Half Rise Time #1 33
Half Rise Time #2 33
Half Decay Time #1 33
Half Decay Time #2 33
Index(['Date', 'Slice_Loc_Rec', 'ROI ID', 'Stim Time #1', 'Stim Time #2',
       'Pixel Count', 'Max Amp #1', 'Max Amp #2', 'Max Amp / SD #1',
       'Max Amp / SD #2', 'Half Amp Latency #1', 'Half Amp Latency #2',
       'Half Width #1', 'Half Width #2', 'Max Amp Latency #1',
       'Max Amp Latency #2', 'Half Rise Time #1', 'Half Rise Time #2',
       'Half Decay Time #1', 'Half Decay Time #2', 'Interpulse Interval',
       'Paired Pulse Ratio'],
      dtype='object')
(33, 20) (2171, 22)


scraping data from  04_03_01.zda using 04-03_ROIs_1_to_7.dat on date: 9/2/2022
We already have all the value files for this setup. Skipping...
Da

Date 100
Slice_Loc_Rec 100
ROI ID 100
Stim Time #1 100
Stim Time #2 100
Pixel Count 100
Max Amp #1 100
Max Amp #2 100
Max Amp / SD #1 100
Max Amp / SD #2 100
Half Amp Latency #1 100
Half Amp Latency #2 100
Half Width #1 100
Half Width #2 100
Max Amp Latency #1 100
Max Amp Latency #2 100
Half Rise Time #1 100
Half Rise Time #2 100
Half Decay Time #1 100
Half Decay Time #2 100
Index(['Date', 'Slice_Loc_Rec', 'ROI ID', 'Stim Time #1', 'Stim Time #2',
       'Pixel Count', 'Max Amp #1', 'Max Amp #2', 'Max Amp / SD #1',
       'Max Amp / SD #2', 'Half Amp Latency #1', 'Half Amp Latency #2',
       'Half Width #1', 'Half Width #2', 'Max Amp Latency #1',
       'Max Amp Latency #2', 'Half Rise Time #1', 'Half Rise Time #2',
       'Half Decay Time #1', 'Half Decay Time #2', 'Interpulse Interval',
       'Paired Pulse Ratio'],
      dtype='object')
(100, 20) (2594, 22)


scraping data from  05_03_03.zda using 05-03_ROIs_101_to_119.dat on date: 9/2/2022
We already have all the value files for t

Stim Time #1 27
Stim Time #2 27
Pixel Count 27
Max Amp #1 27
Max Amp #2 27
Max Amp / SD #1 27
Max Amp / SD #2 27
Half Amp Latency #1 27
Half Amp Latency #2 27
Half Width #1 27
Half Width #2 27
Max Amp Latency #1 27
Max Amp Latency #2 27
Half Rise Time #1 27
Half Rise Time #2 27
Half Decay Time #1 27
Half Decay Time #2 27
Index(['Date', 'Slice_Loc_Rec', 'ROI ID', 'Stim Time #1', 'Stim Time #2',
       'Pixel Count', 'Max Amp #1', 'Max Amp #2', 'Max Amp / SD #1',
       'Max Amp / SD #2', 'Half Amp Latency #1', 'Half Amp Latency #2',
       'Half Width #1', 'Half Width #2', 'Max Amp Latency #1',
       'Max Amp Latency #2', 'Half Rise Time #1', 'Half Rise Time #2',
       'Half Decay Time #1', 'Half Decay Time #2', 'Interpulse Interval',
       'Paired Pulse Ratio'],
      dtype='object')
(27, 20) (2997, 22)


scraping data from  07_01_03.zda using 07-01_ROIs_1_to_100.dat on date: 9/2/2022
We already have all the value files for this setup. Skipping...
Date 100
Slice_Loc_Rec 100
ROI ID 1

In [20]:
# divide amplitudes by 1000
df['Max Amp #1'] /= 1000
df['Max Amp #2'] /= 1000

df['Interpulse Interval'] =  (df['Stim Time #2'] - df['Stim Time #1']) / df['Stim Time #2']

# add paired pulse ratio column
df['Paired Pulse Ratio'] = df['Max Amp #2'] / df['Max Amp #1']

# write the pandas dataframe back to csv
df.to_csv(output_csv, index=False)

In [21]:
df

Unnamed: 0,Date,Slice_Loc_Rec,ROI ID,Stim Time #1,Stim Time #2,Pixel Count,Max Amp #1,Max Amp #2,Max Amp / SD #1,Max Amp / SD #2,...,Half Width #1,Half Width #2,Max Amp Latency #1,Max Amp Latency #2,Half Rise Time #1,Half Rise Time #2,Half Decay Time #1,Half Decay Time #2,Interpulse Interval,Paired Pulse Ratio
0,9/2/2022,9/2/2022,1,90,290,1,0.002451,0.004178,4.52277,7.71023,...,5.75871,3.484420,53.0,154.5,1.330700,1.215690,4.42801,2.268730,0.689655,1.704759
1,9/2/2022,9/2/2022,2,90,290,1,0.002384,0.001886,5.97157,4.72602,...,6.59574,9.405290,51.0,151.0,0.992736,1.296630,5.60300,8.108660,0.689655,0.791422
2,9/2/2022,9/2/2022,3,90,290,1,0.002024,0.002503,3.27366,4.04783,...,4.80899,7.802310,53.0,155.0,0.844747,4.851820,3.96424,2.950500,0.689655,1.236481
3,9/2/2022,9/2/2022,4,90,290,1,0.002960,0.001896,7.60110,4.86978,...,7.63417,6.730080,55.5,154.5,4.789060,1.714130,2.84510,5.015960,0.689655,0.640667
4,9/2/2022,9/2/2022,5,90,290,1,0.002664,0.001087,5.60906,2.28812,...,5.26090,-0.261387,54.0,145.0,3.591470,-0.823635,1.66942,0.562248,0.689655,0.407933
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21,9/2/2022,9/2/2022,22,90,190,1,0.004150,0.003495,6.36070,5.35701,...,4.15598,4.412380,56.0,102.5,1.170480,2.253140,2.98550,2.159240,0.526316,0.842205
22,9/2/2022,9/2/2022,23,90,190,1,0.002970,0.001169,5.46070,2.14966,...,3.20471,8.484020,52.0,104.5,1.156740,6.577950,2.04797,1.906070,0.526316,0.393660
23,9/2/2022,9/2/2022,24,90,190,4,0.003603,0.002913,6.17379,4.99109,...,3.37683,2.855960,51.0,101.0,1.236900,1.318520,2.13993,1.537430,0.526316,0.808432
24,9/2/2022,9/2/2022,25,90,190,1,0.004165,0.003379,4.92409,3.99495,...,3.87968,4.902730,57.0,104.0,1.871400,0.958233,2.00829,3.944500,0.526316,0.811308
