In [1]:
import os
import gc
import pathlib
import scipy.io
import pandas as pd
import numpy as np
import datetime as dt
from ctypes import *
from pypl2 import pypl2api, pypl2lib

### take trial type under consideration

# handle files

In [2]:
def get_files_list(dir_name, files_type):
    files_list = []
    full_dir_path = os.path.abspath(dir_name)
    
    for f in os.listdir(full_dir_path):
        if f.endswith(files_type):
            files_list.append((os.path.join(full_dir_path, f)))
    return files_list

In [3]:
matlab_files = [r'D:\PHb\Jan_07_2021_pHb\seq2\fov#01_trial#09_rep#01_logFile.mat', 
                r'D:\PHb\Jan_07_2021_pHb\seq2\fov#01_trial#10_rep#01_logFile.mat', 
                r'D:\PHb\Jan_07_2021_pHb\seq2\fov#01_trial#11_rep#01_logFile.mat']
plexon_files = [r'D:\PHb\Jan_07_2021_pHb\seq2\dat008-01.pl2', 
                r'D:\PHb\Jan_07_2021_pHb\seq2\dat009-01.pl2', 
                r'D:\PHb\Jan_07_2021_pHb\seq2\dat010-01.pl2']

# create df with the data

In [4]:
def create_matlab_with_nd_df(matlab_files):
    df = pd.DataFrame([{'matlab_filename': f, 
                        'ND': scipy.io.loadmat(f)['ND'][0][0], 
                        'matlab_time': scipy.io.loadmat(f)['startTime'][0]} for f in matlab_files])
    df['matlab_time'] = pd.to_datetime(df['matlab_time'].astype('str'), format='%b.%d,%Y %H:%M:%S')
    return df.sort_values(by=['matlab_time']).reset_index(drop=True)

In [5]:
def create_intensity_df(filename):
    intensity_values_df = pd.read_csv(filename, header=0, index_col=0)
    return intensity_values_df

In [6]:
intensity_values_df = create_intensity_df('10_intensities.csv')

In [7]:
def create_plexon_df(plexon_files):
    df = pd.DataFrame(plexon_files, columns=['plexon_filename'])
    
    for i, filename in enumerate(plexon_files):
        p = pypl2lib.PyPL2FileReader() # Create an instance of PyPL2FileReader.    
        handle = p.pl2_open_file(filename) # Verify that the file passed exists first, if it does open the file
        file_info = pypl2lib.PL2FileInfo() # Create instance of PL2FileInfo 
        res = p.pl2_get_file_info(handle, file_info)

        df.loc[df['plexon_filename'] == filename, 'plexon_time'] = (dt.time(file_info.m_CreatorDateTime.tm_hour, 
                                                                            file_info.m_CreatorDateTime.tm_min, 
                                                                            file_info.m_CreatorDateTime.tm_sec))
        
    return df.sort_values(by=['plexon_time']).reset_index(drop=True)


# load experiment data from files

In [8]:
def extract_channels_data(filename, channels_num, channel_type): # TODO:: rename function
    flag = True
    for i in range(channels_num):
        channel_name = '{}{:02d}'.format(channel_type, i+1)
        adfrequency, n, timestamps, fragmentcounts, ad = pypl2api.pl2_ad(filename, channel_name)
        if flag:
            channels_ad = np.empty([channels_num, n])
            flag = False
        channels_ad[i] = ad
    channels_ad = np.multiply(channels_ad, 1000000) #convert volts to microvolts
    channels_ad = np.int16(channels_ad)
    return n, channels_ad, adfrequency

In [9]:
def extract_pl2_data_into_bin(plexon_files, files_df, output_file_path, channel_type, prefix=''):
    for f, i in zip(plexon_files, range(len(plexon_files))):
        n, channels_ad, adfrequency = extract_channels_data(f, 32, channel_type)
        files_df.loc[files_df.plexon_filename == f, 'plexon_samples_num'] = n
        files_df.loc[files_df.plexon_filename == f, 'ad_frequency'] = adfrequency
        channels_ad.ravel(order='F').tofile('{}/{}sequence_{}.bin'.format(output_file_path, prefix, i + 1))
        print(channels_ad.shape)
        print(channels_ad.ravel(order='F').shape)
        del channels_ad
    return files_df

In [10]:
trial_type = 'sequence'
intensities_number = 10

In [11]:
files_path = 'files/{}'.format(trial_type)
matlab_files = get_files_list(files_path, '.mat')
plexon_files = get_files_list(files_path, '.pl2')

In [12]:
plexon_files_df = create_plexon_df(plexon_files)

In [13]:
plexon_files_df

Unnamed: 0,plexon_filename,plexon_time
0,C:\Users\shirahad\Documents\research\electroph...,08:52:31


In [14]:
# create df with all the relevant data per file
files_df = create_matlab_with_nd_df(matlab_files)

In [15]:
files_df = pd.merge(files_df, plexon_files_df, left_index=True, right_index=True)

In [16]:
files_df['matlab_filename'][11]

KeyError: 11

In [17]:
#os.path.abspath('quantum_catch_melanopsin_vs_LED_ND_with-diffuser_in_vivo_rig.mat')
intensity_values_df = create_intensity_df('{}_intensities.csv'.format(intensities_number))
files_df = pd.merge(files_df, intensity_values_df, on='ND', how='left')

In [18]:
# extract pl2 data into bin files and add number of samples per file to files_df
files_df = extract_pl2_data_into_bin(files_df['plexon_filename'], files_df, files_path, 'WB')

(32, 12716581)
(406930592,)


In [19]:
files_path

'files/sequence'

In [20]:
files_df = files_df.sort_values(by=['plexon_time']).reset_index(drop=True)

In [21]:
files_df

Unnamed: 0,matlab_filename,ND,matlab_time,plexon_filename,plexon_time,intensity,plexon_samples_num,ad_frequency
0,C:\Users\shirahad\Documents\research\electroph...,2,2022-06-09 08:52:31,C:\Users\shirahad\Documents\research\electroph...,08:52:31,14.854108,12716581.0,40000.0


In [22]:
files_df.to_csv('{}/files_extracted_data_.csv'.format(files_path), sep=',', encoding='utf-8')

In [23]:
def get_ts(filename, channel):
    n, timestamps, values = pypl2api.pl2_events(filename, channel) 
    return pd.Series(timestamps)

In [24]:
def create_ts_df(df, channel):
    ts_df = pd.DataFrame()
    ts_dict = {}
    for nd in df['ND']:
        files = df.loc[df['ND'] == nd, 'plexon_filename']
        # reformat duplicated nds
        for i, f in enumerate(files):
            ts_dict['{}_{}'.format(nd, i)] = get_ts(f, channel)
    return ts_df.from_dict(ts_dict)

In [25]:
# create dfs for on, off ts per event
on_df = create_ts_df(files_df, 10)
off_df = create_ts_df(files_df, 11)
# combine the on, off ts per event to 1 df 
combined_ts_df = pd.concat([on_df.add_suffix('_on'), off_df.add_suffix('_off')], axis=1, sort=False)
# write the data to a csv file
combined_ts_df.to_csv('{}/events_ts.csv'.format(files_path), sep=',', encoding='utf-8')

In [26]:
combined_ts_df

Unnamed: 0,2_0_on,2_0_off
0,0.934650,3.014475
1,4.103125,6.179450
2,7.269250,9.343575
3,10.432975,12.506850
4,13.597425,15.675725
...,...,...
95,301.944350,304.020225
96,305.113100,307.190225
97,308.280850,310.358275
98,311.451175,313.529700


In [None]:
gc.collect()

In [None]:
def merge_bins_to_one_bin(files_path, num_of_files, prefix=''):
    output_file = '{}/sequence.bin'.format(files_path)
    for i in range(1, num_of_files):
        with open(output_file, 'ab') as out_file, open('{}/{}sequence_{}.bin'.format(files_path, prefix, i), 'rb') as in_file:
            out_file.write(in_file.read())
    return

In [None]:
from scipy.io import FortranFile

def merge_bins_to_one_bin_fortran(files_path, num_of_files, prefix=''):
    output_file = '{}/sequence_fortran.bin'.format(files_path)
    f = FortranFile(output_file, 'w')
    for i in range(1, num_of_files):
        with open('{}/{}sequence_{}.bin'.format(files_path, prefix, i), 'rb') as in_file:
            f.write_record(in_file.read())
    f.close()
    return

In [None]:
merge_bins_to_one_bin(files_path, len(files_df) + 1)

In [None]:
merge_bins_to_one_bin_fortran(files_path, len(files_df) + 1)

In [None]:
# load npy data to 1 matrix and write it to 1 binary file (according to kilosort's requirements)
data = load_npy_to_one_matrix(files_df['plexon_filename'], files_path)
data.ravel(order='F').tofile('{}/sequence.bin'.format(files_path))
del data

In [None]:
parent_dir_path = r'C:\Users\shirahad\Documents\research\electrophysiology\analyze_electrophysiology_data\pl2kilosort'
files_path = 'files/{}'.format(trial_type)
full_files_path = os.path.join(parent_dir_path, files_path)
matlab_files = get_files_list(full_files_path, '.mat')
plexon_files = get_files_list(full_files_path, '.pl2')

# create df with all the relevant data per file
files_df = create_matlab_with_nd_df(matlab_files)
plexon_files_df = create_plexon_df(plexon_files)
files_df = pd.merge(files_df, plexon_files_df, left_index=True, right_index=True) # merge matlab and plexon dfs
intensity_file_path = os.path.join(parent_dir_path, '{}_intensities.csv'.format(intensities_number))
intensity_values_df = create_intensity_df(intensity_file_path)
files_df = pd.merge(files_df, intensity_values_df, on='ND') # add intensities data to files_df
#files_df['plexon_filename'] = plexon_files_df

# extract pl2 data into bin files and add number of samples per file to files_df
#files_df = extract_pl2_data_into_bin(files_df['plexon_filename'], files_df, files_path, 'WB')

files_df = files_df.sort_values(by=['plexon_time']) # sort df according to time of trial
# write the data to csv files
files_df.to_csv('{}/files_extracted_data_.csv'.format(files_path), sep=',', encoding='utf-8')

In [None]:
plexon_files_df

# main function

In [None]:
def pl2kilosort(trial_type, intensities_number):
    # get files list
    parent_dir_path = pathlib.Path(__file__).parent.absolute()
    files_path = 'files/{}'.format(trial_type)
    full_files_path = os.path.join(parent_dir_path, files_path)
    matlab_files = get_files_list(full_files_path, '.mat')
    plexon_files = get_files_list(full_files_path, '.pl2')
    
    # create df with all the relevant data per file
    files_df = create_matlab_with_nd_df(matlab_files)
    plexon_files_df = create_plexon_df(plexon_files)
    files_df = pd.merge(files_df, plexon_files_df, left_index=True, right_index=True) # merge matlab and plexon dfs
    intensity_file_path = os.path.join(parent_dir_path, '{}_intensities.csv'.format(intensities_number))
    intensity_values_df = create_intensity_df(intensity_file_path)
    files_df = pd.merge(files_df, intensity_values_df, on='ND') # add intensities data to files_df
    files_df['plexon_filename'] = plexon_files
    
    # extract pl2 data into bin files and add number of samples per file to files_df
    files_df = extract_pl2_data_into_bin(files_df['plexon_filename'], files_df, files_path, 'WB')
    
    files_df = files_df.sort_values(by=['plexon_time']) # sort df according to time of trial
    # write the data to csv files
    files_df.to_csv('{}/files_extracted_data_.csv'.format(files_path), sep=',', encoding='utf-8')
    # create dfs for on, off ts per event
    on_df = create_ts_df(files_df, 10)
    off_df = create_ts_df(files_df, 11)
    # combine the on, off ts per event to 1 df 
    combined_ts_df = pd.concat([on_df.add_suffix('_on'), off_df.add_suffix('_off')], axis=1, sort=False)
    # write the data to a csv file
    combined_ts_df.to_csv('{}/events_ts.csv'.format(files_path), sep=',', encoding='utf-8')
    
    # garbage collection
    gc.collect()
    
    # merge all bin files to 1 binary file (according to kilosort's requirements)
    merge_bins_to_one_bin(files_path, len(files_df) + 1)
    
    return