# Read .phot and .box files

#### Flow Here
1) Import control mat file from IM-1478 07252022 (OG_signal)
2) Test script that follow the equivalent process in MATLAB\
2-1) Load phot file\
2-2) Load box file\
2-3) Get visits from box.data\
2-4) Get signal from phot.data
3) Compare two sets of signal, ref, sig1, sig2

** I assumed those files are downloaded as the following structure from the Google Drive\
https://drive.google.com/drive/folders/1Vq7ufCkZoUgm2d82v--gOEdLBit05pE9?usp=drive_link
1. data/signals.mat
2. data/IM-1478_2022-07-25_15-24-22____Tim_Conditioning.phot
3. data/IM-1478_2022-07-25_15-24-22____Tim_Conditioning.box

## 1. Import original signal.mat

In [1]:
import scipy.io
import sys
sys.path.append("..") 

OG_signals = scipy.io.loadmat("../data/signals.mat",matlab_compatible=True)

# raw signals are saved as ndarrays 
OG_ref = OG_signals['ref']
OG_sig1 = OG_signals['sig1']
OG_sig2 = OG_signals['sig2']

print(f"Length of ref: {len(OG_ref[0])}")
print(f"Length of sig1: {len(OG_sig1[0])}")
print(f"Length of sig2: {len(OG_sig2[0])}")
OG_signals

Length of ref: 52047884
Length of sig1: 52047884
Length of sig2: 52047884


{'__header__': b'MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Tue Aug  9 11:15:51 2022',
 '__version__': '1.0',
 '__globals__': [],
 'loc': array([['N'],
        ['A'],
        ['c'],
        ['c'],
        ['-'],
        ['C'],
        ['o'],
        ['r'],
        ['e'],
        ['-'],
        ['L'],
        ['e'],
        ['f'],
        ['t'],
        [' ']], dtype='<U1'),
 'ref': array([[1176547.16464833, 1176549.04597333, 1176551.42754431, ...,
         1081373.91995359, 1081384.53742044, 1081394.79802668]]),
 'sig1': array([[1607135.10922693, 1607141.92601964, 1607148.94649728, ...,
         1512455.31702833, 1512478.46879176, 1512501.63149666]]),
 'sig2': array([[1273.83177836, 1259.97120373, 1245.52716997, ..., 1647.54118057,
         1659.39387947, 1670.3810918 ]]),
 'visits': array([[  150126.,   364974.,   756384.,   951313.,  1041221.,  1283594.,
          1561721.,  1746611.,  1900078.,  2163957.,  2597471.,  2740025.,
          3156575.,  3408167.,  3755882.,  38404

In [3]:
print(OG_signals['ref'])
print(not_OG_signals['ref'])

[[1176547.16464833 1176549.04597333 1176551.42754431 ... 1081373.91995359
  1081384.53742044 1081394.79802668]]
[[1176549.05021542 1176550.89934346 1176553.24865009 ... 1081373.61799505
  1081384.25049954 1081394.52613105]]


## 2-1. Read .phot data
def read_photometry_data(file):

In [3]:
import numpy as np
import struct

phot = {}
phot_file = '../data/IM-1478_2022-07-25_15-24-22____Tim_Conditioning.phot'

with open(phot_file, 'rb') as fid:
    
    # read binary data from the file, specifying the big-endian data format '>'
    phot['magic_key'] = struct.unpack('>I', fid.read(4))[0]
    phot['header_size'] = struct.unpack('>h', fid.read(2))[0]  
    phot['main_version'] = struct.unpack('>h', fid.read(2))[0]
    phot['secondary_version'] = struct.unpack('>h', fid.read(2))[0]
    phot['sampling_rate'] = struct.unpack('>h', fid.read(2))[0]
    phot['bytes_per_sample'] = struct.unpack('>h', fid.read(2))[0]
    phot['num_channels'] = struct.unpack('>h', fid.read(2))[0]    

    print(f"main_version: {phot['main_version']}")
    print(f"magic_key: {phot['magic_key']}")
    print(f"header_size: {phot['header_size']}")
    print(f"secondary_version: {phot['secondary_version']}")
    print(f"sampling_rate: {phot['sampling_rate']}")
    print(f"bytes_per_sample: {phot['bytes_per_sample']}")
    print(f"num_channels: {phot['num_channels']}")
    
    # after reading the character arrays, decode them from utf-8 and stripping the null characters (\x00)
    phot['file_name'] = fid.read(256).decode('utf-8').strip('\x00')
    phot['date'] = fid.read(256).decode('utf-8').strip('\x00')
    phot['time'] = fid.read(256).decode('utf-8').strip('\x00')

    print(f"file_name: {phot['file_name']}")
    print(f"date: {phot['date']}")
    print(f"time: {phot['time']}")

    # loop through the four channels and extract the location, signal, frequency, max and min values in the same way
    phot['channels'] = []

    # Initialize a list of empty dictionaries for the channels
    for i in range(4):
        phot['channels'].append({})

    # Read and decode the Location for all channels first
    for i in range(4):
        phot['channels'][i]['location'] = fid.read(256).decode('utf-8', errors='ignore').strip('\x00')
        print(f"channel {i+1} location: {phot['channels'][i]['location']}")

    # Read and decode the Signal for all channels
    for i in range(4):
        phot['channels'][i]['signal'] = fid.read(256).decode('utf-8', errors='ignore').strip('\x00')
        print(f"channel {i+1} signal: {phot['channels'][i]['signal']}")

    # Read Frequency for all channels
    for i in range(4):
        phot['channels'][i]['freq'] = struct.unpack('>h', fid.read(2))[0]
        print(f"channel {i+1} freq: {phot['channels'][i]['freq']}")

    # Read Max Voltage for all channels
    for i in range(4):
        phot['channels'][i]['max_v'] = struct.unpack('>h', fid.read(2))[0] / 32767.0
        print(f"channel {i+1} max_v: {phot['channels'][i]['max_v']}")

    # Read Min Voltage for all channels
    for i in range(4):
        phot['channels'][i]['min_v'] = struct.unpack('>h', fid.read(2))[0] / 32767.0
        print(f"channel {i+1} min_v: {phot['channels'][i]['min_v']}")

    phot['signal_label'] = []
    for signal in range(8):
        # phot['signal_label'].append(fid.read(256).decode('utf-8').strip('\x00'))
        signal_label = fid.read(256).decode('utf-8').strip('\x00')
        print(f"signal_label {signal+1}: {signal_label}")
        phot['signal_label'].append(signal_label)

    # handle the padding by reading until the header size is reached
    position = fid.tell()
    print(f"file position: {position}")
    
    pad_size = phot['header_size'] - position
    print(f"pad_size: {pad_size}")

    phot['pad'] = fid.read(pad_size)
    print(f"pad: {phot['pad']}")

    # reshape the read data into a 2D array where the number of channels is the first dimension
    data = np.fromfile(fid, dtype=np.dtype('>i2'))
    phot['data'] = np.reshape(data, (phot['num_channels'], -1), order='F')
    print(f"reshaped data: {phot['data'].shape}")

print(phot)

main_version: 0
magic_key: 22289481
header_size: 20480
secondary_version: 0
sampling_rate: 10000
bytes_per_sample: 2
num_channels: 8
file_name: C:\Users\BerkeLab\Documents\Labview\IM-1478\2022-07-25\IM-1478_2022-07-25_15-24-22____Tim_Conditioning.phot                                                                                                                                                     
date: 2022-07-25                                                                                                                                                                                                                                                      
time: 15-24-22                                                                                                                                                                                                                                                        
channel 1 location: DLS - Right                                          

## 2-2. Read .box data
def read_box_data(file):

In [4]:
box = {}
box_file = '../data/IM-1478_2022-07-25_15-24-22____Tim_Conditioning.box'

with open(box_file, 'rb') as fid:
    
    # Read binary data with big-endian (">")
    box['magic_key'] = struct.unpack('>I', fid.read(4))[0]
    box['header_size'] = struct.unpack('>h', fid.read(2))[0]
    box['main_version'] = struct.unpack('>h', fid.read(2))[0]    
    box['secondary_version'] = struct.unpack('>h', fid.read(2))[0]
    box['sampling_rate'] = struct.unpack('>h', fid.read(2))[0]
    box['bytes_per_sample'] = struct.unpack('>h', fid.read(2))[0]
    box['num_channels'] = struct.unpack('>h', fid.read(2))[0]

    print(f"box header_size: {box['header_size']}")
    print(f"box magic_key: {box['magic_key']}")    
    print(f"box main_version: {box['main_version']}")
    print(f"box secondary_version: {box['secondary_version']}")
    print(f"box sampling_rate: {box['sampling_rate']}")
    print(f"box bytes_per_sample: {box['bytes_per_sample']}")
    print(f"box num_channels: {box['num_channels']}")
    
    # Read and decode file name, date, and time
    box['file_name'] = fid.read(256).decode('utf-8').strip('\x00')
    box['date'] = fid.read(256).decode('utf-8').strip('\x00')
    box['time'] = fid.read(256).decode('utf-8').strip('\x00')    

    print(f"box date: {box['date']}")
    print(f"box file_name: {box['file_name']}")    
    print(f"box time: {box['time']}")

    # Read channel locations
    box['ch1_location'] = fid.read(256).decode('utf-8').strip('\x00')
    box['ch2_location'] = fid.read(256).decode('utf-8').strip('\x00')
    box['ch3_location'] = fid.read(256).decode('utf-8').strip('\x00')    

    print(f"box ch2_location: {box['ch2_location']}")
    print(f"box ch1_location: {box['ch1_location']}")    
    print(f"box ch3_location: {box['ch3_location']}")

    # Get current file position
    position = fid.tell()
    print(f"box file position: {position}")
    
    # Calculate pad size and read padding
    pad_size = box['header_size'] - position
    print(f"box pad_size: {pad_size}")

    box['pad'] = fid.read(pad_size)
    print(f"box pad: {box['pad']}")

    # Read the remaining data and reshape it
    data = np.fromfile(fid, dtype=np.uint8)
    box['data'] = np.reshape(data, (box['num_channels'], -1), order='F')
    print(f"box reshaped data: {box['data'].shape}")

print(box)

box header_size: 20480
box magic_key: 22289481
box main_version: 0
box secondary_version: 0
box sampling_rate: 10000
box bytes_per_sample: 1
box num_channels: 3
box date: 2022-07-25                                                                                                                                                                                                                                                      
box file_name: C:\Users\BerkeLab\Documents\Labview\IM-1478\2022-07-25\IM-1478_2022-07-25_15-24-22____Tim_Conditioning.box                                                                                                                                                      
box time: 15-24-22                                                                                                                                                                                                                                                        
box ch2_location: Connector1/DIOP

## 2-3. Process pulse (port entry) from box data
def process_pulses(box):

In [5]:
diff_data = np.diff(box['data'][2, :].astype(np.int16))
start = np.where(diff_data < -1)[0][0]
pulses = np.where(diff_data > 1)[0]
visits = pulses - start
print(f'start: \n{start} \n pulses: \n{pulses} \n visits: \n{visits}')

start: 
578116 
 pulses: 
[  728242   943090  1334500  1529429  1619337  1861710  2139837  2324727
  2478194  2742073  3175587  3318141  3734691  3986283  4333998  4418582
  4708921  4870771  5172847  5255925  5511146  5643945  5749835  5951341
  6222208  6355465  6588314  6817095  6943502  7193650  7480093  7713857
  7894108  8065727  8413348  8676689  9214351  9483402  9694118  9894892
 10785823 11106421 11283818 11388883 12322704 12509729 12636683 12796426
 12897975 13081646 13163132 13536228 13841598 14072203 14540575 14726567
 14866288 15141100 15242557 15419178 15719088 15862990 16016247 16132306
 16291573 16445088 16996714 17193195 17778117 18345155 18457127 18639338
 18905771 19153430 19283980 19440235 19760061 19894861 20120597 20278451
 20370718 20642826 20741999 20901595 21031299 21266773 21575283 22079168
 22146794 22307857 23195816 23270851 23863354 24223921 24369464 26063517
 26464328 26693363 26976023 27232142 28714928 28894634 29244690 29419514
 29582065 29768386 299362

## 2-4. Final Boss. LOCKIN DETECTION!

Key Steps
1. High-pass Filtering:\
    The input signal is first high-pass filtered to remove low-frequency noise.

2. Demodulation:\
    The input signal is multiplied (demodulated) by two reference signals (exc1 and exc2), which are typically sinusoidal signals
    exc1 is typically in-phase with the signal of interest.
    exc2 is 90 degrees out of phase with exc1 (quadrature).

3. (Option) De-trending:\
    If de-trending is enabled, a band-pass filter is applied to remove low-frequency components and any remaining noise.

4. Combination (Full Mode):\
    In full mode, after demodulating the signal, both the in-phase and quadrature components are combined using the Pythagorean theorem to reconstruct the signal's amplitude.


def lockin_detection(input_signal, exc1, exc2, Fs, **kwargs):

#### But first, set the Lockin Detection Function

In [6]:
def lockin_detection(input_signal, exc1, exc2, Fs, **kwargs):
    # Default values
    filter_order = 5
    tau = 10
    de_trend = False
    full = False

    # Parse optional arguments
    for key, value in kwargs.items():
        if key == 'tau':
            tau = value
        elif key == 'filterorder':
            filter_order = value
        elif key == 'detrend':
            de_trend = value
        elif key == 'full':
            full = value
        else:
            print(f'Invalid optional argument: {key}')

    tau /= 1000  # Convert to seconds
    Fc = 1 / (2 * np.pi * tau)
    fL = 0.01

    # High-pass filter design (Same as MATLAB filter design)
    b, a = butter(filter_order, Fc / (Fs / 2), 'high')
    
    # Single-direction filtering to match MATLAB's 'filter'
    input_signal = lfilter(b, a, input_signal)

    # Demodulation
    demod1 = input_signal * exc1
    demod2 = input_signal * exc2

    # Trend filter design
    if de_trend:
        b, a = butter(filter_order, np.array([fL, Fc]) / (Fs / 2), btype="bandpass")
    else:
        b, a = butter(filter_order, Fc / (Fs / 2))

    if not full:
        # Use lfilter for single-direction filtering
        sig1 = lfilter(b, a, demod1)
        sig2 = lfilter(b, a, demod2)
    else:
        # Full mode
        sig1x = lfilter(b, a, demod1)
        sig2x = lfilter(b, a, demod2)

        # Get imaginary part of the Hilbert transform for phase-shifted signal
        exc1_hilbert = np.imag(hilbert(exc1))
        exc2_hilbert = np.imag(hilbert(exc2))

        demod1 = input_signal * exc1_hilbert
        demod2 = input_signal * exc2_hilbert
        
        sig1y = lfilter(b, a, demod1)
        sig2y = lfilter(b, a, demod2)

        # Combine signals using Pythagorean theorem
        sig1 = np.sqrt(sig1x**2 + sig1y**2)
        sig2 = np.sqrt(sig2x**2 + sig2y**2)

    print(f" a : {a}, b: {b}")
    print(input_signal.shape)
    return sig1, sig2

def lockin_detection_main(phot, start, pathstr):

In [7]:
from scipy.signal import butter, filtfilt, lfilter, hilbert

tau = 10
filter_order = 5

# Get the necessary data from the phot structure
detector = phot['data'][5, :]
exc1 = phot['data'][6, :]
exc2 = phot['data'][7, :]

# Call lockin_detection function
sig1, ref = lockin_detection(detector, exc1, exc2, phot['sampling_rate'], tau=tau, filterorder=filter_order, detrend=False, full=True)

detector = phot['data'][2, :]
exc1 = phot['data'][0, :]
exc2 = phot['data'][1, :]

# Call lockin_detection function for the second set of signals
sig2, ref2 = lockin_detection(detector, exc1, exc2, phot['sampling_rate'], tau=tau, filterorder=filter_order, detrend=False, full=True)

# Cut off the beginning of the signal to match behavioral data
sig1 = sig1[start:]
sig2 = sig2[start:]
ref = ref[start:]
ref2 = ref2[start:]

loc = phot['channels'][2]['location'][:15]  # First 15 characters of the location

# Save signals to a file
np.savez(f"../data/signals.npz", sig1=sig1, sig2=sig2, ref=ref, loc=loc, visits=visits)


 a : [ 1.         -4.96763936  9.87108043 -9.8073999   4.872116   -0.96815716], b: [3.07497117e-12 1.53748559e-11 3.07497117e-11 3.07497117e-11
 1.53748559e-11 3.07497117e-12]
(52626000,)
 a : [ 1.         -4.96763936  9.87108043 -9.8073999   4.872116   -0.96815716], b: [3.07497117e-12 1.53748559e-11 3.07497117e-11 3.07497117e-11
 1.53748559e-11 3.07497117e-12]
(52626000,)


## 3. Compare!?

In [9]:
print('COMPARISON')
print(f"Length of ref from Python: {len(ref)}, and MATLAB: {len(OG_signals['ref'][0])} ")
print(ref)
print(OG_signals['ref'])
print("-" * 40) 
print(f"Length of sig1 from Python: {len(sig1)}, and MATLAB: {len(OG_signals['sig1'][0])}")
print(sig1)
print(OG_signals['sig1'])
print("-" * 40) 
print(f"Length of sig2 from Python: {len(sig2)}, and MATLAB: {len(OG_signals['sig2'][0])}")
print(sig2)
print(OG_signals['sig2'])

# print(f"Lenth of visits: {len(visits)}")
# print(visits)

import matplotlib.pyplot as plt

plt.plot(ref[0:10])
plt.plot(OG_signals['ref'][0][0:10])
plt.show()



COMPARISON
Length of ref from Python: 52047884, and MATLAB: 52047884 
[1176525.72992475 1176527.60317482 1176529.9766463  ... 1081361.67853111
 1081372.31895114 1081382.60249975]
[[1176547.16464833 1176549.04597333 1176551.42754431 ... 1081373.91995359
  1081384.53742044 1081394.79802668]]
----------------------------------------
Length of sig1 from Python: 52047884, and MATLAB: 52047884
[1607106.41941195 1607113.23308341 1607120.25042529 ... 1512435.09277104
 1512458.25339874 1512481.42500401]
[[1607135.10922693 1607141.92601964 1607148.94649728 ... 1512455.31702833
  1512478.46879176 1512501.63149666]]
----------------------------------------
Length of sig2 from Python: 52047884, and MATLAB: 52047884
[1273.50998273 1259.65315484 1245.21321084 ... 1647.18057379 1659.03751545
 1670.02904287]
[[1273.83177836 1259.97120373 1245.52716997 ... 1647.54118057
  1659.39387947 1670.3810918 ]]


: 

: 