# Configuration

In [11]:
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

from pymatreader import read_mat
import json 

project_root = Path("/home/heberto/buzaki")
session_path = project_root /  "fCamk1_200827_sess9"
assert session_path.is_dir()

session_files_path_list = list(session_path.iterdir())
# Dump to a file in the same folder
json_directory = Path.cwd() / "_json_files"
json_directory.mkdir(exist_ok=True)


# Adjust the figure size
plt.figure(figsize=(16, 8))
# Adjust all the fonts
plt.rcParams.update({'font.size': 25})

<Figure size 1600x800 with 0 Axes>

# Some helper functions

In [12]:
def format_files_as_string(files):
    # Add a tuple with size
    files = [(file_path.name, f"size: {file_path.stat().st_size / 1024**2 :,.2f} MiB") for file_path in files]

    # Sort the list by file size in descending order
    files.sort(key=lambda x: x[1], reverse=True)

    # Determine the maximum width for alignment based on longest file name length
    max_file_name_length = max(len(name) for name, _ in files)
    max_size_width = max_file_name_length + len("  size: ")

    # Create new list with formatted strings
    file_as_string = [f"{name.ljust(max_file_name_length)} {size}" for name, size in files]
    
    return file_as_string


def build_keys_and_types(dictionary):
    """
    Construct a new dictionary that contains type and optionally value or shape information 
    for each value in the input dictionary. This function handles nested dictionaries and treats 
    numpy arrays and lists in a specific way.

    The function creates a new key-value pair for every key-value pair in the input dictionary. 
    The value is another dictionary that always contains the 'type' key and might contain the 'value', 
    'shape' or 'length' key depending on the original value's type and size.

    Parameters
    ----------
    dictionary : dict
        The input dictionary to be analyzed. It may contain nested dictionaries, numpy arrays,
        lists, or other types of values.

    Returns
    -------
    output_dict : dict
        The output dictionary that provides detailed information about each value in the original dictionary.
        The value corresponding to each key is another dictionary that contains the following key-value pairs:
        - 'type': A string describing the type of the original value.
        - 'value': A string representing the original value. This key is present if the original value 
        is not a numpy array or list, or if it's a small numpy array or list (i.e., with fewer than 10 elements).
        - 'shape': A string representing the shape of the original numpy array. This key is present if the 
        original value is a numpy array with 10 or more elements.
    """
    output_dict = {}
    
    for key, value in dictionary.items():
        if isinstance(value, dict):
            output_dict[key] = build_keys_and_types(value)
        else:
            output_dict[key] = extract_value_info(value)

    return output_dict


def extract_value_info(value):
    """
    Generate a dictionary that contains type and optionally value or shape information about the input value.

    This function always adds the 'type' key to the output dictionary. Depending on the input value's type and size,
    it might also add the 'value', 'shape', or 'length' key.

    Parameters
    ----------
    value : object
        The input value. It can be of any type, including numpy array and list.

    Returns
    -------
    value_info : dict
        The output dictionary that contains the following key-value pairs:
        - 'type': A string describing the input value's type.
        - 'value': A string representing the input value. This key is present if the input value is not a numpy 
        array or list, or if it's a small numpy array or list (i.e., with fewer than 10 elements).
        - 'shape': A string representing the shape of the input value if it's a numpy array with 10 or more elements.
    """
    value_info = {"type": str(type(value))}
    
    if isinstance(value, (np.ndarray, list)):
        length = len(value) if isinstance(value, list) else value.size
        value_info["shape"] = str(value.shape) if isinstance(value, np.ndarray) else str(length)
        no_list_or_array_inside = not any(isinstance(item, (np.ndarray, list)) for item in value)
        if length < 10 and no_list_or_array_inside:
            value_info["value"] = str(value)
    else:
        value_info["value"] = str(value)

    return value_info

# Session overview

#### Mat files

In [None]:
# Retrieve file paths and sizes

condition = lambda file_path: ".mat" in file_path.name
files = [file_path for file_path in session_files_path_list if condition(file_path)]

files_as_string = format_files_as_string(files)
files_as_string

### The rest

In [None]:
condition = lambda file_path : ".mat" not in file_path.name and file_path.is_file()
files = [file_path for file_path in session_files_path_list if condition(file_path)]

files_as_string = format_files_as_string(files)
files_as_string


This is the output:

* `analogin.dat`, `8,203.18 MiB` : Probably raw data
* `fCamk1_200827_sess9.dat`, `32,812.73 MiB` : TODO
* `auxiliary.dat`, `3,076.19 MiB` : TODO
* `time.dat`, `2,050.80 MiB` :  TODO
* `TrialMapsAndRasters.pptx`, `1.02 MiB` : Collection of rasters, I don't think it matters. 
* `fCamk1_200827_sess9.lfp`, `1,367.20 MiB` :  LFP
* `digitalin.dat`, `1,025.40 MiB` :  TODO
* `supply.dat`, `1,025.40 MiB` :  TODO
* `pulTime.npy`, `0.32 MiB` : TODO
* `fCamk1_200827_sess9_HSE.HSE.evt`, `0.28 MiB` : TODO 
* `deepSuperficial_classification_fromRipples.png`, `0.14 MiB` : Figure, probably does not matter 
* `bz_DetectSWR.log`, `0.05 MiB` :  TODO
* `bz_DetectSWR_manu.log`, `0.05 MiB` : TODO
* `fCamk1_200827_sess9.xml`, `0.04 MiB` : TODO
* `fCamk1_200827_sess9.nrs`, `0.00 MiB` : TODO
* `4nA pulses.txt`, `0.00 MiB` : TODO
* `info.rhd`, `0.00 MiB` : TODO

#### Folders / Directories

In [None]:
condition = lambda file_path : ".mat" not in file_path.name and file_path.is_file()
files = [file_path for file_path in session_files_path_list if condition(file_path)]

files_as_string = format_files_as_string(files)
files_as_string

# Explore specific files

## Sessions

### `session.mat`

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.session.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

In [None]:
session_data = mat_file["session"]
session_data.keys()

In [None]:
session_data["general"]

In [None]:
surgeries_data = session_data["animal"]["surgeries"]
weight = surgeries_data["weight"]
f"{weight / 1000:2.3f} kg"

### `sessionInfo.mat`

In [None]:

file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.sessionInfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

### `.EventExplorer.SessionMetadata`

In [None]:


file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.EventExplorer.SessionMetadata.mat"

assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

This is empty

## Behavior 

### `Behavior.mat`

In [None]:
file_path = session_path / f"{session_path.stem}.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


### `behavior.cellinfo.mat`

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.behavior.cellinfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


I think this is the file with the most information. 

### `Tracking.Behavior.mat`

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.Tracking.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
mat_file["tracking"]["position"]["x"][:5]

## Subfolder

### `Tracking.Behavior.mat` subfolder

In [None]:
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / f"{sub_folder.stem}.Tracking.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
mat_file["tracking"]["position"]["x"][:5]

It seems that this in the file above have the same position data

### `Linearized.Behavior.mat` subfolder

In [None]:
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / f"{sub_folder.stem}.Linearized.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
mat_file["behavior"]["position"]["x"][:5]

And it is the same position again here. 

However, this has the trials as well.

In [None]:
mat_file["behavior"]["trials"]["startPoint"][:5]

This seems the position and not the times as we need. Let's look for other folder to see if we can find the trials

#### Camera matlab file

In [None]:
file_path = session_files_path_list[0]
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "Basler_acA1280-60gc__21606137__20200827_110730202.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "camera" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)




Here they just copied the camera frame by frame (!)

```

{
  "frames": {
    "r": {
      "type": "<class 'numpy.ndarray'>",
      "shape": "(1024, 204, 53143)"
    }
  }
}
```
How long is the video 53_143 / 30 = 1_771.43 seconds -> 29.52 minutes

Taking the sampling_rate of 30 Hz, we can see that the video is 1_771.43 seconds long and the ntransform to minutes.

### Virtual maze mat

In [None]:
file_path = session_files_path_list[0]
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "virtualMaze.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = f"{file_path.stem}" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


This seems just like a map of the maze

```
  "maze": {
    "type": "<class 'numpy.ndarray'>",
    "value": "[[ 11.09870461 109.25348571]\n [ 12.12486221   0.85756268]]"
  }
```

### Roi tracking .mat file

In [None]:
file_path = session_files_path_list[0]
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "roiTracking.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = f"{file_path.stem}" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


I think this is a just a map with the reigon where they are doing the tracking of the position with the camera:

```
  "roiTracking": {
    "type": "<class 'numpy.ndarray'>",
    "value": "[[  31.25 1015.  ]\n [ 185.75 1019.5 ]\n [ 184.25    5.5 ]\n [  17.75    2.5 ]\n [  31.25 1015.  ]]"
  }
```

### Amplifier digital events `.mat`

In [None]:
file_path = session_files_path_list[0]
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "amplifier.DigitalIn.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = f"{file_path.stem}" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
mat_file["digitalIn"].keys()

In [None]:
mat_file["digitalIn"]["ints"][0].shape

In [None]:
mat_file["digitalIn"]["ints"][0]  # I think those ar the intervals of the pulses in the local time for the behavior

In [None]:
mat_file["digitalIn"]["dur"][1]  # I don't think they are ordered though

This is pulse duration

In [None]:
mat_file["digitalIn"]["intsPeriods"]  # Note sure what it is this

#### TTL / Synch

```
dict_keys(['timestampsOn', 'timestampsOff', 'ints', 'dur', 'intsPeriods'])
```
I think this is the TTLs that synchronize the events. Come to this again.

In [None]:
mat_file["digitalIn"]["timestampsOff"][0].shape

In [None]:
mat_file["digitalIn"]["timestampsOff"][3].shape

In [None]:
mat_file["digitalIn"]["dur"][0].shape

### Analog signals in sub-folder

We have the following
* `auxiliary.dat`
* `supply.dat`
* `time.dat`
* `digitalin.dat`

But two xml files:
* `fCamk1_200827_sess9.xml` which I guess it the main recorder
* `amplifier.xml` which I don't know what it does.

The first is on the top directory, the other one is here.

Note that in the sub-folder we only have `amplifier.xml` are they different?

Let's test this

In [None]:
xml_file_path =  session_path /  "fCamk1_200827_sess9.xml"

from xml.etree import ElementTree

tree = ElementTree.parse(xml_file_path)
root = tree.getroot()
acq = root.find('acquisitionSystem')
nbits = int(acq.find('nBits').text)
num_channels = int(acq.find('nChannels').text)
sampling_rate = float(acq.find('samplingRate').text)
voltage_range = float(acq.find('voltageRange').text)
# offset = int(acq.find('offset').text)
amplification = float(acq.find('amplification').text)

print(f"{num_channels=}, {sampling_rate=}, {voltage_range=}, {amplification=}")


In [None]:

sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()

xml_file_path =  sub_folder /  "amplifier.xml"

from xml.etree import ElementTree

tree = ElementTree.parse(xml_file_path)
root = tree.getroot()
acq = root.find('acquisitionSystem')
nbits = int(acq.find('nBits').text)
num_channels = int(acq.find('nChannels').text)
sampling_rate = float(acq.find('samplingRate').text)
voltage_range = float(acq.find('voltageRange').text)
# offset = int(acq.find('offset').text)
amplification = float(acq.find('amplification').text)

print(f"{num_channels=}, {sampling_rate=}, {voltage_range=}, {amplification=}")


Output:
The top level:
```
num_channels=32, sampling_rate=30000.0, voltage_range=20.0, amplification=1000.0
```

The amplifier:
```
num_channels=32, sampling_rate=30000.0, voltage_range=20.0, amplification=1000.0
```

They seem similar. I wonder what we have two but we have a lot of redundancy in the conversion data.

### Signals

In [None]:
signal_names = ["digitalin.dat", "time.dat", "supply.dat", "auxiliary.dat"]


from spikeinterface.extractors.neoextractors import NeuroScopeRecordingExtractor

sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()

for name in signal_names:
    file_path = sub_folder / f"{name}" 
    assert file_path.is_file(), file_path

    sig_dtype = 'int16' if nbits <= 16 else 'int32'
    data = np.memmap(file_path, dtype=sig_dtype, mode='r', offset=0).reshape(-1, num_channels)
    time = data.shape[0] / sampling_rate
    num_samples = data.shape[0]

    print("---------------")
    print(f"{file_path.name=} \n")
    print(f"num_samples: {num_samples:,}, time: {time:.2f} seconds")

Output:
```
---------------
file_path.name='digitalin.dat' 

num_samples: 1,684,470, time: 56.15 seconds
---------------
file_path.name='time.dat' 

num_samples: 3,368,940, time: 112.30 seconds
---------------
file_path.name='supply.dat' 

num_samples: 1,684,470, time: 56.15 seconds
---------------
file_path.name='auxiliary.dat' 

num_samples: 5,053,410, time: 168.45 seconds
```

## `ripples.events.mat`

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.ripples.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

In [None]:
ripple_intervals = ripples_data["timestamps"]
peaks = ripples_data["peaks"]
peak_normed_power = ripples_data["peakNormedPower"]

ripple_stats_data = ripples_data["rippleStats"]["data"]
peak_frequency = ripple_stats_data["peakFrequency"]
peak_duration = ripple_stats_data["duration"]
peak_amplitude = ripple_stats_data["peakAmplitude"]

peaks.shape, peak_normed_power.shape, peak_frequency.shape, peak_duration.shape, peak_amplitude.shape

In [None]:
# Extract indexed data
ripple_stats_maps = ripples_data["rippleStats"]["maps"]

ripple_raw = ripple_stats_maps["ripples_raw"]
ripple_frequency = ripple_stats_maps["frequency"]
ripple_phase = ripple_stats_maps["phase"]
ripple_amplitude = ripple_stats_maps["amplitude"]
ripple_raw.shape, ripple_frequency.shape, ripple_phase.shape, ripple_amplitude.shape

## Channels

### Channel map

In [None]:
file_path = session_path / "chanMap.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "chanMap"+ ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
connected = mat_file["connected"]  
connected

In [None]:
mat_file["chanMap"]

In [None]:
mat_file["chanMap0ind"]

In [None]:
mat_file["kcoords"]

In [None]:
colors = {2: "red", 1: "blue", 3: "green", 4: "yellow"}
channel_to_shank =  mat_file["kcoords"]
colors_for_k_cores = [colors[int(k)] for k in channel_to_shank]
channel_index = mat_file["chanMap"]
channel_index = mat_file["chanMap0ind"]

x_coords = mat_file["xcoords"][:]
y_coords = mat_file["ycoords"][:]

plt.scatter(x_coords, y_coords, color=colors_for_k_cores)

for i in range(x_coords.size):
    channel_text = f"idx={channel_index[i]} - grp={channel_to_shank[i]}"
    plt.text(x_coords[i], y_coords[i], channel_text, size=10)

In [None]:
from neuroconv.datainterfaces import NeuroScopeRecordingInterface

session_id = session_path.stem

# Add Recording
file_path = session_path / f"{session_id}.dat"
assert file_path.is_file()
xml_file_path = session_path / f"{session_id}.xml"

interface = NeuroScopeRecordingInterface(file_path=str(file_path), xml_file_path=str(xml_file_path))

In [None]:
interface.recording_extractor.get_channel_ids()

In [None]:
interface.recording_extractor.get_property("group_name")

In [None]:
interface.recording_extractor.get_property("channel_name")

In [None]:
channel_ids = interface.recording_extractor.get_channel_ids()

small_trace = interface.recording_extractor.get_traces(start_frame=0, end_frame=1000, channel_ids=channel_ids)
small_trace_no_channels = interface.recording_extractor.get_traces(start_frame=0, end_frame=1000)

# Small test to see if traces are layed out as I expect
np.testing.assert_allclose(small_trace, small_trace_no_channels)

In [None]:
colors = {2: "red", 1: "blue", 3: "green", 4: "yellow"}
channel_to_shank =  mat_file["kcoords"]
colors_for_k_cores = [colors[int(k)] for k in channel_to_shank]
channel_index = mat_file["chanMap"]
channel_index = mat_file["chanMap0ind"]
channel_name = interface.recording_extractor.get_property("channel_name")


x_coords = mat_file["xcoords"][:]
y_coords = mat_file["ycoords"][:]

plt.scatter(x_coords, y_coords, color=colors_for_k_cores)

for i in range(x_coords.size):
    channel_text = f"idx={channel_index[i]} - grp={channel_to_shank[i]}"
    x = mat_file["xcoords"][i]
    y = mat_file["ycoords"][i]
    plt.text(x, y, channel_text, size=10)
    name = channel_name[i]
    plt.text(x, y - 5, name, size=10)

In [None]:
recording_extractor = interface.recording_extractor
recording_to_matlab_data_map = []
channel_ids_in_matlab = mat_file["chanMap0ind"]


channel_ids_in_matlab_str = [str(channel_ids_in_matlab[i]) for i in channel_ids_in_matlab]
locations = np.array([mat_file["xcoords"], mat_file["ycoords"]]).T
recording_extractor.set_channel_locations(channel_ids=channel_ids_in_matlab_str, locations=locations)



In [None]:
recording_extractor.set_property(key="brain_area", values=["CA1"] * recording_extractor.get_num_channels())

In [None]:
recording_extractor.get_property("brain_area")

In [None]:
recording_extractor.get_property_keys()

In [None]:
recording_extractor.get_property("group_name")

In [None]:
group_name = recording_extractor.get_property("group_name")
group_to_color_map = {"Group2": "red", "Group1": "blue", "Group3": "green", "Group4": "yellow"}
colors_for_groups = [group_to_color_map[name] for name in group_name]

location = recording_extractor.get_property("location")
x_coords, y_coords = location[:, 0], location[:, 1]

# plt.scatter(x, y)
plt.scatter(x_coords, y_coords, color=colors_for_groups)

channel_name = recording_extractor.get_property("channel_name")
for i in range(x_coords.size):
    x = x_coords[i]
    y = y_coords[i]
    channel_text = channel_name[i]
    plt.text(x, y, channel_text, size=10)


In [None]:
recording_extractor.get_channel_ids()

In [None]:
output_dir_path = Path.home() / "conversion_nwb"

stub_path_list = list((output_dir_path / "nwb_stub").iterdir())

file_path = stub_path_list[0]

import pynwb 

# Open file with pynwb
io = pynwb.NWBHDF5IO(str(file_path), mode='r', load_namespaces=True)
nwbfile = io.read()


In [None]:
data_frame = nwbfile.electrodes.to_dataframe()
x_coords, y_coords, group_name = data_frame.rel_x, data_frame.rel_y, data_frame.group_name

In [None]:
data_frame

In [None]:
group_to_color_map = {"Group2": "red", "Group1": "blue", "Group3": "green", "Group4": "yellow"}
colors_for_groups = [group_to_color_map[name] for name in group_name]

location = recording_extractor.get_property("location")

# plt.scatter(x, y)
plt.scatter(x_coords, y_coords, color=colors_for_groups)

channel_name = recording_extractor.get_property("channel_name")
for i in range(x_coords.size):
    x = x_coords[i]
    y = y_coords[i]
    channel_text = channel_name[i]
    plt.text(x, y, channel_text, size=10)


In [None]:
io.close()

### chanCoords.channelInfo

In [None]:

file_path = session_path / f"{session_path.stem}.chanCoords.channelInfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "chanCoords"+ ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


### Brain regions

In [None]:
file_path = session_path / f"{session_path.stem}.brainRegions.channelInfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "brainRegions.channelInfo"+ ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


## LED

In [None]:
condition = lambda file_path : "LED" in file_path.name
led_files = [file_path for file_path in session_files_path_list if condition(file_path)] 

files_as_string = format_files_as_string(led_files)
files_as_string

In [None]:
for file_path in led_files:
    if "uLEDResponse_raster" in file_path.name:
        continue
    print(file_path.name)
    assert file_path.is_file(), file_path

    mat_file = read_mat(file_path) 

    result = build_keys_and_types(mat_file)
    json_output = json.dumps(result, indent=2)

    json_name = "".join(file_path.suffixes)[1:] + ".json"
    with open(json_directory / json_name, 'w') as f:
        f.write(json_output)


## Optogenetic

In [None]:
condition = lambda file_path : "optogenetic" in file_path.name
files = [file_path for file_path in session_files_path_list if condition(file_path)] 

files_as_string = format_files_as_string(files)
files_as_string

In [None]:
file_path = session_path / f"{session_path.stem}.optogeneticPulses.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
file_path = session_path / f"{session_path.stem}.optogeneticResponse.cellinfo.mat"
assert file_path.is_file(), file_path

import h5py 

file_path = h5py.File(file_path, 'r')
# mat_file = read_mat(file_path) 
file_path["optogeneticResponses"].keys()


In [None]:
file_path["optogeneticResponses"]["stimulationEpochs"][:][:, :2]

## EEG

In [None]:
condition = lambda file_path : "eeg" in file_path.name
files = [file_path for file_path in session_files_path_list if condition(file_path)] 

files_as_string = format_files_as_string(files)
files_as_string



In [None]:
file_path = session_path / f"{session_path.stem}.eegstates.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


## `SleepState.states`

In [None]:
file_path = session_path / f"{session_path.stem}.SleepState.states.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


## EMG
It seems there is only one file

In [None]:
file_path = session_path / f"{session_path.stem}.EMGFromLFP.LFP.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

## Theta (`thetaEpochs.states`)

In [None]:
file_path = session_path / f"{session_path.stem}.thetaEpochs.states.mat"
assert file_path.is_file(), file_path
mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


## Pulses (`pulses.events.mat`)

In [None]:
file_path = session_path / f"{session_path.stem}.pulses.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
mat_file["pulses"].keys()

In [None]:
mat_file["pulses"]["eventGroupID"][:][:5]

In [None]:
mat_file["pulses"]["analogChannel"][:][:5]

## HSE

In [3]:
file_path = session_path / f"{session_path.stem}.HSE.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


  warn('Complex objects (like classes) are not supported. '


In [4]:
hse_data = mat_file["HSE"]
start_time, stop_time = hse_data["timestamps"][:, 0], hse_data["timestamps"][:, 1] 
peaks = hse_data["peaks"]
event_id_labels = hse_data["eventIDlabels"]
event_id = hse_data["eventID"]
center = hse_data["center"]

np.unique(event_id_labels), np.unique(event_id), np.unique(peaks).size, np.unique(center).size

(array(['HSE'], dtype='<U3'), array([1], dtype=uint8), 3952, 3952)

In [9]:
center[:25]

array([  6.81  ,  12.365 ,  12.789 ,  14.8345,  15.9645,  21.346 ,
        28.087 ,  35.8615,  37.531 ,  39.9335,  43.3225,  45.118 ,
        45.3245,  47.5805,  48.3395,  58.8215,  59.101 ,  66.89  ,
        81.6045,  84.6415,  99.5275, 103.814 , 112.7415, 116.454 ,
       117.4325])

In [10]:
peaks[:25]

array([  6.716,  12.354,  12.791,  14.878,  15.987,  21.357,  28.092,
        35.875,  37.499,  40.02 ,  43.311,  45.064,  45.353,  47.574,
        48.391,  58.83 ,  59.104,  66.911,  81.608,  84.518,  99.583,
       103.83 , 112.749, 116.465, 117.459])

In [5]:
start_time[:5], stop_time[:5]

(array([ 6.679, 12.277, 12.739, 14.751, 15.903]),
 array([ 6.941, 12.453, 12.839, 14.918, 16.026]))

## UDE states

In [None]:
file_path = session_path / f"{session_path.stem}.UDStates.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

In [None]:
up_and_down_states_data = mat_file["UDStates"]

intervals = up_and_down_states_data["ints"]
up_intervals = intervals["UP"]
up_start_time, up_stop_time = up_intervals[:, 0], up_intervals[:, 1]
down_intervals = intervals["DOWN"]
down_start_time, down_stop_time = down_intervals[:, 0], down_intervals[:, 1]

up_start_time.shape, down_start_time.shape, up_stop_time.shape, down_stop_time.shape


In [None]:
# Combine and sort UP and DOWN intervals
combined_start_times = np.concatenate((up_start_time, down_start_time))
combined_stop_times = np.concatenate((up_stop_time, down_stop_time))
combined_states = np.array(['UP'] * len(up_start_time) + ['DOWN'] * len(down_start_time))

# Create an array of indices that sorts the start times
sort_indices = np.argsort(combined_start_times)

# Sort all arrays using the sorting indices
combined_start_times = combined_start_times[sort_indices]
combined_stop_times = combined_stop_times[sort_indices]
combined_states = combined_states[sort_indices]

# Create TimeIntervals
states_intervals = TimeIntervals(name='states_intervals')

# Add a new column for states
states_intervals.add_column(name='state', description='State (UP or DOWN)', data=combined_states)

# Add intervals
for start, stop, state in zip(combined_start_times, combined_stop_times, combined_states):
    states_intervals.add_interval(start, stop, state=state)

## ACG 

In [None]:
file_path = session_path / f"{session_path.stem}.ACGPeak.cellinfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

## Spikes cell info (`spikes.cellinfo.mat`)

In [None]:
file_path = session_path / f"{session_path.stem}.spikes.cellinfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

In [None]:
file_path = session_path / f"{session_path.stem}.spikes.cellinfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

In [None]:
spikes_data = mat_file["spikes"]
spikes_data.keys()

In [None]:
type(spikes_data["times"]), type(spikes_data["ts"])

In [None]:
spikes_data["ts"][0]

In [None]:
for index in range(10):
    plt.scatter(spikes_data["times"][index], (index + 1 ) * np.ones(spikes_data["times"][index].size), s=1)

In [None]:
[(min(spike_train), max(spike_train)) for spike_train in spikes_data["times"]]

## `ws_temp.mat`

In [None]:
file_path = session_path / f"ws_temp.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "ws_temp" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


## Analog signals in top folder

### Raw signal and LFP

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.dat"
assert file_path.is_file(), file_path

from spikeinterface.extractors.neoextractors import NeuroScopeRecordingExtractor

recording = NeuroScopeRecordingExtractor(file_path=file_path)
recording

Output:
```
NeuroScopeRecordingExtractor: 32 channels - 30.0kHz - 1 segments - 537,603,840 samples 
                              17,920.13s (4.98 hours) - int16 dtype - 32.04 GiB
  file_path: /home/heberto/buzaki/fCamk1_200827_sess9/fCamk1_200827_sess9.dat
```
We see this is around 5 hours of recording. 

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.lfp"
assert file_path.is_file(), file_path

from spikeinterface.extractors.neoextractors import NeuroScopeRecordingExtractor

recording = NeuroScopeRecordingExtractor(file_path=file_path)
recording

Here we see that the sampling rate is wrong. Let's test with the current value from the paper

In [None]:
sampling_rate = 1.25 * 10**3 # 1.25 kHz
duration_seconds = recording.get_num_frames() / sampling_rate
duration_minutes = duration_seconds / 60.0
duration_hours = duration_minutes / 60.0
duration_hours

The output is five hours. This should be corrected

### Other signals in top folder
We have the following
* `analogin.dat`
* `auxiliary.dat`
* `supply.dat`
* `time.dat`
* `digitalin.dat`


Let's check the XML first from where their metadata comes from:

In [None]:
xml_file_path =  session_path /  "fCamk1_200827_sess9.xml"

from xml.etree import ElementTree

tree = ElementTree.parse(xml_file_path)
root = tree.getroot()
acq = root.find('acquisitionSystem')
nbits = int(acq.find('nBits').text)
num_channels = int(acq.find('nChannels').text)
sampling_rate = float(acq.find('samplingRate').text)
voltage_range = float(acq.find('voltageRange').text)
# offset = int(acq.find('offset').text)
amplification = float(acq.find('amplification').text)

print(f"{num_channels=}, {sampling_rate=}, {voltage_range=}, {amplification=}")


In [None]:
signal_names = ["digitalin.dat", "time.dat", "supply.dat", "auxiliary.dat", "analogin.dat"]


for name in signal_names:
    file_path = session_path / f"{name}" 
    assert file_path.is_file(), file_path

    sig_dtype = 'int16' if nbits <= 16 else 'int32'
    data = np.memmap(file_path, dtype=sig_dtype, mode='r', offset=0).reshape(-1,    )
    time = data.shape[0] / sampling_rate
    num_samples = data.shape[0]

    print("---------------")
    print(f"{file_path.name=} \n")
    print(f"num_samples: {num_samples:,}, time: {time / 60.0:.2f} minutes")

Output:
They seem too short:
```
---------------
file_path.name='digitalin.dat' 

num_samples: 16,800,120, time: 9.33 minutes
---------------
file_path.name='time.dat' 

num_samples: 33,600,240, time: 18.67 minutes
---------------
file_path.name='supply.dat' 

num_samples: 16,800,120, time: 9.33 minutes
---------------
file_path.name='auxiliary.dat' 

num_samples: 50,400,360, time: 28.00 minutes
---------------
file_path.name='analogin.dat' 

num_samples: 134,400,960, time: 74.67 minutes
```

## Camera / Video

In [None]:
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "Basler_acA1280-60gc__21606137__20200827_110730202.avi"
assert file_path.is_file(), file_path

In [None]:
file_path

In [None]:
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "Basler_acA1280-60gc__21606137__20200827_110730202.avi"
assert file_path.is_file(), file_path

# Use Pyav to get the video metadata
import av
container = av.open(str(file_path))

video = container.streams.video[0]

print("width", video.codec_context.width)
print("height", video.codec_context.height)
print("pixel format", video.codec_context.pix_fmt)
print("frame rate", video.codec_context.framerate)
print("bit rate", video.codec_context.bit_rate)
print("codec", video.codec_context.codec_tag)



In [None]:
video.start_time, video.average_rate, video.duration, video.time_base

In [None]:
from neuroconv.datainterfaces.behavior.video.video_utils import get_video_timestamps

timestamps_cv = get_video_timestamps(file_path)


In [None]:
from neuroconv.datainterfaces.behavior.sleap.sleap_utils import extract_timestamps


timestamps_sleap = np.array(extract_timestamps(file_path))

In [None]:
timestamps_cv.shape, timestamps_sleap.shape

In [None]:
timestamps_cv[0], timestamps_sleap[0]

In [None]:
1 / 30.0

In [None]:
timestmaps_cv_shifted = timestamps_cv + 0.0333333333333333
np.isclose(timestmaps_cv_shifted, timestamps_sleap)

In [None]:
np.allclose(timestmaps_cv_shifted, timestamps_sleap)

In [None]:
import av

container = av.open(str(file_path))
stream = container.streams.video[0]

counter = 0
for frame in container.decode(stream):
    time_base = frame.time_base
    dts_time = float(frame.dts * time_base)
    pts_time = float(frame.pts * time_base)
    time = float(frame.time)
    print(f"{dts_time= }, {pts_time=}, {time=}")
    counter += 1
    
    if counter > 5:
        break


In [None]:
counter = 0
for packet in container.demux(stream):
    print(f"{counter=}")
    print('Packet PTS:', packet.pts)
    print('Packet DTS:', packet.dts)
    print('Packet Duration:', packet.duration)
    print('Packet Timebase:', packet.time_base)
    
    counter += 1
    if counter > 5:
        break

In [None]:
import av

# Open the video file
container = av.open(str(file_path))
stream = container.streams.video[0]

# Iterate over the first 10 packets in the video stream
for i, packet in enumerate(container.demux(stream)):
    print(f'Packet #{i + 1}:')
    print('PTS:', packet.pts)
    print('DTS:', packet.dts)
    print('Duration:', packet.duration)
    print('Timebase:', packet.time_base)
    print('\n')  # Print a newline for readability
    
    # Stop after 10 packets
    if i >= 9:
        break

# Epochs


In [27]:
session_path_here = project_root /  "fCamk1_200827_sess9"
session_path_here = project_root / "fCamk2" / "fCamk2_201012_sess1"
session_path_here = project_root / "fCamk3_201030_sess12"
file_path = session_path_here / f"{session_path_here.stem}.session.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
epoch_list = mat_file["session"]["epochs"]

In [28]:
session_path_here

PosixPath('/home/heberto/buzaki/fCamk3_201030_sess12')

In [35]:
session_path = session_path_here
name_of_folders = [session_path / f"{epoch['name']}" for epoch in epoch_list]
start_times = [epoch['startTime'] for epoch in epoch_list]
assert all([folder.is_dir() for folder in name_of_folders])

# For each of the folder in name_of_folders look for the .avi file and 
# This can't be done with a rglob because some sessions like `fCamk3_201030_sess12` contain sub-nested sessions.
epoch_to_video_info = {}
for folder, start_time in zip(name_of_folders, start_times):
    video_file_paths = list(folder.glob("*.avi"))
    assert len(video_file_paths) <= 1, "There should be only one .avi file in each epoch folder"
    if len(video_file_paths) == 1:
        epoch_to_video_info[folder.name] = dict(file_path=video_file_paths[0], start_time=start_time)
        
epoch_to_video_info
        


{'fCamk3_201030_094008': {'file_path': PosixPath('/home/heberto/buzaki/fCamk3_201030_sess12/fCamk3_201030_094008/Basler_acA1280-60gc__21606137__20201030_094038065.avi'),
  'start_time': 5940.64},
 'fCamk3_201030_135409': {'file_path': PosixPath('/home/heberto/buzaki/fCamk3_201030_sess12/fCamk3_201030_135409/Basler_acA1280-60gc__21606137__20201030_135451101.avi'),
  'start_time': 20515.872}}

In [25]:
mat_file["session"]["general"]["date"]

'2016-03-15'

In [19]:
import pandas as pd
pd.options.display.float_format = '{:,.2f}'.format

epoch_df = pd.DataFrame(epoch_list)
epoch_df.stopTime.max() / (3600)

3.291324435185185

This prints out five hours as expected.

In [20]:
epoch_df["duration_seconds"] = (epoch_df.stopTime - epoch_df.startTime)
epoch_df["duration_hours"] = epoch_df.duration_seconds / (3600)
print(epoch_df.to_markdown(index=False))

| name                 |   startTime |   stopTime |   duration_seconds |   duration_hours |
|:---------------------|------------:|-----------:|-------------------:|-----------------:|
| fCamk2_201012_090020 |        0    |    3794.69 |           3794.69  |         1.05408  |
| fCamk2_201012_115808 |     3794.69 |    6988.77 |           3194.08  |         0.887244 |
| fCamk2_201012_130347 |     6988.77 |    9449.44 |           2460.67  |         0.68352  |
| fCamk2_201012_134511 |     9449.44 |   10273.7  |            824.224 |         0.228951 |
| fCamk2_201012_140000 |    10273.7  |   11848.8  |           1575.1   |         0.437529 |


In [None]:
epoch_df["duration_hours"].sum()

In [None]:
print(epoch_df.to_markdown(index=False))

| name                 |   startTime |   stopTime | behavioralParadigm   | environment   | manipulation     |   duration_hours |
|:---------------------|------------:|-----------:|:---------------------|:--------------|:-----------------|-----------------:|
| fCamk1_200827_084028 |        0    |    5194.88 | BaselinePre          | Home cage     | None             |         1.44302  |
| fCamk1_200827_101538 |     5194.88 |    8284.64 | PreStim              | Home cage     | uLED random stim |         0.858267 |
| fCamk1_200827_110712 |     8284.64 |   10081.4  | Maze                 | Linear maze   | uLED random stim |         0.499102 |
| fCamk1_200827_113839 |    10081.4  |   11803.6  | PostStim             | Home cage     | uLED random stim |         0.478382 |
| fCamk1_200827_125535 |    11803.6  |   17920.1  | BaselinePost         | Home cage     | None             |         1.69904  |

1,796.77 seconds  
How does it relate to 53143 samples which is the samples for the timestamps in the trials table for the different behavioral files in the .mat files

53143 samples / 1796.77 seconds = 29.6 samples per second (Hz)

This is approximately 30 Hz whic fits wit the sampling rate of the camera.  


# Trials

## `beahvior.cellinfo.mat`

In [None]:
file_path = session_path / f"{session_path.stem}.behavior.cellinfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

trial_data = mat_file["behavior"]["trials"]


The startTime is are the trial intervals

In [None]:
trial_intervals = trial_data["startPoint"]
start_time, stop_time = trial_intervals[:, 0], trial_intervals[:, 1]
start_time.shape, stop_time.shape


### Visited arm

In [None]:
visted_arm = trial_data["visitedArm"]
visted_arm.shape

#### Recordings
Seems to have only one value

In [None]:
mat_file["behavior"]["trials"]["recordings"].shape

In [None]:
np.unique(mat_file["behavior"]["trials"]["recordings"])

#### Let's take a look at the maps

In [None]:
len(mat_file["behavior"]["maps"])

In [None]:
mat_file["behavior"]["maps"][0].shape, mat_file["behavior"]["maps"][1].shape

In [None]:
mat_file["behavior"]["maps"][0][:10]

In [None]:
mat_file["behavior"]["noStimulatedMaps"][0].shape

In [None]:
mat_file["behavior"]["noStimulatedMaps"][0][:5]

In [None]:
mat_file["behavior"]["stimulatedMaps"][0].shape

In [None]:
mat_file["behavior"]["noStimulatedMapsLegend"]

In [None]:
mat_file["behavior"]["stimulatedMapsLegend"]

In [None]:
timestamps = mat_file["behavior"]["timestamps"]
timestamps[:5]

It seems taht the timestamps are already synchronized. That is, they start at their epoch

In [None]:
trials_timestamps = mat_file["behavior"]["masks"]["trials"]

In [None]:
np.unique(trials_timestamps)

This seems to be a map from timestamps [the index here] to trial. Let's confirm that using a figure. The trials with nan should represent the baseline. Let's change them by -1

In [None]:
# Set np.nan to -1 in trial_timestamps
trials_to_plot = trials_timestamps.copy()
trials_to_plot[np.isnan(trials_to_plot)] = -20

import matplotlib.pyplot as plt
plt.plot(trials_to_plot)


I am wrong. I think there is sjust some delay

In [None]:
# Extract the indexes where the trials change
trial_change_idx = np.nonzero(np.diff(trials_to_plot) != 0)[0]

In [None]:
trial_change_idx

In [None]:
start_time = timestamps[trial_change_idx]
start_time

It seems that the trials cover all the behavioral epoch.
However, we also know that there is no stimulation on all of those trials. I wonder where I can get the times for the behavioral epoch of which there should be 5. Baseline - stim 1 - baseline - stim 2 - baseline.

In [None]:
trial_intervals = mat_file["behavior"]["trials"]["startPoint"]
start_time, stop_time = trial_intervals[:, 0], trial_intervals[:, 1]

# Calculate distance between stop_time and next start_time

inter_trial_distance = start_time[1:] - stop_time[:-1]

inter_trial_distance


In [None]:
maps = np.concatenate(mat_file["behavior"]["maps"], axis=0)
trial_mask = mat_file["behavior"]["masks"]["trials"]
maps.shape

In [None]:
trial_mask = mat_file["behavior"]["masks"]["trials"]
trial_index = 1
mask = trial_mask == trial_index
position = mat_file["behavior"]["position"]
x, y = position["x"], position["y"]
x_trial, y_trial = x[mask], y[mask]


x_no_norm = x * maps[:, 0]
y_no_norm = y * maps[:, 1]

# Create a color gradient based on the index
color_map = plt.cm.get_cmap('plasma')
colors = np.linspace(0, 1, len(x_no_norm[mask]))

# Plot with color gradient
plt.scatter(x_no_norm[mask], y_no_norm[mask], c=colors, cmap=color_map)

plt.colorbar()  # Add colorbar for reference


## `Behavior.mat`
This has the visited arm

In [None]:
file_path = session_path / f"{session_path.stem}.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

In [None]:
mat_file["behavior"]["maps"][0].shape, mat_file["behavior"]["maps"][1].shape

In [None]:
trial_direction = mat_file["behavior"]["masks"]["trialsDirection"]
trial_direction[:5]

In [None]:
trials = mat_file["behavior"]["trials"]
trial_intervals = trials["startPoint"]
visited_arm = trials["visitedArm"]

visited_arm[:5], visited_arm.shape

In [None]:
# Transform vistied_arm to 0 if value is 1 and to 1 otherwise
visited_arm_complement = np.where(visited_arm == 1, 0, 1)

np.where(visited_arm_complement != trial_direction)[0]

In [None]:
visited_arm[110], trial_direction[110]

By looking at the plot below, it seems that this is the direction up and down no left and right.

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np

trial_mask = mat_file["behavior"]["masks"]["trials"]


trials = [108, 109, 110]
trials = [5, 6, 7, 8, 9]

fig, axs = plt.subplots(1, len(trials), figsize=(20, 4))  # Create subplots with 1 row and 5 columns

for i, trial_index in enumerate(trials):
    mask = trial_mask == trial_index
    position = mat_file["behavior"]["position"]
    x, y = position["x"], position["y"]

    # Create a color gradient based on the index
    color_map = plt.cm.get_cmap('RdBu_r')
    colors = np.linspace(0, 1, len(x[mask]))

    # Plot with color gradient
    scatter = axs[i].scatter(x[mask], y[mask], c=colors, cmap=color_map)

    axs[i].set_title(f'Trial {trial_index}')  # Set title for each subplot
    axs[i].set_xlabel('X')  # Set x-axis label for each subplot
    axs[i].set_ylabel('Y')  # Set y-axis label for each subplot

    # Add a small colorbar to each plot
    divider = make_axes_locatable(axs[i])
    cax = divider.append_axes("right", size="5%", pad=0.05)
    fig.colorbar(scatter, cax=cax)

plt.tight_layout()  # Adjust spacing between subplots
print("visited_arm", visited_arm[trials])
print("trial_direction", trial_direction[trials])

# Positions


I want to see if the positions in different files have different units. 

Which are the files 



In [None]:
# In the top level of the session folder

top_level_with_position = ["behavior.cellinfo.mat", "Behavior.mat"]


In [None]:
file_name = top_level_with_position[1]
for file_name in top_level_with_position:
    file_path = session_path / f"{session_path.stem}.{file_name}"
    assert file_path.is_file(), file_path

    mat_file = read_mat(file_path) 
    position = mat_file["behavior"]["position"]
    x, y = position["x"], position["y"]

    plt.plot(x, y)


Same position data here

In [None]:
file_name = "Linearized.Behavior.mat"

sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()

file_path = sub_folder / f"{sub_folder.stem}.{file_name}"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

position = mat_file["behavior"]["position"]
x, y = position["x"], position["y"]

plt.plot(x, y)


In [None]:
file_name  = "Tracking.Behavior.mat"

sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()

file_path = sub_folder / f"{sub_folder.stem}.{file_name}"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 


position = mat_file["tracking"]["position"]
x, y = position["x"], position["y"]

plt.plot(x, y)

# Pulses

In [None]:
file_path = session_path / f"{session_path.stem}.pulses.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
pulses = mat_file["pulses"]
pulses.keys()

In [None]:
pulses["timestamps"]

In [None]:
second_pulse

In [None]:
duration = pulses["timestamps"][:, 1] - pulses["timestamps"][:, 0]
duration[:5]

In [None]:
pulses["analogChannel"][:5]

In [None]:
np.unique(pulses["analogChannel"])

In [None]:
pulses["analogChannelsList"][:5]

In [None]:
pulses["intsPeriods"][:5]

In [None]:
file_path = session_path / f"{session_path.stem}.session.mat"
assert file_path.is_file(), file_path
brain_regions_to_channels = read_mat(file_path)["session"]["brainRegions"]

# Invert the dictionary brain_regions_to_channels
channel_to_brain_region_dict = {}
for region, value in brain_regions_to_channels.items():
    channels = value["channels"]
    channel_to_brain_region_dict |= {channel: region for channel in channels}
    

brain_region = [channel_to_brain_region_dict[i] for i in pulses["analogChannel"]]
brain_region[:5]


In [None]:
pulses["eventGroupID"][:5]

In [None]:
inter_times = pulses["timestamps"][1:] - pulses["timestamps"][:-1]

In [None]:
pulses["duration"][:5]

In [None]:
np.unique(pulses["eventGroupID"])

In [None]:
np.unique(pulses["analogChannel"])

In [None]:
plt.hist(pulses["amplitude"])

In [None]:
pulses["timestamps"].shape

In [None]:
periods = pulses["intsPeriods"]
first_pulse = periods[0][0]
last_pulse = periods[-1][1]

first_pulse, last_pulse, (last_pulse - first_pulse) / 3600

In [None]:
pulse_durations = periods[:, 1] - periods[:, 0] 
plt.hist(pulse_durations)

In [None]:


x = periods[:, 0]
y = np.ones_like(x)
width = periods[:, 1] - periods[:, 0]
plt.bar(x=x, height=y, width=width, bottom=0, align='edge')

# Let's plot the epochs as well

file_path = session_path / f"{session_path.stem}.session.mat"
assert file_path.is_file(), file_path

mat_file_epochs = read_mat(file_path)
epoch_list = mat_file_epochs["session"]["epochs"]

start_time_list = [epoch["startTime"] for epoch in epoch_list[1:-1]]
epoch_name = [epoch["behavioralParadigm"] for epoch in epoch_list[1:-1]]

# Add an annotation for every epoch in start_time_list
for start_time in start_time_list:
    plt.axvline(start_time, color="red", linestyle="--", alpha=0.5, linewidth=5)
    
# Add the name in the top for each epoch in epoch_name
for start_time, name in zip(start_time_list, epoch_name):
    plt.text(start_time, 1.1, name, rotation=45, horizontalalignment="center")
    
plt.xlabel("Seconds")

# Remote frame in the y axis
plt.yticks([])

# Remote the frame
plt.box(False)

Juding by the time and duration the pulses cover the three epochs. The Prestim, the Maze and the PostStim.



In [None]:
np.unique(pulses["analogChannel"])

In [None]:
np.unique(pulses["eventGroupID"])

In [None]:
np.unique(pulses["analogChannelsList"])

In [None]:
channel = pulses["analogChannel"]
channel.shape

In [None]:
pulse_duration_ms = pulses["duration"] * 1000
plt.hist(pulse_duration_ms)

This is correct duration of 20 milliseconds as stated in the paper.

In [None]:
plt.hist(pulses["analogChannel"])

In [None]:
plt.hist(pulses["analogChannelsList"])

## Optogenic pulses


'fCamk1_200827_sess9.optogeneticResponse.cellinfo.mat size: 375.40 MiB',
 'fCamk1_200827_sess9.optogeneticPulses.events.mat 

In [None]:
file_path = session_path / f"{session_path.stem}.optogeneticPulses.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
epochs_stimulation = mat_file["optoPulses"]["stimulationEpochs"]

In [None]:
epoch_length = epochs_stimulation[:, 1] - epochs_stimulation[:, 0]
epochs_stimulation[epoch_length < 0]

Some of them have negative length. I am confused about what this means.

In [None]:
# Plot a bar for the periods[:, 0]

# Adjust the figure size
plt.figure(figsize=(16, 8))
# Adjust all the fonts
plt.rcParams.update({'font.size': 25})

x = epochs_stimulation[:, 0]
y = np.ones_like(x)
width = epochs_stimulation[:, 1] - epochs_stimulation[:, 0]
plt.bar(x=x, height=y, bottom=0, align='edge')

# Let's plot the epochs as well

file_path = session_path / f"{session_path.stem}.session.mat"
assert file_path.is_file(), file_path

mat_file_epochs = read_mat(file_path)
epoch_list = mat_file_epochs["session"]["epochs"]

start_time_list = [epoch["startTime"] for epoch in epoch_list[1:-1]]
epoch_name = [epoch["behavioralParadigm"] for epoch in epoch_list[1:-1]]

# Add an annotation for every epoch in start_time_list
for start_time in start_time_list:
    plt.axvline(start_time, color="red", linestyle="--", alpha=0.5, linewidth=5)
    
# Add the name in the top for each epoch in epoch_name
for start_time, name in zip(start_time_list, epoch_name):
    plt.text(start_time, 1.1, name, rotation=45, horizontalalignment="center")
    
plt.xlabel("Seconds")

# Remote frame in the y axis
plt.yticks([])

# Remote the frame
plt.box(False)

# Sleep states

In [None]:
file_path = session_path / f"{session_path.stem}.SleepState.states.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)


In [None]:
sleep_states_data = mat_file["SleepState"]
sleep_states_data.keys()

In [None]:
intervals = sleep_states_data["ints"]
available_states = [str(key) for key in intervals.keys()]
available_states

In [None]:
intervals["WAKEstate"]

In [None]:
intervals["REMstate"]

In [None]:
idx = sleep_states_data["idx"]
statenames = idx["statenames"]
statenames, len(statenames)

In [None]:
np.unique(idx["states"])

This are just the timestamps and the indexes corresponding to the statenames

In [None]:
available_states = [state for state in statenames if len(state) > 0]
available_states

In [None]:
idx["theta_states"]

In [None]:
np.unique(idx["theta_states"]["states"])

Here, 0 probabl ymeans the absence of signal but then there is theta vs non-theta.

# Theta 

In [None]:
file_path = session_path / f"{session_path.stem}.thetaEpochs.states.mat"
assert file_path.is_file(), file_path
mat_file = read_mat(file_path)


dd

# Session general information (`sesion.mat`)

In [None]:
file_path = session_path / f"{session_path.stem}.session.mat"
assert file_path.is_file(), file_path
mat_file = read_mat(file_path)


# Add epochs to nwbfile

In [None]:
epoch_list = mat_file["session"]["epochs"]
epoch_list[0]

In [None]:
from pynwb.testing.mock.file import mock_NWBFile

nwb_file = mock_NWBFile()

nwb_file.add_epoch_column(name="behavioral_paradigm", description="The behavioral paradigm of the epoch")
nwb_file.add_epoch_column(name="environment", description="The environment in the epoch")
nwb_file.add_epoch_column(name="manipulation", description="The stimulus in the epoch")


In [None]:
for epoch in epoch_list:
    start_time = float(epoch["startTime"])
    stop_time = float(epoch["stopTime"])
    behavioral_paradigm = epoch["behavioralParadigm"]
    environment = epoch["environment"]
    manipulation = epoch["manipulation"]
    
    nwb_file.add_epoch(start_time=start_time, stop_time=stop_time, behavioral_paradigm=behavioral_paradigm, environment=environment, manipulation=manipulation)    

In [None]:
nwb_file.epochs.to_dataframe()

# Camera

In [None]:
videos_available = list(session_path.rglob('*.avi'))
assert len(videos_available) == 1, f"There should be one and only one video file {videos_avaialble}"  

In [None]:
from neuroconv.datainterfaces import VideoInterface

interface = VideoInterface(file_paths=videos_available)
interface.get_metadata()

# Rewards as events

In [None]:
file_path = session_path / f"{session_path.stem}.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path) 

events_data = mat_file["behavior"]["events"]

In [None]:
# Extract timestamps and create labels for rewards
reward_r_timestamps = events_data["rReward"]
reward_l_timestamps = events_data["lReward"]
label_reward_r = np.ones(reward_r_timestamps.shape[0], dtype=int)
label_reward_l = np.zeros(reward_l_timestamps.shape[0], dtype=int)

# Create a structure to concatenate timestamps and sort by them
reward_r = np.vstack((reward_r_timestamps, label_reward_r))
reward_l = np.vstack((reward_l_timestamps, label_reward_l))
rewards = np.concatenate((reward_r, reward_l), axis=1)

timestamps_both_rewards = rewards[0, :]
rewards = rewards[:, timestamps_both_rewards.argsort()]

In [None]:
timestamps = rewards[0, :]
assert np.all(np.diff(timestamps) > 0)

In [None]:
plt.plot(np.arange(rewards.shape[1]), rewards[0, :])
# Add an identity line in the plot
np.diff(r)

In [None]:
from ndx_events import LabeledEvents

timestamps = rewards[0, :]
data = rewards[1, :]

events = LabeledEvents(
    name='rewards',
    description='rewards in the linear track',
    timestamps=timestamps,
    data=data,
    labels=['right_reward', 'left_reward']
)

from pynwb.testing.mock.file import mock_NWBFile

nwbfile = mock_NWBFile()


nwbfile.add_acquisition(events)

# Add pulses 

## As series

In [None]:
file_path = session_path / f"{session_path.stem}.pulses.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
pulses_data = mat_file["pulses"]
pulses_data.keys()

In [None]:
pulse_intervals = pulses_data["timestamps"] 
pulse_micro_led = pulses_data["analogChannel"]
pulse_amplitude = pulses_data["amplitude"]

In [None]:
pulse_amplitude[:5]

In [None]:
np.unique(pulses_data["analogChannel"])

In [None]:
np.unique(pulses_data["eventGroupID"])

In [None]:
from pynwb.testing.mock.file import mock_NWBFile
from pynwb.ogen import OptogeneticStimulusSite
from pynwb.ogen import OptogeneticSeries

nwbfile = mock_NWBFile()

manufacturer = "Neurolight Technologies"
name = "N1-F21-O36 | 18"
description = ("12 µLEDs, 10 x 15 µm each, 3 per shank\n"
              "Emission Peak λ = 460 nm and FWHM = 40 nm\n"
              "Typical irradiance of 33 mW/mm² (@ max operating current of 100 µA)\n"
              "32 recording channels, 8 per shank\n"
              "Electrode impedance of 1000 - 1500 kΩ at 1 kHz\n")
            
device_metadata = dict(name=name, description=description, manufacturer=manufacturer)
if device_metadata["name"] not in nwbfile.devices:
    neurolight_probe = nwbfile.create_device(**device_metadata)
    
micro_led_ids = np.unique(pulse_micro_led)
site_description = "microscopic LED 10 x 15 µm each, 3 per shank. Each μLED has an emission area of 150 μm2"
location = "dorsal right hippocampus (antero-posterior 2.0 mm, mediolateral 1.5 mm, dorsoventral 0.6 mm)"
micro_led_ids_to_site = dict()

for id in micro_led_ids:

    ogen_stim_site = OptogeneticStimulusSite(
        name=f"Microled site in Neurolight probe with id {id}",
        device=neurolight_probe,
        description=site_description,
        excitation_lambda=460.0,  # nm
        location=location, # TODO find the mapping for precise location per site if possible
    )
    micro_led_ids_to_site[id] = ogen_stim_site
    nwbfile.add_ogen_site(ogen_stim_site)


for id in micro_led_ids:
    site_intervals = pulse_intervals[pulse_micro_led == id]
    site_amplitudes = pulse_amplitude[pulse_micro_led == id]
    pulse_start_time, pulse_stop_time = site_intervals[:, 0], site_intervals[:, 1]
    amplitude_at_start = np.zeros_like(pulse_start_time)
    amplitude_at_stop = site_amplitudes 

    raise_time = 0.001 # 1 ms
    rise_to_max_time = pulse_start_time + raise_time
    amplitude_at_max = site_amplitudes

    # Assume from the trapezoidal profile that the decay time is the same as the rise time 
    decay_time = pulse_stop_time + raise_time 
    amplitude_after_decay = np.zeros_like(decay_time)

    timestamps = np.vstack((pulse_start_time, rise_to_max_time, pulse_stop_time, decay_time))
    data = np.vstack((amplitude_at_start, amplitude_at_max, amplitude_at_stop, amplitude_after_decay))

    site_timestamps = timestamps.T.flatten()
    site_data = data.T.flatten()

    optogenetic_series_description = ("μLEDs were controlled with current (2-4.5 μA generating 0.02-0.1μW of total light power;"
                "ref (15)) provided by a 12-channel current generator (OSC1Lite, NeuroNex Michigan Hub)"
                "driven by an Arduino, which delivered trapezoid (1ms rise time)"
                "blue light (centered emission at 460 nm, emission surface area = 150 mm2) 20 ms pulses at"
                "random sites with a randomly variable (40-60ms) offset")
    optogenetic_site = micro_led_ids_to_site[id]
    optogenetic_series = OptogeneticSeries(
        name=f"Stimuli from microLED site {id}",
        timestamps = site_timestamps,
        data=site_data,
        site=optogenetic_site,
        description=optogenetic_series_description,
    )

    nwbfile.add_stimulus(optogenetic_series)

    n = 4
    zero = np.min(pulse_intervals[:, 0])
    plt.plot((site_timestamps[:n] - zero) * 1000.0, site_data[:n]) 



In [None]:
site_timestamps[:n] / 1000

In [None]:
timestamps[:5, :].T

In [None]:
timestamps.T.flatten()[:5]

## As time intervals

In [None]:
file_path = session_path / f"{session_path.stem}.pulses.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
pulses_data = mat_file["pulses"]
pulses_data.keys()

In [None]:
pulse_intervals = pulses_data["timestamps"] 
electrode_channel = pulses_data["analogChannel"]
amplitude = pulses_data["amplitude"]





In [None]:
from pynwb.testing.mock.file import mock_NWBFile

nwbfile = mock_NWBFile()

from pynwb.epoch import TimeIntervals

laser_description = (
"""
μLEDs were controlled with current (2-4.5 μA generating 0.02-0.1μW of total light power;
ref (15)) provided by a 12-channel current generator (OSC1Lite, NeuroNex Michigan Hub)
driven by an Arduino (https://github.com/valegarman), which delivered trapezoid (1ms rise time)
blue light (centered emission at 460 nm, emission surface area = 150 mm2) 20 ms pulses at
random sites with a randomly variable (40-60ms) offset.
"""
)

stimuli_laser_pulses = TimeIntervals(
    name="laser_pulses",
    description="intervals for each pulse stimuli for the uLED electrodes",
)

stimuli_laser_pulses.add_column(name="electrode_channel", description="The electrode channel for the pulse")
stimuli_laser_pulses.add_column(name="amplitude", description="The amplitude of the pulse")

for interval, channel, amp in zip(pulse_intervals, electrode_channel, amplitude):
    start_time, stop_time = interval
    channel = channel
    row_dict = {"start_time": start_time, "stop_time": stop_time, "electrode_channel": channel, "amplitude": amp}
    stimuli_laser_pulses.add_row(**row_dict)
    

nwbfile.add_time_intervals(stimuli_laser_pulses)

# Ripples as events

In [None]:
file_path = session_files_path_list[0]
file_path = session_path / f"{session_path.stem}.ripples.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)
ripples_data = mat_file["ripples"]   

In [None]:
ripple_intervals = ripples_data["timestamps"]

peaks = ripples_data["peaks"]
peak_normed_power = ripples_data["peakNormedPower"]

ripple_stats_data = ripples_data["rippleStats"]["data"]

peak_frequencies = ripple_stats_data["peakFrequency"]
ripple_durations = ripple_stats_data["duration"]
peak_amplitudes = ripple_stats_data["peakAmplitude"]

descriptions = dict(
    ripple_durations="Duration of the ripple event.",
    peaks="Peak of the ripple.",
    peak_normed_power="Normed power of the peak.",
    peak_frequencies="Peak frequency of the ripple.",
    peak_amplitudes="Peak amplitude of the ripple.",
)



In [None]:
from pynwb.testing.mock.file import mock_NWBFile
from pynwb import NWBFile, H5DataIO
from pynwb.epoch import TimeIntervals


nwbfile = mock_NWBFile()
name = "ripples"
table = TimeIntervals(name=name, description="Ripples and their metrics")


for start_time, stop_time in ripple_intervals:
    table.add_row(start_time=start_time, stop_time=stop_time)

for column_name, column_data in zip(
    list(descriptions), [ripple_durations, peaks, peak_normed_power, peak_frequencies, peak_amplitudes]
):
    table.add_column(
        name=column_name,
        description=descriptions[column_name],
        data=H5DataIO(column_data, compression="gzip"),
    )

# Extract indexed data

ripple_stats_maps = ripples_data["rippleStats"]["maps"]

ripple_raw = ripple_stats_maps["ripples_raw"]
ripple_frequencies = ripple_stats_maps["frequency"]
ripple_phases = ripple_stats_maps["phase"]
ripple_amplitudes = ripple_stats_maps["amplitude"]

indexed_descriptions = dict(
    ripple_raw="Extracted ripple data.",
    ripple_frequencies="Frequency of each point on the ripple.",
    ripple_phases="Phase of each point on the ripple.",
    ripple_amplitudes="Amplitude of each point on the ripple.",
)

for column_name, column_data in zip(
    list(indexed_descriptions), [ripple_raw, ripple_frequencies, ripple_phases, ripple_amplitudes]
):
    table.add_column(
        name=column_name,
        description=indexed_descriptions[column_name],
        index=list(range(column_data.shape[0])),
        data=H5DataIO(column_data, compression="gzip"),
    )
    



In [None]:
ripple_frequencies.shape

# Compare sorting and sorting times

In [None]:
from neuroconv.datainterfaces import CellExplorerSortingInterface

In [None]:
file_path = session_path / f"{session_path.stem}.spikes.cellinfo.mat"

cell_explorer_interface = CellExplorerSortingInterface(file_path=file_path)

In [None]:
sorter_cell_explorer = cell_explorer_interface.sorting_extractor
sorter_cell_explorer

In [None]:
import pandas as pd
sorter = sorter_cell_explorer

spike_range = {}
for unit_id in sorter.unit_ids:

    spikes = sorter.get_unit_spike_train(unit_id=unit_id, return_times=True) / 3600
    first_spike, last_spike = spikes[0], spikes[-1]
    spike_range[unit_id] = dict(first_spike=first_spike, last_spike=last_spike)
    
pd.DataFrame(spike_range).max(axis=1)


In [None]:
from neuroconv.datainterfaces import KiloSortSortingInterface

folder_path = session_path / "Kilosort_2020-08-28_231022"
kilosort_interface = KiloSortSortingInterface(folder_path=folder_path)
kilosort_sorter = kilosort_interface.sorting_extractor
kilosort_sorter

In [None]:
import pandas as pd
sorter = kilosort_sorter

spike_range = {}
for unit_id in sorter.unit_ids:

    spikes = sorter.get_unit_spike_train(unit_id=unit_id, return_times=True) / 3600
    first_spike, last_spike = spikes[0], spikes[-1]
    spike_range[unit_id] = dict(first_spike=first_spike, last_spike=last_spike)
    
pd.DataFrame(spike_range).max(axis=1)


In [None]:
folder_path = session_path / "Kilosort_2020-08-28_231022"
neuroscope_interface = NeuroScopeSortingInterface(folder_path=folder_path)

neuroscope_sorter = neuroscope_interface.sorting_extractor
neuroscope_sorter

# Intan header

https://intantech.com/files/Intan_RHD2000_data_file_formats.pdf

Python format:
https://intantech.com/downloads.html?tabSelect=Software

In [None]:
file_path_intan = session_path / "info.rhd"
file_path_intan.is_file()

In [None]:
import struct

def read_qstring(f):
    length = struct.unpack('<I', f.read(4))[0]

    if length == 0xFFFFFFFF:
        return None

    length = length // 2  # Convert length from bytes to 16-bit Unicode words
    a = []
    
    for _ in range(length):
        a.append(struct.unpack('<H', f.read(2))[0])

    return ''.join(chr(c) for c in a)

def read_channel(f):
    native_name = read_qstring(f)
    custom_name = read_qstring(f)
    native_order = struct.unpack('<h', f.read(2))[0]
    custom_order = struct.unpack('<h', f.read(2))[0]
    signal_type = struct.unpack('<h', f.read(2))[0]
    enabled = struct.unpack('<h', f.read(2))[0]
    chip_channel = struct.unpack('<h', f.read(2))[0]
    board_stream = struct.unpack('<h', f.read(2))[0]
    spike_scope_voltage_trigger_mode = struct.unpack('<h', f.read(2))[0]
    spike_scope_voltage_treshold = struct.unpack('<h', f.read(2))[0]
    spike_scope_digital_trigger_channel = struct.unpack('<h', f.read(2))[0]
    spike_scope_digital_edge_polarity = struct.unpack('<h', f.read(2))[0]
    electrode_impedance_magnitude = struct.unpack('<f', f.read(4))[0]
    electrode_impedance_phase = struct.unpack('<f', f.read(4))[0]
    
    return {
        'native_name': native_name,
        'custom_name': custom_name,
        'native_order': native_order,
        'custom_order': custom_order,
        'signal_type': signal_type,
        'enabled': enabled,
        'chip_channel': chip_channel,
        'board_stream': board_stream,
        "spike_scope_voltage_trigger_mode": spike_scope_voltage_trigger_mode,
        "spike_scope_voltage_treshold": spike_scope_voltage_treshold,
        "spike_scope_digital_trigger_channel": spike_scope_digital_trigger_channel,
        "spike_scope_digital_edge_polarity": spike_scope_digital_edge_polarity,
        "electrode_impedance_magnitude": electrode_impedance_magnitude,
        "electrode_impedance_phase": electrode_impedance_phase
    }

def read_signal_group(f):
    name = read_qstring(f)
    prefix = read_qstring(f)
    enabled = struct.unpack('<h', f.read(2))[0]
    num_channels = struct.unpack('<h', f.read(2))[0]
    num_amp_channels = struct.unpack('<h', f.read(2))[0]

    channels = []
    for i in range(num_channels):
        channel = read_channel(f)
        channels.append(channel)

    return {
        'name': name,
        'prefix': prefix,
        'enabled': enabled,
        'num_channels': num_channels,
        'num_amp_channels': num_amp_channels,
        'channels': channels
    }

with open(file_path_intan, 'rb') as f:
    # Read magic number
    magic_number = struct.unpack('<I', f.read(4))[0]
    print(hex(magic_number))
    print("0xC6912702")
    # Read version number
    version = struct.unpack('<h', f.read(2))[0]
    version_small = struct.unpack('<h', f.read(2))[0]
    # Unpack a single float with struct.unpack
    sampling_rate = struct.unpack('<f', f.read(4))[0]
    dsp_enabled = struct.unpack('<h', f.read(2))[0]
    dsp_cutoff_frequency = struct.unpack('<f', f.read(4))[0]
    lower_bandwidth = struct.unpack('<f', f.read(4))[0]
    uppper_bandwith = struct.unpack('<f', f.read(4))[0]
    desired_dsp_cutoff_frequency = struct.unpack('<f', f.read(4))[0]
    desired_lower_bandwidth = struct.unpack('<f', f.read(4))[0]
    desired_upper_bandwidth = struct.unpack('<f', f.read(4))[0]
    
    print(f"Version = {version}.{version_small}")
    print(f"{sampling_rate=}, {dsp_enabled=}, {dsp_cutoff_frequency=}, {lower_bandwidth=}")
    print(f"{desired_dsp_cutoff_frequency=}, {desired_lower_bandwidth=}, {desired_upper_bandwidth=}")
    
    notch_filter_mode = struct.unpack('<h', f.read(2))[0]
    desired_impedance_test_frequency = struct.unpack('<f', f.read(4))[0]
    actual_impedance_test_frequency = struct.unpack('<f', f.read(4))[0]
    
    print(f"{notch_filter_mode=}, {desired_impedance_test_frequency=}, {actual_impedance_test_frequency=}")    
    
    # # Read note fields
    
    note1 = read_qstring(f)
    note2 = read_qstring(f)
    note3 = read_qstring(f)
    
    print("Notes")
    print(f"{note1=}, {note2=}, {note3=}")
    
    number_of_temperature_sensors = struct.unpack('<h', f.read(2))[0]
    
    if version >= 1 and version_small >= 1.3:
        board_mode = struct.unpack('<h', f.read(2))[0]
    else:
        board_mode = "Not supported in this version"
    if version >= 2:
        reference_channel = struct.unpack('<h', f.read(2))[0]
    else:
        reference_channel = "Not supported in this version"
    print(f"{number_of_temperature_sensors=}, {board_mode=}, {reference_channel=}")

    number_of_signal_groups = struct.unpack('<h', f.read(2))[0]
    print("Signal groups", number_of_signal_groups)
    
    
    # signal_group_name = read_qstring(f)
    # signal_group_prefix = read_qstring(f)
    # singal_group_enabled = struct.unpack('<h', f.read(2))[0]
    # number_of_channels = struct.unpack('<h', f.read(2))[0]
    # number_of_amplifier_channels = struct.unpack('<h', f.read(2))[0]
    
    # print(f"{signal_group_name=}, {signal_group_prefix=}, {singal_group_enabled=}, {number_of_channels=}, {number_of_amplifier_channels=}")
    
    # channels = read_channel(f)
    # print(channels)
    
    signal_groups = []
    for _ in range(number_of_signal_groups):
        signal_group = read_signal_group(f)
        signal_groups.append(signal_group)

In [None]:
print(signal_groups[0])

In [None]:
for signal_group_index in range(number_of_signal_groups):
    print(f"port = {signal_groups[signal_group_index]['name']}")
    print(signal_groups[signal_group_index]["num_channels"])
    print(signal_groups[signal_group_index]["num_amp_channels"])
    print("-----------------")

# Check NWB result

In [None]:
from pathlib import Path
output_dir_path = Path.home() / "conversion_nwb"

stub_path_list = list((output_dir_path / "nwb_stub").iterdir())

file_path = stub_path_list[0]

import pynwb 

# Open file with pynwb
io = pynwb.NWBHDF5IO(str(file_path), mode='r', load_namespaces=True)
nwbfile = io.read()
nwbfile


In [None]:
dataframe = nwbfile.electrodes.to_dataframe()
import pandas as pd
# Show all the entries of the dataframe
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(dataframe)

In [None]:
dataframe[dataframe.channel_name == "ch9grp3"]

In [None]:
nwbfile.acquisition['Video: Basler_acA1280-60gc__21606137__20200827_110730202']

In [None]:
lfp_module = nwbfile.processing["ecephys"]["LFP"]
lfp_module

In [None]:
lfp_module["ElectricalSeriesLF"]