# Configuration

In [None]:
import json
import os
from pathlib import Path
from xml.etree import ElementTree

import numpy as np
from pymatreader import read_mat
from spikeinterface.extractors.neoextractors import NeuroScopeRecordingExtractor

# Location of the data. The default is the original author's local folder; set the
# BUZAKI_PROJECT_ROOT environment variable to run the notebook on another machine.
project_root = Path(os.environ.get("BUZAKI_PROJECT_ROOT", "/home/heberto/buzaki"))
session_path = project_root /  "fCamk1_200827_sess9"
assert session_path.is_dir(), f"Session folder not found: {session_path}"

# Dump the JSON summaries to a folder next to where the notebook is run
json_directory = Path.cwd() / "_json_files"
json_directory.mkdir(exist_ok=True)

# Session overview

#### Mat files

In [None]:
# List the `.mat` files in the session folder together with their sizes, largest first.
session_files_path_list = list(session_path.iterdir())
condition = lambda file_path: ".mat" in file_path.name

# Sort on the numeric byte count *before* formatting. Sorting the formatted
# "size: 1,234.56 MiB" strings would order them lexicographically
# (e.g. "8,203.18" would come before "32,812.73").
selected_paths = sorted(
    (file_path for file_path in session_files_path_list if condition(file_path)),
    key=lambda file_path: file_path.stat().st_size,
    reverse=True,
)
files = [(file_path.name, f"size: {file_path.stat().st_size / 1024**2 :,.2f} MiB") for file_path in selected_paths]

# Determine the maximum width for alignment based on longest file name length
# (default=0 keeps the cell from failing when no file matches).
max_file_name_length = max((len(name) for name, _ in files), default=0)
max_size_width = max_file_name_length + len("  size: ")

# Create new list with formatted strings
file_as_string = [f"{name.ljust(max_file_name_length)} {size}" for name, size in files]
file_as_string

### The rest

In [None]:
# List every regular file that is not a `.mat` file, together with its size, largest first.
session_files_path_list = list(session_path.iterdir())
condition = lambda file_path : ".mat" not in file_path.name and file_path.is_file()

# Sort on the numeric byte count *before* formatting. Sorting the formatted
# "size: 1,234.56 MiB" strings would order them lexicographically
# (e.g. "8,203.18" would come before "32,812.73").
selected_paths = sorted(
    (file_path for file_path in session_files_path_list if condition(file_path)),
    key=lambda file_path: file_path.stat().st_size,
    reverse=True,
)
files = [(file_path.name, f"size: {file_path.stat().st_size / 1024**2 :,.2f} MiB") for file_path in selected_paths]

# Determine the maximum width for alignment based on longest file name length
# (default=0 keeps the cell from failing when no file matches).
max_file_name_length = max((len(name) for name, _ in files), default=0)
max_size_width = max_file_name_length + len("  size: ")

# Create new list with formatted strings
file_as_string = [f"{name.ljust(max_file_name_length)} {size}" for name, size in files]
file_as_string


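This is the output (note that the sizes were sorted as formatted strings, so the order below is lexicographic rather than numeric):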

* `analogin.dat`, `8,203.18 MiB` : Probably raw data
* `fCamk1_200827_sess9.dat`, `32,812.73 MiB` : TODO
* `auxiliary.dat`, `3,076.19 MiB` : TODO
* `time.dat`, `2,050.80 MiB` :  TODO
* `TrialMapsAndRasters.pptx`, `1.02 MiB` : Collection of rasters, I don't think it matters. 
* `fCamk1_200827_sess9.lfp`, `1,367.20 MiB` :  LFP
* `digitalin.dat`, `1,025.40 MiB` :  TODO
* `supply.dat`, `1,025.40 MiB` :  TODO
* `pulTime.npy`, `0.32 MiB` : TODO
* `fCamk1_200827_sess9_HSE.HSE.evt`, `0.28 MiB` : TODO 
* `deepSuperficial_classification_fromRipples.png`, `0.14 MiB` : Figure, probably does not matter 
* `bz_DetectSWR.log`, `0.05 MiB` :  TODO
* `bz_DetectSWR_manu.log`, `0.05 MiB` : TODO
* `fCamk1_200827_sess9.xml`, `0.04 MiB` : TODO
* `fCamk1_200827_sess9.nrs`, `0.00 MiB` : TODO
* `4nA pulses.txt`, `0.00 MiB` : TODO
* `info.rhd`, `0.00 MiB` : TODO

#### Folders / Directories

In [None]:
# List the sub-directories of the session folder with the total size of their contents, largest first.
session_files_path_list = list(session_path.iterdir())

# Define the filter explicitly: this cell previously reused the `condition` left over
# from the cell above, which selects regular files and therefore never listed a directory.
condition = lambda file_path: file_path.is_dir()

# `stat().st_size` of a directory is not the size of its contents, so add up
# the sizes of all regular files found recursively inside each directory.
directory_sizes = {
    directory_path: sum(
        inner_path.stat().st_size for inner_path in directory_path.rglob("*") if inner_path.is_file()
    )
    for directory_path in session_files_path_list
    if condition(directory_path)
}

# Sort on the numeric byte count, not on the formatted string.
sorted_directories = sorted(directory_sizes, key=directory_sizes.get, reverse=True)
files = [
    (directory_path.name, f"size: {directory_sizes[directory_path] / 1024**2 :,.2f} MiB")
    for directory_path in sorted_directories
]

# Determine the maximum width for alignment based on longest directory name length
# (default=0 keeps the cell from failing when there are no sub-directories).
max_file_name_length = max((len(name) for name, _ in files), default=0)
max_size_width = max_file_name_length + len("  size: ")

# Create new list with formatted strings
file_as_string = [f"{name.ljust(max_file_name_length)} {size}" for name, size in files]
file_as_string

# Explore specific files

In [None]:
def build_keys_and_types(dictionary):
    """Summarize a (possibly nested) dictionary as a JSON-friendly structure.

    Nested dictionaries are summarized recursively. Every other value is
    replaced by a small dictionary holding the string form of its type plus:

    * ``'shape'`` for numpy arrays with more than 10 elements,
    * ``'length'`` for lists with more than 10 items,
    * ``'value'`` (the ``str()`` of the value) for everything else,
      including small arrays and small lists.

    Parameters
    ----------
    dictionary : dict
        The dictionary to summarize.

    Returns
    -------
    dict
        A dictionary with the same keys as ``dictionary``.
    """

    def _summarize(entry):
        # Nested dictionaries keep their structure.
        if isinstance(entry, dict):
            return build_keys_and_types(entry)

        type_name = str(type(entry))

        # Large containers are described by their size instead of their content.
        if isinstance(entry, np.ndarray) and entry.size > 10:
            return {'type': type_name, 'shape': str(entry.shape)}
        if isinstance(entry, list) and len(entry) > 10:
            return {'type': type_name, 'length': len(entry)}

        # Small arrays, small lists and any other value are shown verbatim.
        return {'type': type_name, 'value': str(entry)}

    return {key: _summarize(entry) for key, entry in dictionary.items()}


## Sessions

### `session.mat`

In [None]:
# Dump the key/type structure of `<session>.session.mat` to a JSON file for inspection.
file_path = session_path / f"{session_path.stem}.session.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after all the suffixes of the file without the leading dot.
json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

#### `sessionInfo.mat`

In [None]:

# Dump the key/type structure of `<session>.sessionInfo.mat` to a JSON file for inspection.
file_path = session_path / f"{session_path.stem}.sessionInfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after all the suffixes of the file without the leading dot.
json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)

## Behavior 

### `Behavior.mat`

In [None]:
# Dump the key/type structure of `<session>.Behavior.mat` to a JSON file for inspection.
file_path = session_path / f"{session_path.stem}.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after all the suffixes of the file without the leading dot.
json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


### `behavior.cellinfo.mat`

In [None]:
# Dump the key/type structure of `<session>.behavior.cellinfo.mat` to a JSON file for inspection.
file_path = session_path / f"{session_path.stem}.behavior.cellinfo.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after all the suffixes of the file without the leading dot.
json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


### `Tracking.Behavior.mat`

In [None]:
# Dump the key/type structure of `<session>.Tracking.Behavior.mat` to a JSON file for inspection.
file_path = session_path / f"{session_path.stem}.Tracking.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after all the suffixes of the file without the leading dot.
json_name = "".join(file_path.suffixes)[1:] + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
# Show the first five x positions of the tracking data held in `mat_file`
# (`mat_file` is whatever the most recently run loading cell read).
mat_file["tracking"]["position"]["x"][:5]

## Subfolder

### `Tracking.Behavior.mat` subfolder

In [None]:
# Dump the key/type structure of the sub-folder's `Tracking.Behavior.mat` to a JSON file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / f"{sub_folder.stem}.Tracking.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The "_sub_folder" marker keeps this dump apart from the one of the top-level file.
json_name = "".join(file_path.suffixes)[1:] + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
# Show the first five x positions of the tracking data held in `mat_file`
# (`mat_file` is whatever the most recently run loading cell read).
mat_file["tracking"]["position"]["x"][:5]

It seems that this file and the one above contain the same position data.

### `Linearized.Behavior.mat` subfolder

In [None]:
# Dump the key/type structure of the sub-folder's `Linearized.Behavior.mat` to a JSON file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / f"{sub_folder.stem}.Linearized.Behavior.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The "_sub_folder" marker keeps this dump apart from the ones of the top-level files.
json_name = "".join(file_path.suffixes)[1:] + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
# Show the first five x positions of the behavior data held in `mat_file`
# (`mat_file` is whatever the most recently run loading cell read).
mat_file["behavior"]["position"]["x"][:5]

And it is the same position again here. 

However, this has the trials as well.

In [None]:
# Show the first five entries of the trials' `startPoint` field.
mat_file["behavior"]["trials"]["startPoint"][:5]

This seems to be the position and not the times, which is what we need. Let's look in the other folders to see if we can find the trial times.

#### Camera matlab file

In [None]:
# Dump the key/type structure of the camera `.mat` file in the sub-folder to a JSON file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "Basler_acA1280-60gc__21606137__20200827_110730202.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# Use a short fixed name instead of the long camera file name.
json_name = "camera" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)




Here they just copied the camera frame by frame (!)

```

{
  "frames": {
    "r": {
      "type": "<class 'numpy.ndarray'>",
      "shape": "(1024, 204, 53143)"
    }
  }
}
```
How long is the video? 53_143 frames / 30 Hz = 1_771.43 seconds -> 29.52 minutes.

That is, assuming a frame rate of 30 Hz, the video is 1_771.43 seconds long, which we then convert to minutes.

### Virtual maze mat

In [None]:
# Dump the key/type structure of the sub-folder's `virtualMaze.mat` to a JSON file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "virtualMaze.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after the file stem plus a "_sub_folder" marker.
json_name = f"{file_path.stem}" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


This seems just like a map of the maze

```
  "maze": {
    "type": "<class 'numpy.ndarray'>",
    "value": "[[ 11.09870461 109.25348571]\n [ 12.12486221   0.85756268]]"
  }
```

### Roi tracking .mat file

In [None]:
# Dump the key/type structure of the sub-folder's `roiTracking.mat` to a JSON file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "roiTracking.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after the file stem plus a "_sub_folder" marker.
json_name = f"{file_path.stem}" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


I think this is just a map of the region where they track the position with the camera:

```
  "roiTracking": {
    "type": "<class 'numpy.ndarray'>",
    "value": "[[  31.25 1015.  ]\n [ 185.75 1019.5 ]\n [ 184.25    5.5 ]\n [  17.75    2.5 ]\n [  31.25 1015.  ]]"
  }
```

### Amplifier digital events `.mat`

In [None]:
# Dump the key/type structure of the sub-folder's `amplifier.DigitalIn.events.mat` to a JSON file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()
file_path = sub_folder / "amplifier.DigitalIn.events.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

# The JSON file is named after the file stem plus a "_sub_folder" marker.
json_name = f"{file_path.stem}" + "_sub_folder" + ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
# List the fields available under the `digitalIn` entry of the events file.
mat_file["digitalIn"].keys()

```
dict_keys(['timestampsOn', 'timestampsOff', 'ints', 'dur', 'intsPeriods'])
```
I think these are the TTLs that synchronize the events. Come back to this later.

In [None]:
# Shape of the first `timestampsOff` entry
# (presumably one entry per digital input channel — TODO confirm).
mat_file["digitalIn"]["timestampsOff"][0].shape

In [None]:
# Shape of the fourth `timestampsOff` entry, for comparison with the first one.
mat_file["digitalIn"]["timestampsOff"][3].shape

In [None]:
# Shape of the first `dur` entry
# (presumably the durations matching the on/off timestamps — TODO confirm).
mat_file["digitalIn"]["dur"][0].shape

### Analog signals in sub-folder

We have the following
* `auxiliary.dat`
* `supply.dat`
* `time.dat`
* `digitalin.dat`

But two xml files:
* `fCamk1_200827_sess9.xml`, which I guess is for the main recording
* `amplifier.xml` which I don't know what it does.

The first is on the top directory, the other one is here.

Note that in the sub-folder we only have `amplifier.xml`. Are they different?

Let's test this

In [None]:
# Read the acquisition parameters from the session XML file in the top-level folder.
xml_file_path = session_path / f"{session_path.stem}.xml"
assert xml_file_path.is_file(), xml_file_path

tree = ElementTree.parse(xml_file_path)
root = tree.getroot()
acq = root.find('acquisitionSystem')
assert acq is not None, f"No <acquisitionSystem> element in {xml_file_path}"

# These names are reused by the cells below that read the raw signal files.
nbits = int(acq.find('nBits').text)
num_channels = int(acq.find('nChannels').text)
sampling_rate = float(acq.find('samplingRate').text)
voltage_range = float(acq.find('voltageRange').text)
amplification = float(acq.find('amplification').text)

print(f"{num_channels=}, {sampling_rate=}, {voltage_range=}, {amplification=}")


In [None]:

# Read the acquisition parameters from `amplifier.xml` in the sub-folder, to compare
# them with the ones from the top-level XML file.
# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()

xml_file_path = sub_folder / "amplifier.xml"
assert xml_file_path.is_file(), xml_file_path

tree = ElementTree.parse(xml_file_path)
root = tree.getroot()
acq = root.find('acquisitionSystem')
assert acq is not None, f"No <acquisitionSystem> element in {xml_file_path}"

# NOTE: this overwrites the values read from the top-level XML file.
nbits = int(acq.find('nBits').text)
num_channels = int(acq.find('nChannels').text)
sampling_rate = float(acq.find('samplingRate').text)
voltage_range = float(acq.find('voltageRange').text)
amplification = float(acq.find('amplification').text)

print(f"{num_channels=}, {sampling_rate=}, {voltage_range=}, {amplification=}")


Output:
The top level:
```
num_channels=32, sampling_rate=30000.0, voltage_range=20.0, amplification=1000.0
```

The amplifier:
```
num_channels=32, sampling_rate=30000.0, voltage_range=20.0, amplification=1000.0
```

They seem to be the same. I wonder why we have two, but then we have a lot of redundancy in the conversion data.

### Signals

In [None]:
# Report the number of channels, number of samples and duration of each auxiliary
# signal file in the sub-folder.
signal_names = ["digitalin.dat", "time.dat", "supply.dat", "auxiliary.dat"]

# The sub-folder name is the session name with its last "_"-separated part replaced by "110712".
sub_folder = session_path / f"{session_path.stem.rsplit('_', 1)[0]}_110712"
assert sub_folder.is_dir()

# These files do not follow the layout described in the XML file (num_channels x int16),
# so reshaping them with `num_channels` under-estimates the number of samples.
# In the Intan "one file per signal type" format, `time.dat` holds one int32 timestamp
# per sample and the other files hold uint16 values (TODO confirm against `info.rhd`).
# The number of samples is therefore taken from `time.dat` and the number of channels
# of each file is derived from its size.
time_file_path = sub_folder / "time.dat"
assert time_file_path.is_file(), time_file_path
num_samples = time_file_path.stat().st_size // np.dtype("int32").itemsize
assert num_samples > 0, f"{time_file_path} is empty"

for name in signal_names:
    file_path = sub_folder / f"{name}"
    assert file_path.is_file(), file_path

    sig_dtype = 'int32' if name == "time.dat" else 'uint16'
    flat_data = np.memmap(file_path, dtype=sig_dtype, mode='r', offset=0)
    assert flat_data.size % num_samples == 0, f"{file_path} does not hold a whole number of channels"
    data = flat_data.reshape(num_samples, -1)
    signal_num_channels = data.shape[1]
    time = num_samples / sampling_rate

    print("---------------")
    print(f"{file_path.name=} \n")
    print(f"num_channels: {signal_num_channels}, num_samples: {num_samples:,}, time: {time:.2f} seconds")

Output:
```
---------------
file_path.name='digitalin.dat' 

num_samples: 1,684,470, time: 56.15 seconds
---------------
file_path.name='time.dat' 

num_samples: 3,368,940, time: 112.30 seconds
---------------
file_path.name='supply.dat' 

num_samples: 1,684,470, time: 56.15 seconds
---------------
file_path.name='auxiliary.dat' 

num_samples: 5,053,410, time: 168.45 seconds
```

# Channels

### Channel map

In [None]:
# Dump the key/type structure of `chanMap.mat` to a JSON file for inspection.
file_path = session_path / "chanMap.mat"
assert file_path.is_file(), file_path

mat_file = read_mat(file_path)

result = build_keys_and_types(mat_file)
json_output = json.dumps(result, indent=2)

json_name = "chanMap"+ ".json"
with open(json_directory / json_name, 'w') as f:
    f.write(json_output)


In [None]:
# Keep the `connected` entry of the channel map file in its own variable.
# NOTE(review): nothing is displayed here and `connected` is not used in the cells shown — confirm intent.
connected = mat_file["connected"]  

In [None]:
# Display the `chanMap` entry of the channel map file.
mat_file["chanMap"]

In [None]:
# Display the `chanMap0ind` entry (presumably the zero-indexed version of `chanMap` — TODO confirm).
mat_file["chanMap0ind"]

# Analog signals in top folder

## Raw signal and LFP

In [None]:
# Open the `.dat` file of the session with the NeuroScope extractor and display its summary.
file_path = session_path / f"{session_path.stem}.dat"
assert file_path.is_file(), file_path

recording = NeuroScopeRecordingExtractor(file_path=file_path)
recording

Output:
```
NeuroScopeRecordingExtractor: 32 channels - 30.0kHz - 1 segments - 537,603,840 samples 
                              17,920.13s (4.98 hours) - int16 dtype - 32.04 GiB
  file_path: /home/heberto/buzaki/fCamk1_200827_sess9/fCamk1_200827_sess9.dat
```
We see this is around 5 hours of recording. 

In [None]:
# Open the `.lfp` file of the session with the NeuroScope extractor and display its summary.
# NOTE: this replaces the `recording` created from the `.dat` file.
file_path = session_path / f"{session_path.stem}.lfp"
assert file_path.is_file(), file_path

recording = NeuroScopeRecordingExtractor(file_path=file_path)
recording

Here we see that the sampling rate is wrong. Let's test with the value reported in the paper.

In [None]:
# Duration of the LFP recording when using the sampling rate from the paper.
# A dedicated name is used so that the `sampling_rate` read from the XML file
# (used by the cells that read the raw signal files) is not silently overwritten.
lfp_sampling_rate = 1.25 * 10**3 # 1.25 kHz
duration_seconds = recording.get_num_frames() / lfp_sampling_rate
duration_minutes = duration_seconds / 60.0
duration_hours = duration_minutes / 60.0
duration_hours

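The output is about five hours, which matches the duration of the raw recording. The sampling rate assigned to the LFP by the extractor should therefore be corrected.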

## Other signals in top folder
We have the following
* `analogin.dat`
* `auxiliary.dat`
* `supply.dat`
* `time.dat`
* `digitalin.dat`


Let's first check the XML file, which is where their metadata comes from:

In [None]:
# Re-read the acquisition parameters from the session XML file in the top-level folder,
# so the next cell does not depend on values left over from earlier cells.
xml_file_path = session_path / f"{session_path.stem}.xml"
assert xml_file_path.is_file(), xml_file_path

tree = ElementTree.parse(xml_file_path)
root = tree.getroot()
acq = root.find('acquisitionSystem')
assert acq is not None, f"No <acquisitionSystem> element in {xml_file_path}"

nbits = int(acq.find('nBits').text)
num_channels = int(acq.find('nChannels').text)
sampling_rate = float(acq.find('samplingRate').text)
voltage_range = float(acq.find('voltageRange').text)
amplification = float(acq.find('amplification').text)

print(f"{num_channels=}, {sampling_rate=}, {voltage_range=}, {amplification=}")


In [None]:
# Report the number of channels, number of samples and duration of each auxiliary
# signal file in the top-level session folder.
signal_names = ["digitalin.dat", "time.dat", "supply.dat", "auxiliary.dat", "analogin.dat"]

# These files do not follow the layout described in the XML file (num_channels x int16),
# so reshaping them with `num_channels` under-estimates the number of samples.
# In the Intan "one file per signal type" format, `time.dat` holds one int32 timestamp
# per sample and the other files hold uint16 values (TODO confirm against `info.rhd`).
# The number of samples is therefore taken from `time.dat` and the number of channels
# of each file is derived from its size.
time_file_path = session_path / "time.dat"
assert time_file_path.is_file(), time_file_path
num_samples = time_file_path.stat().st_size // np.dtype("int32").itemsize
assert num_samples > 0, f"{time_file_path} is empty"

for name in signal_names:
    file_path = session_path / f"{name}"
    assert file_path.is_file(), file_path

    sig_dtype = 'int32' if name == "time.dat" else 'uint16'
    flat_data = np.memmap(file_path, dtype=sig_dtype, mode='r', offset=0)
    assert flat_data.size % num_samples == 0, f"{file_path} does not hold a whole number of channels"
    data = flat_data.reshape(num_samples, -1)
    signal_num_channels = data.shape[1]
    time = num_samples / sampling_rate

    print("---------------")
    print(f"{file_path.name=} \n")
    print(f"num_channels: {signal_num_channels}, num_samples: {num_samples:,}, time: {time / 60.0:.2f} minutes")

Output:
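They seem too short. Note that every file was read as if it had the 32 int16 channels described in the XML file, which is probably not the layout of these files, so the sample counts below are likely under-estimated: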
```
---------------
file_path.name='digitalin.dat' 

num_samples: 16,800,120, time: 9.33 minutes
---------------
file_path.name='time.dat' 

num_samples: 33,600,240, time: 18.67 minutes
---------------
file_path.name='supply.dat' 

num_samples: 16,800,120, time: 9.33 minutes
---------------
file_path.name='auxiliary.dat' 

num_samples: 50,400,360, time: 28.00 minutes
---------------
file_path.name='analogin.dat' 

num_samples: 134,400,960, time: 74.67 minutes
```