In [1]:
import os
import logging

import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal

from hbmep.config import Config
from hbmep.model import BaseModel

from paper.utils import load_rat_data, setup_logging

logger = logging.getLogger(__name__)

DIR = "/mnt/hdd1/acute_mapping/proc/physio"
SUBJECTS = range(1, 9)

DATA_DIR = "/home/vishu/data/hbmep-processed/rat"
CONFIG_DIR = "/home/vishu/repos/rat-mapping-paper/configs"

setup_logging(dir=DATA_DIR, fname="read_data.log")


2025-01-24 11:29:43,878 - paper.utils.utils - INFO - Logging to /home/vishu/data/hbmep-processed/rat/read_data.log


In [2]:
def get_dirs(subdir_pattern=None):
    d = {}
    for subject_ind in SUBJECTS:
        subject = f"amap{subject_ind:002}"
        curr_dirs = os.listdir(
            os.path.join(DIR, subject)
        )
        if subdir_pattern is not None:
            curr_dirs = [d for d in curr_dirs if subdir_pattern in d]
        d[subject] = sorted(curr_dirs)
    return d


# J_RCML_000

In [3]:
subdir_pattern = "J_RCML_000"
dirs = get_dirs(subdir_pattern=subdir_pattern)
dirs


{'amap01': ['2023-03-13_J_RCML_000'],
 'amap02': ['2023-03-17_J_RCML_000'],
 'amap03': ['2023-03-21_J_RCML_000'],
 'amap04': ['2023-03-24_J_RCML_000'],
 'amap05': ['2023-03-27_J_RCML_000'],
 'amap06': ['2023-03-29_J_RCML_000'],
 'amap07': ['2023-04-03_J_RCML_000'],
 'amap08': ['2023-04-04_J_RCML_000']}

In [4]:
subdir_pattern = ["*J_RCML_000*"]
df, mat, time, auc_window, muscles_map = load_rat_data(
    dir=DIR, subdir_pattern=subdir_pattern, subjects=SUBJECTS
)


100%|█████████████████████████████████████████████████████████████████| 8/8 [00:02<00:00,  3.05it/s]
2025-01-13 13:13:35,099 - hbmep.utils.utils - INFO - func:load_rat_data took: 2.63 sec


In [None]:
# # Verify
# _dir = os.path.join(DATA_DIR, "J_RCML")
# src = os.path.join(_dir, "data.csv")
# _df = pd.read_csv(src)
# src = os.path.join(_dir, "mat.npy")
# _mat = np.load(src)
# assert_frame_equal(df, _df)
# assert (mat == _mat).all()


In [5]:
# Plot dataset
toml_path = os.path.join(CONFIG_DIR, "J_RCML_000.toml")
config = Config(toml_path=toml_path)
config.BUILD_DIR = os.path.join(DATA_DIR, "J_RCML")
model = BaseModel(config=config)

df, encoder_dict = model.load(df=df)
model.plot(df=df, encoder_dict=encoder_dict)


2025-01-13 13:13:35,125 - hbmep.model.baseline - INFO - Initialized base_model
2025-01-13 13:13:35,125 - hbmep.dataset.core - INFO - Artefacts will be stored here - /home/vishu/data/hbmep-processed/rat/J_RCML
2025-01-13 13:13:35,127 - hbmep.dataset.core - INFO - Processing data ...
2025-01-13 13:13:35,128 - hbmep.utils.utils - INFO - func:load took: 0.00 sec
2025-01-13 13:13:35,129 - hbmep.plotter.core - INFO - Rendering dataset ...
2025-01-13 13:13:35,987 - hbmep.plotter.core - INFO - Page 1 of 15 done.
2025-01-13 13:13:39,163 - hbmep.plotter.core - INFO - Page 2 of 15 done.
2025-01-13 13:13:42,357 - hbmep.plotter.core - INFO - Page 3 of 15 done.
2025-01-13 13:13:45,766 - hbmep.plotter.core - INFO - Page 4 of 15 done.
2025-01-13 13:13:49,390 - hbmep.plotter.core - INFO - Page 5 of 15 done.
2025-01-13 13:13:52,968 - hbmep.plotter.core - INFO - Page 6 of 15 done.
2025-01-13 13:13:56,473 - hbmep.plotter.core - INFO - Page 7 of 15 done.
2025-01-13 13:13:59,536 - hbmep.plotter.core - INFO 

# L_CIRC

In [4]:
subdir_pattern = "L_CIRC"
dirs = get_dirs(subdir_pattern=subdir_pattern)
dirs


{'amap01': ['2023-03-13_L_CIRC_000'],
 'amap02': ['2023-03-17_L_CIRC_000'],
 'amap03': ['2023-03-21_L_CIRC_000'],
 'amap04': ['2023-03-24_L_CIRC_000'],
 'amap05': ['2023-03-27_L_CIRC_000'],
 'amap06': ['2023-03-29_L_CIRC_000', '2023-03-29_L_CIRC_001'],
 'amap07': ['2023-04-03_L_CIRC_000'],
 'amap08': ['2023-04-04_L_CIRC_000']}

In [5]:
subdir_pattern = ["*L_CIRC*"]
df, mat, time, auc_window, muscles_map = load_rat_data(
    dir=DIR, subdir_pattern=subdir_pattern, subjects=SUBJECTS
)


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:03<00:00,  2.51it/s]
2025-01-23 09:45:40,251 - hbmep.utils.utils - INFO - func:load_rat_data took: 3.19 sec


In [None]:
# # Save
# build_dir = os.path.join(DATA_DIR, "L_CIRC")
# os.makedirs(build_dir, exist_ok=True)
# dest = os.path.join(build_dir, "data.csv")
# df.to_csv(dest, index=False)
# dest = os.path.join(build_dir, "mat.npy")
# np.save(dest, mat)

# # Verify
# _dir = os.path.join(DATA_DIR, "L_CIRC")
# src = os.path.join(_dir, "data.csv")
# _df = pd.read_csv(src)
# src = os.path.join(_dir, "mat.npy")
# _mat = np.load(src)
# assert_frame_equal(df, _df)
# assert (mat == _mat).all()


In [10]:
# Plot dataset
toml_path = os.path.join(CONFIG_DIR, "L_CIRC.toml")
config = Config(toml_path=toml_path)
config.BUILD_DIR = os.path.join(DATA_DIR, "L_CIRC")
model = BaseModel(config=config)

df, encoder_dict = model.load(df=df)
model.plot(df=df, encoder_dict=encoder_dict, mep_matrix=mat)


2025-01-23 09:48:25,329 - hbmep.model.baseline - INFO - Initialized base_model
2025-01-23 09:48:25,330 - hbmep.dataset.core - INFO - Artefacts will be stored here - /home/vishu/data/hbmep-processed/rat/L_CIRC
2025-01-23 09:48:25,331 - hbmep.dataset.core - INFO - Processing data ...
2025-01-23 09:48:25,333 - hbmep.utils.utils - INFO - func:load took: 0.00 sec
2025-01-23 09:48:25,333 - hbmep.plotter.core - INFO - Rendering dataset ...
2025-01-23 09:48:27,064 - hbmep.plotter.core - INFO - Page 1 of 17 done.
2025-01-23 09:48:34,120 - hbmep.plotter.core - INFO - Page 2 of 17 done.
2025-01-23 09:48:41,524 - hbmep.plotter.core - INFO - Page 3 of 17 done.
2025-01-23 09:48:48,902 - hbmep.plotter.core - INFO - Page 4 of 17 done.
2025-01-23 09:48:56,380 - hbmep.plotter.core - INFO - Page 5 of 17 done.
2025-01-23 09:49:04,094 - hbmep.plotter.core - INFO - Page 6 of 17 done.
2025-01-23 09:49:11,199 - hbmep.plotter.core - INFO - Page 7 of 17 done.
2025-01-23 09:49:20,375 - hbmep.plotter.core - INFO 

# L_SHIE

In [3]:
subdir_pattern = "L_SHIE"
dirs = get_dirs(subdir_pattern=subdir_pattern)
dirs


{'amap01': ['2023-03-13_L_SHIE_000'],
 'amap02': ['2023-03-17_L_SHIE_000', '2023-03-17_L_SHIE_001'],
 'amap03': ['2023-03-21_L_SHIE_000'],
 'amap04': ['2023-03-24_L_SHIE_000'],
 'amap05': ['2023-03-27_L_SHIE_000'],
 'amap06': ['2023-03-29_L_SHIE_000'],
 'amap07': ['2023-04-03_L_SHIE_000'],
 'amap08': ['2023-04-04_L_SHIE_000']}

In [4]:
subdir_pattern = ["*L_SHIE*"]
df, mat, time, auc_window, muscles_map = load_rat_data(
    dir=DIR, subdir_pattern=subdir_pattern, subjects=SUBJECTS
)


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:01<00:00,  5.15it/s]
2025-01-24 11:30:04,674 - hbmep.utils.utils - INFO - func:load_rat_data took: 1.56 sec


In [None]:
# # Verify
# _dir = os.path.join(DATA_DIR, "L_SHIE")
# src = os.path.join(_dir, "data.csv")
# _df = pd.read_csv(src)
# src = os.path.join(_dir, "mat.npy")
# _mat = np.load(src)
# assert_frame_equal(df, _df)
# assert (mat == _mat).all()


In [5]:
# Plot dataset
toml_path = os.path.join(CONFIG_DIR, "L_SHIE.toml")
config = Config(toml_path=toml_path)
config.BUILD_DIR = os.path.join(DATA_DIR, "L_SHIE")
model = BaseModel(config=config)

df, encoder_dict = model.load(df=df)
model.plot(df=df, encoder_dict=encoder_dict, mep_matrix=mat)


2025-01-24 11:30:07,919 - hbmep.model.baseline - INFO - Initialized base_model
2025-01-24 11:30:07,920 - hbmep.dataset.core - INFO - Artefacts will be stored here - /home/vishu/data/hbmep-processed/rat/L_SHIE
2025-01-24 11:30:07,924 - hbmep.dataset.core - INFO - Processing data ...
2025-01-24 11:30:07,927 - hbmep.utils.utils - INFO - func:load took: 0.01 sec
2025-01-24 11:30:07,928 - hbmep.plotter.core - INFO - Rendering dataset ...
2025-01-24 11:30:11,526 - hbmep.plotter.core - INFO - Page 1 of 7 done.
2025-01-24 11:30:21,079 - hbmep.plotter.core - INFO - Page 2 of 7 done.
2025-01-24 11:30:33,616 - hbmep.plotter.core - INFO - Page 3 of 7 done.
2025-01-24 11:30:45,705 - hbmep.plotter.core - INFO - Page 4 of 7 done.
2025-01-24 11:30:58,822 - hbmep.plotter.core - INFO - Page 5 of 7 done.
2025-01-24 11:31:22,143 - hbmep.plotter.core - INFO - Page 6 of 7 done.
2025-01-24 11:31:34,694 - hbmep.plotter.core - INFO - Page 7 of 7 done.
2025-01-24 11:31:37,113 - hbmep.plotter.core - INFO - Saved

# J_SHAP

In [10]:
subdir_pattern = "J_SHAP"
dirs = get_dirs(subdir_pattern=subdir_pattern)
dirs


{'amap01': ['2023-03-13_J_SHAP_000', '2023-03-13_J_SHAP_001'],
 'amap02': ['2023-03-17_J_SHAP_000', '2023-03-17_J_SHAP_001'],
 'amap03': ['2023-03-21_J_SHAP_000', '2023-03-21_J_SHAP_001'],
 'amap04': ['2023-03-24_J_SHAP_000', '2023-03-24_J_SHAP_001'],
 'amap05': ['2023-03-27_J_SHAP_000', '2023-03-27_J_SHAP_001'],
 'amap06': ['2023-03-29_J_SHAP_000', '2023-03-29_J_SHAP_001'],
 'amap07': ['2023-04-03_J_SHAP_000', '2023-04-03_J_SHAP_001'],
 'amap08': ['2023-04-04_J_SHAP_000', '2023-04-04_J_SHAP_001']}

In [11]:
subdir_pattern = ["*J_SHAP*"]
df, mat, time, auc_window, muscles_map = load_rat_data(
    dir=DIR, subdir_pattern=subdir_pattern, subjects=SUBJECTS
)

df.pulse_amplitude = (
    df
    .apply(
        lambda x: (x.pulse_amplitude * 4) if x.compound_charge_params == "80-0-20-400" else x.pulse_amplitude,
        axis=1
    )
)


100%|█████████████████████████████████████████████████████████████████| 8/8 [00:07<00:00,  1.12it/s]


In [None]:
# # checksum
# src = "/home/vishu/data/hbmep-processed/rat-archived/J_SHAP/data.csv"
# _df = pd.read_csv(src)
# src = "/home/vishu/data/hbmep-processed/rat-archived/J_SHAP/mat.npy"
# _mat = np.load(src)
# assert_frame_equal(df, _df)
# assert (mat == _mat).all()

# # Save
# BUILD_DIR = J_SHAP_DIR
# os.makedirs(BUILD_DIR, exist_ok=True)
# dest = os.path.join(BUILD_DIR, "data.csv")
# df.to_csv(dest, index=False)
# dest = os.path.join(BUILD_DIR, "mat.npy")
# np.save(dest, mat)


In [13]:
# Verify
_dir = os.path.join(DATA_DIR, "J_SHAP")
src = os.path.join(_dir, "data.csv")
_df = pd.read_csv(src)
src = os.path.join(_dir, "mat.npy")
_mat = np.load(src)
assert_frame_equal(df, _df)
assert (mat == _mat).all()


AssertionError: Attributes of DataFrame.iloc[:, 39] (column name="compound_position") are different

Attribute "dtype" are different
[left]:  int64
[right]: object

In [12]:
# Plot dataset
toml_path = os.path.join(CONFIG_DIR, "J_SHAP.toml")
config = Config(toml_path=toml_path)
config.BUILD_DIR = os.path.join(DATA_DIR, "J_SHAP")
model = BaseModel(config=config)

df, encoder_dict = model.load(df=df)
model.plot(df=df, encoder_dict=encoder_dict)
