In [1]:
import os
import pickle

import pandas as pd
from pandas.testing import assert_frame_equal
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

from hbmep.config import Config
from hbmep.model import BaseModel
from hbmep.model.utils import Site as site

# from models import HierarchicalBayesianModel
from constants import (
    TOML_PATH,
    DATA_PATH,
    BUILD_DIR,
    INFERENCE_FILE
)


In [2]:
# Build the model configuration from the project TOML and point its outputs
# at the project build directory.
config = Config(toml_path=TOML_PATH)
config.BUILD_DIR = BUILD_DIR

# Keep the flat feature list around for later per-column checks, then regroup
# the features as [first, [remaining...]] so everything after the first
# feature is handed to the model as a single nested group.
columns = config.FEATURES
config.FEATURES = [*columns[:1], columns[1:]]

model = BaseModel(config=config)


An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


In [3]:
# Raw labels -> short display labels for the two columns grouped under
# model._features[1].
positions_map = {
    "-C6LC": "-C",
    "C6LC-": "C-",
    "C6LC-C6LX": "C-X",
    "C6LX-C6LC": "X-C",
}
charges_map = {
    "50-0-50-100": "Biphasic",
    "20-0-80-25": "Pseudo-Mono"
}

data = pd.read_csv(DATA_PATH)

# The grouped feature holds exactly two column names: the first takes the
# position labels, the second the charge labels.
position_col, charge_col = model._features[1]
data[position_col] = data[position_col].replace(positions_map)
data[charge_col] = data[charge_col].replace(charges_map)


In [4]:
# Give the ('-C', 'Biphasic') condition its own position label, ' -C' (with a
# leading space); the REFERENCE list used by the subset-selection cells refers
# to it under that label.
# The already-relabelled ' -C' is matched as well, so re-running this cell is
# a no-op instead of failing the assertion on an empty selection.
ind = data[model._features[1]].apply(tuple, axis=1).isin(
    [('-C', 'Biphasic'), (' -C', 'Biphasic')]
)
assert data.loc[ind, model._features[1][0]].nunique() == 1
data.loc[ind, model._features[1][0]] = " -C"

df = data.copy()
print(df.shape)

# Every distinct combination of the flat feature columns present in the data.
t = sorted(df[columns].apply(tuple, axis=1).unique())
print(len(t))

# Check the claim instead of printing it unconditionally: the data is complete
# when the observed combinations equal the full crossing of the first feature
# with the distinct combinations of the remaining features.
# NOTE(review): assumes columns[0] identifies the rat and columns[1:] the
# stimulation condition -- confirm against the TOML config.
n_subjects = df[columns[0]].nunique()
n_conditions = df[columns[1:]].apply(tuple, axis=1).nunique()
n_expected = n_subjects * n_conditions
if len(t) == n_expected:
    print("No missing conditions for any rat")
else:
    print(
        f"Missing conditions: found {len(t)} of {n_expected} "
        f"expected combinations ({n_subjects} x {n_conditions})"
    )


(3572, 46)
64
No missing conditions for any rat


In [None]:
# Condition groups, written as (position, charge) pairs using the short labels.
# In raw-label terms: 'C6LC' -> 'C', 'C6LX' -> 'X', '50-0-50-100' -> 'Biphasic'
# and '20-0-80-25' -> 'Pseudo-Mono'.

# The reference condition carries a leading space in its position label
# (' -C'), which keeps it distinct from the plain '-C' position.
REFERENCE = [(' -C', 'Biphasic')]

# ('-C', 'Biphasic') is deliberately absent here: it is the reference.
WITH_GROUND = [
    ('-C', 'Pseudo-Mono'),
    ('C-', 'Pseudo-Mono'),
    ('C-', 'Biphasic'),
]

# Both charge shapes for each of the two-contact positions, position-major.
NO_GROUND = [
    (position, charge)
    for position in ('C-X', 'X-C')
    for charge in ('Pseudo-Mono', 'Biphasic')
]


In [6]:
# Select the reference condition together with the NO_GROUND conditions.
run_id = "no-ground"
if run_id != "no-ground":
    # Fail loudly: without this guard a changed run_id would silently reuse
    # whatever `subset` an earlier cell left in the kernel.
    raise ValueError(
        f"This cell builds the 'no-ground' subset; got run_id={run_id!r}"
    )
subset = REFERENCE + NO_GROUND

# Keep only the rows whose (position, charge) pair belongs to the subset.
ind = data[model._features[1]].apply(tuple, axis=1).isin(subset)
df = data[ind].reset_index(drop=True).copy()

# model.load returns the processed frame plus a dict of encoders keyed by
# feature; display the classes retained for the grouped feature.
df, encoder_dict = model.load(df=df)
encoder_dict[model.features[1]].classes_


array([' -C___Biphasic', 'C-X___Biphasic', 'C-X___Pseudo-Mono',
       'X-C___Biphasic', 'X-C___Pseudo-Mono'], dtype=object)

In [7]:
# Select the reference condition together with the WITH_GROUND conditions.
run_id = "ground"
if run_id != "ground":
    # Fail loudly: without this guard a changed run_id would silently reuse
    # whatever `subset` an earlier cell left in the kernel.
    raise ValueError(
        f"This cell builds the 'ground' subset; got run_id={run_id!r}"
    )
subset = REFERENCE + WITH_GROUND

# Keep only the rows whose (position, charge) pair belongs to the subset.
ind = data[model._features[1]].apply(tuple, axis=1).isin(subset)
df = data[ind].reset_index(drop=True).copy()

# model.load returns the processed frame plus a dict of encoders keyed by
# feature; display the classes retained for the grouped feature.
df, encoder_dict = model.load(df=df)
encoder_dict[model.features[1]].classes_


array([' -C___Biphasic', '-C___Pseudo-Mono', 'C-___Biphasic',
       'C-___Pseudo-Mono'], dtype=object)

In [8]:
# Select every condition: the reference plus both NO_GROUND and WITH_GROUND.
run_id = "all"
if run_id != "all":
    # Fail loudly: without this guard a changed run_id would silently reuse
    # whatever `subset` an earlier cell left in the kernel.
    raise ValueError(
        f"This cell builds the 'all' subset; got run_id={run_id!r}"
    )
subset = REFERENCE + NO_GROUND + WITH_GROUND

# Keep only the rows whose (position, charge) pair belongs to the subset.
ind = data[model._features[1]].apply(tuple, axis=1).isin(subset)
df = data[ind].reset_index(drop=True).copy()

# model.load returns the processed frame plus a dict of encoders keyed by
# feature; display the classes retained for the grouped feature.
df, encoder_dict = model.load(df=df)
encoder_dict[model.features[1]].classes_


array([' -C___Biphasic', '-C___Pseudo-Mono', 'C-X___Biphasic',
       'C-X___Pseudo-Mono', 'C-___Biphasic', 'C-___Pseudo-Mono',
       'X-C___Biphasic', 'X-C___Pseudo-Mono'], dtype=object)