In [None]:
import d3rlpy
import os
import pandas as pd
import numpy as np
from d3rlpy.dataset import MDPDataset

In [None]:
# Folder that holds the per-patient CSV inputs; the .h5 dumps written later in
# this cell go here as well. "./Data" has no leading "~", so expanduser()
# returns it unchanged and the path stays relative to the working directory.
base_dir = os.path.expanduser("./Data")

def build_dataset_from_csv(path, episode_len=48): # process datasets 7-12
    """Build a d3rlpy ``MDPDataset`` from one CSV file.

    Columns are addressed by position (0-based), not by name:

    * column 3 -> first state feature; also the only input of the reward
    * column 4 -> action, binarised to 0 (value == 0) or 1 (anything else)
    * column 5 -> second state feature, binarised the same way
    * column 6 -> third state feature, used as-is

    Reward, before normalisation, is ``-penalty / 30`` where the penalty is
    ``|s1 - 140| ** 1.10`` when ``s1 > 140``, ``(s1 - 80) ** 2`` when
    ``s1 < 80`` and 0 inside ``[80, 140]``. It is then mapped through
    ``(r + c) / c`` with ``c = 40.833332`` so that a zero penalty becomes 1.0.

    Args:
        path: Path of the CSV file, passed straight to ``pandas.read_csv``.
        episode_len: Number of consecutive rows per episode. Every
            ``episode_len``-th row is flagged terminal; a trailing partial
            episode gets no terminal flag.

    Returns:
        An ``MDPDataset`` with float32 observations of shape ``(n_rows, 3)``,
        int64 actions, float32 rewards and float32 terminal flags.
    """
    df = pd.read_csv(path)

    # Cast every feature to float32 *before* stacking: np.stack promotes to
    # the widest input dtype, so a single uncast column would silently turn
    # the whole observation matrix into float64.
    s1 = df.iloc[:, 3].values.astype(np.float32)
    s2_raw = df.iloc[:, 5].values.astype(np.float32)
    s2 = np.where(s2_raw == 0, 0, 1).astype(np.float32)
    s3 = df.iloc[:, 6].values.astype(np.float32)

    states = np.stack([s1, s2, s3], axis=1)

    actions_raw = df.iloc[:, 4].values
    actions = np.where(actions_raw == 0, 0, 1).astype(np.int64)

    # The boolean masks select which penalty term is active for each row.
    above_penalty = (s1 > 140) * np.abs(s1 - 140) ** 1.10
    below_penalty = (s1 < 80) * (s1 - 80) ** 2
    rewards = (-(above_penalty + below_penalty) / 30.0).astype(np.float32)

    # NOTE(review): 40.833332 equals 35**2 / 30, i.e. the raw penalty at
    # s1 == 45 -- presumably the worst case the authors expected; confirm.
    reward_scale = 40.833332
    rews = (rewards + reward_scale) / reward_scale

    n = len(df)
    terminals = np.zeros(n, dtype=np.float32)
    # Rows episode_len-1, 2*episode_len-1, ... close an episode.
    terminals[episode_len - 1 :: episode_len] = 1.0

    return MDPDataset(
        observations=states,
        actions=actions,
        rewards=rews,
        terminals=terminals
    )

# Build one MDPDataset per patient CSV (pat7 ... pat12), report its size and
# dump it next to the CSV as "<name>_binary_meal.h5".
datasets = {}
for i in range(7, 13):
    name = f"pat{i}"
    csv_path = os.path.join(base_dir, f"{name}.csv")
    dataset = build_dataset_from_csv(csv_path)
    datasets[name] = dataset

    # In d3rlpy 2.x size() returns the number of *episodes*; the number of
    # transitions is exposed as the transition_count property.
    print(f"{name} -> {dataset.transition_count} transitions, "
          f"{len(dataset.episodes)} episodes")

    h5_path = os.path.join(base_dir, f"{name}_binary_meal.h5")
    dataset.dump(h5_path)


# Individual handles; a later cell rebuilds `datasets` from these names.
data_pat7  = datasets["pat7"]
data_pat8  = datasets["pat8"]
data_pat9  = datasets["pat9"]
data_pat10 = datasets["pat10"]
data_pat11 = datasets["pat11"]
data_pat12 = datasets["pat12"]

In [4]:
from d3rlpy.preprocessing import StandardObservationScaler
import d3rlpy
from d3rlpy.models.encoders import VectorEncoderFactory
from d3rlpy.metrics import TDErrorEvaluator
from d3rlpy.metrics import InitialStateValueEstimationEvaluator

In [12]:
def train_sac(MDP_data, gamma = 0.9, n_steps = 30000, device = None):
    """Train a discrete SAC policy on an offline dataset and return it.

    The actor and the critic each get their own two-layer (128, 128) ReLU
    vector encoder; observations are standardised by a
    ``StandardObservationScaler``. TD error and initial-state value estimate
    are evaluated at the end of every 2000-step epoch.

    Args:
        MDP_data: Dataset handed to ``build_with_dataset`` and ``fit``.
        gamma: Discount factor.
        n_steps: Total number of gradient steps.
        device: Device string forwarded to ``create(device=...)``. When left
            as ``None`` the function uses ``"cuda:0"`` if CUDA is available
            and ``"cpu:0"`` otherwise, so the notebook also runs on hosts
            without a GPU.

    Returns:
        The fitted algorithm object.
    """
    if device is None:
        # Local import: torch is only needed to probe for a GPU.
        import torch
        device = "cuda:0" if torch.cuda.is_available() else "cpu:0"

    critic_encoder = VectorEncoderFactory(hidden_units=[128, 128], activation='relu')
    actor_encoder = VectorEncoderFactory(hidden_units=[128, 128], activation='relu')
    sac_beh = d3rlpy.algos.DiscreteSACConfig(
        observation_scaler=StandardObservationScaler(),
        batch_size=64,
        gamma=gamma,
        critic_encoder_factory=critic_encoder,
        actor_encoder_factory=actor_encoder,
        target_update_interval=1000,
    ).create(device=device)
    sac_beh.build_with_dataset(MDP_data)
    sac_beh.fit(MDP_data,
        n_steps=n_steps,
        n_steps_per_epoch=2000,
        show_progress=False,
        evaluators={
            'TD': TDErrorEvaluator(),
            'Value': InitialStateValueEstimationEvaluator(),
        },
    )

    return sac_beh

In [6]:
# Re-assemble the name -> dataset mapping from the module-level data_pat7 ...
# data_pat12 variables, looked up dynamically through globals().
datasets = dict(
    (f"pat{i}", globals()[f"data_pat{i}"])
    for i in range(7, 13)
)

In [None]:
# Train one SAC policy per patient dataset (pat7 ... pat12) with the default
# step budget of train_sac and save each one under ./Models.
d3rlpy.seed(123)
# NOTE: this rebinds base_dir, which previously pointed at the ./Data folder.
base_dir = os.path.expanduser(
    "./Models"
)
# Create the output folder up front so that a missing directory cannot make
# model.save() fail only after a full training run has finished.
os.makedirs(base_dir, exist_ok=True)


for i in range(7, 13):
    name = f"pat{i}"
    print(f"Training SAC for {name} ...")

    mdp = datasets[name]
    model = train_sac(mdp, gamma=0.9)

    # Expose the trained policy as a module-level variable sac_beh_<i>.
    globals()[f"sac_beh_{i}"] = model

    model_path = os.path.join(base_dir, f"sac_model_pat{i}.d3")
    model.save(model_path)

    print(f"Saved sac_beh_{i} to {model_path}")

In [None]:
# Retrain the pat7 policy from the same seed with n_steps=24000 and overwrite
# the pat7 checkpoint written by the training loop.
d3rlpy.seed(123)
model7 = train_sac(datasets["pat7"], gamma=0.9, n_steps=24000)

sac_beh_7 = model7

model_path = os.path.join(base_dir, "sac_model_pat7.d3")
# Save the model trained in this cell. `model` is the leftover variable of the
# training loop (the pat12 policy) and must not be written to the pat7 file.
model7.save(model_path)

print(f"Saved sac_beh_7 to {model_path}")

In [None]:
# Retrain the pat10 policy from the same seed with n_steps=18000 and overwrite
# the pat10 checkpoint written by the training loop.
d3rlpy.seed(123)
model10 = train_sac(datasets["pat10"], gamma=0.9, n_steps=18000)

sac_beh_10 = model10

model_path = os.path.join(base_dir, "sac_model_pat10.d3")
# Save the model trained in this cell. `model` is the leftover variable of the
# training loop (the pat12 policy) and must not be written to the pat10 file.
model10.save(model_path)

print(f"Saved sac_beh_10 to {model_path}")

In [None]:
# Retrain the pat12 policy from the same seed with n_steps=22000 and overwrite
# the pat12 checkpoint written by the training loop.
d3rlpy.seed(123)
model12 = train_sac(datasets["pat12"], gamma=0.9, n_steps=22000)

sac_beh_12 = model12

model_path = os.path.join(base_dir, "sac_model_pat12.d3")
# Save the model trained in this cell. `model` still refers to the policy
# produced by the training loop with the default step budget, not to the
# 22000-step retrain above.
model12.save(model_path)

print(f"Saved sac_beh_12 to {model_path}")