In [16]:
import numpy as np
import pandas as pd

import torch as th
from torch.optim import Adam
from torcheval.metrics import MeanSquaredError
from training_loop import TrainingLoop, SimpleTrainingStep
from training_loop.callbacks import EarlyStopping, TensorBoardLogger
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from envs import BlendEnv, reconstruct_dict

import datetime
from models import IL_MLP_simple
import os

In [17]:
now = datetime.datetime.now().strftime('%m%d-%H%M')
modelname = f"MLP_{now}"

In [18]:
cols_obs = [
    'sources_s1', 'sources_s2',
    'blenders_j1', 'blenders_j2', 'blenders_j3', 'blenders_j4',
    'demands_p1', 'demands_p2',
    'properties_j1_q1', 'properties_j2_q1', 'properties_j3_q1', 'properties_j4_q1',
    'sources_avail_next_0_s1', 'sources_avail_next_0_s2', 'demands_avail_next_0_p1', 'demands_avail_next_0_p2',
    'sources_avail_next_1_s1', 'sources_avail_next_1_s2', 'demands_avail_next_1_p1', 'demands_avail_next_1_p2',
    'sources_avail_next_2_s1', 'sources_avail_next_2_s2', 'demands_avail_next_2_p1', 'demands_avail_next_2_p2',
    'sources_avail_next_3_s1', 'sources_avail_next_3_s2', 'demands_avail_next_3_p1', 'demands_avail_next_3_p2',
    'sources_avail_next_4_s1', 'sources_avail_next_4_s2', 'demands_avail_next_4_p1', 'demands_avail_next_4_p2',
    'sources_avail_next_5_s1', 'sources_avail_next_5_s2', 'demands_avail_next_5_p1', 'demands_avail_next_5_p2',
    't'
]
cols_act = [
    'source_blend_s1_j1', 'source_blend_s1_j2', 'source_blend_s1_j3', 'source_blend_s1_j4',
    'source_blend_s2_j1', 'source_blend_s2_j2', 'source_blend_s2_j3', 'source_blend_s2_j4',
    'blend_demand_j1_p1', 'blend_demand_j1_p2',
    'blend_demand_j2_p1', 'blend_demand_j2_p2',
    'blend_demand_j3_p1', 'blend_demand_j3_p2',
    'blend_demand_j4_p1', 'blend_demand_j4_p2',
    'tau_s1', 'tau_s2',
    'delta_p1', 'delta_p2'
]

In [19]:
X_train, y_train = pd.DataFrame([], columns=cols_obs), pd.DataFrame([], columns=cols_act)

for file in os.listdir("./data/simple"):
    df = pd.read_csv(os.path.join("./data/simple", file))
    if "OBS" in file:
        X_train = pd.concat([X_train, df])
    elif "ACT" in file:
        y_train = pd.concat([y_train, df])

X_train = X_train.drop("Unnamed: 0", axis=1)
y_train = y_train.drop("Unnamed: 0", axis=1)

X_train, X_temp, y_train, y_temp = train_test_split(X_train, y_train, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [20]:
class MyData(Dataset):
    def __init__(self, df_X: pd.DataFrame, df_y: pd.DataFrame) -> None:
        super().__init__()
        self.df_X = df_X
        self.df_y = df_y
        
    def __getitem__(self, index):
        return(th.Tensor(self.df_X.iloc[index]), th.Tensor(self.df_y.iloc[index]))
    
    def __len__(self):
        return(self.df_X.shape[0])

In [21]:
train_ds = MyData(X_train, y_train)
train_dl = DataLoader(train_ds, batch_size=16)
val_ds = MyData(X_val, y_val)
val_dl = DataLoader(val_ds, batch_size=16)
test_ds = MyData(X_test, y_test)
test_dl = DataLoader(test_ds, batch_size=16)
len(train_ds)

866

In [22]:
model = IL_MLP_simple()

In [23]:
loop = TrainingLoop(
    model,
    step=SimpleTrainingStep(
        optimizer_fn = lambda params: Adam(params, lr=5e-3),
        loss = th.nn.MSELoss(),
        # metrics = ('MSE', MeanSquaredError()),
    ),
    device='cpu'
)
loop.fit(
    train_dl,
    val_dl,
    epochs=100,
    callbacks=[
        EarlyStopping(monitor='val_loss', mode='min', patience=20),
        TensorBoardLogger(f"./logs/simple/imit/{modelname}")
    ],
    
)

Epoch 1/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 2/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 3/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 4/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 5/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 6/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 7/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 8/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 9/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 10/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 11/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 12/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 13/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 14/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 15/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 16/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 17/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 18/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 19/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 20/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 21/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 22/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 23/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 24/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 25/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 26/100:   0%|          | 0/74 [00:00<?, ?it/s]

Epoch 27/100:   0%|          | 0/74 [00:00<?, ?it/s]

(                  loss  batch  epoch
 epoch batch                         
 1      1     56.680298      1      1
        2     53.692852      2      1
        3     49.449324      3      1
        4     51.094777      4      1
        5     49.756416      5      1
 ...                ...    ...    ...
 27     52    16.542521     52     27
        53    16.774468     53     27
        54    16.891923     54     27
        55    16.717352     55     27
       -1     16.717352     -1     27
 
 [1512 rows x 3 columns],
                       val_loss  val_batch  val_epoch
 val_epoch val_batch                                 
 1          1         21.496624          1          1
            2         22.349766          2          1
            3         25.809706          3          1
            4         23.491105          4          1
            5         26.605465          5          1
 ...                        ...        ...        ...
 27         16        24.495763         16    

In [16]:
th.save(model.state_dict(), f"weights/{modelname}.pth")

In [27]:
connections = {
    "source_blend": {
        "s1": ["j1", "j2", "j3", "j4"],
        "s2": ["j1", "j2", "j3", "j4"]
    },
    "blend_blend": {"j1": [], "j2": [], "j3": [], "j4": []},
    "blend_demand": {"j1": ["p1", "p2"],
        "j2": ["p1", "p2"],
        "j3": ["p1", "p2"],
        "j4": ["p1", "p2"]
    }
}

action_sample = {
    'source_blend':{
        's1': {'j1':1, 'j2':1, 'j3':1, 'j4':0}, # From s1 to b1, from s1 to b2 etc...
        's2': {'j1':1, 'j2':1, 'j3':0, 'j4':1},
    },
    
    'blend_blend':{},
    
    'blend_demand':{
        'j1': {'p1':1, 'p2':0},
        'j2': {'p1':1, 'p2':2},
        'j3': {'p1':1, 'p2':2},
        'j4': {'p1':1, 'p2':2}
    },
    
    "tau": {"s1": 10, "s2": 10},
    
    "delta": {"p1": 0, "p2": 0}
}

In [36]:
M=P=Q=B=Z=D=0

In [37]:
env = BlendEnv(connections = connections, action_sample=action_sample, M=M, P=P, Q=Q, B=B, Z=Z, D=D)

In [38]:
env = env
with th.no_grad():
    obs = env.reset()
    obs, obs_dict = obs
    for k in range(env.T):
        action = np.array(model.forward(th.Tensor(obs)))
        # print(action)
        print("\n\n   ",reconstruct_dict(action, env.mapping_act))
        obs, reward, done, term, _ = env.step(action)
        dobs = reconstruct_dict(obs, env.mapping_obs)
        print("\n    >>     ",dobs["sources"], dobs["blenders"], dobs["demands"])
        print("   " ,reward)



    {'source_blend': {'s1': {'j1': 0.0, 'j2': 0.0, 'j3': 0.0, 'j4': 0.0}, 's2': {'j1': 5.8200917, 'j2': 3.0075731, 'j3': 0.0, 'j4': 0.0}}, 'blend_demand': {'j1': {'p1': 0.0, 'p2': 0.0}, 'j2': {'p1': 0.0, 'p2': 0.0}, 'j3': {'p1': 0.0, 'p2': 0.0}, 'j4': {'p1': 0.0, 'p2': 0.0}}, 'tau': {'s1': 1.7359276, 's2': 16.801428}, 'delta': {'p1': 0.0, 'p2': 0.0}}

    >>      {'s1': 1.7359276, 's2': 7.973763} {'j1': 5.8200917, 'j2': 3.0075731, 'j3': 0.0, 'j4': 0.0} {'p1': 0.0, 'p2': 0.0}
    0.0


    {'source_blend': {'s1': {'j1': 0.0, 'j2': 0.0, 'j3': 0.0, 'j4': 0.0}, 's2': {'j1': 2.9211133, 'j2': 0.0, 'j3': 0.28612745, 'j4': 0.0}}, 'blend_demand': {'j1': {'p1': 0.0, 'p2': 0.0}, 'j2': {'p1': 0.0, 'p2': 0.0}, 'j3': {'p1': 2.7933748, 'p2': 0.0}, 'j4': {'p1': 0.0, 'p2': 0.0}}, 'tau': {'s1': 0.0, 's2': 16.264774}, 'delta': {'p1': 0.0, 'p2': 0.0}}

    >>      {'s1': 1.7359276, 's2': 21.031296} {'j1': 8.741205, 'j2': 3.0075731, 'j3': 0.0, 'j4': 0.0} {'p1': 0.0, 'p2': 0.0}
    0.0


    {'source_blen

In [26]:
env.mapping_act

[(0, ['source_blend', 's1', 'j1']),
 (1, ['source_blend', 's1', 'j2']),
 (2, ['source_blend', 's1', 'j3']),
 (3, ['source_blend', 's1', 'j4']),
 (4, ['source_blend', 's2', 'j1']),
 (5, ['source_blend', 's2', 'j2']),
 (6, ['source_blend', 's2', 'j3']),
 (7, ['source_blend', 's2', 'j4']),
 (8, ['blend_blend', 'j1', 'j5']),
 (9, ['blend_blend', 'j1', 'j6']),
 (10, ['blend_blend', 'j1', 'j7']),
 (11, ['blend_blend', 'j1', 'j8']),
 (12, ['blend_blend', 'j2', 'j5']),
 (13, ['blend_blend', 'j2', 'j6']),
 (14, ['blend_blend', 'j2', 'j7']),
 (15, ['blend_blend', 'j2', 'j8']),
 (16, ['blend_blend', 'j3', 'j5']),
 (17, ['blend_blend', 'j3', 'j6']),
 (18, ['blend_blend', 'j3', 'j7']),
 (19, ['blend_blend', 'j3', 'j8']),
 (20, ['blend_blend', 'j4', 'j5']),
 (21, ['blend_blend', 'j4', 'j6']),
 (22, ['blend_blend', 'j4', 'j7']),
 (23, ['blend_blend', 'j4', 'j8']),
 (24, ['blend_demand', 'j5', 'p1']),
 (25, ['blend_demand', 'j5', 'p2']),
 (26, ['blend_demand', 'j6', 'p1']),
 (27, ['blend_demand', 'j6'