# Imports

In [1]:
import os
import math
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

# Classes & Helpers

## Data

In [26]:
class Data():
    def __init__(self,
                 x_min, x_max,
                 t_min, t_max,
                 test_dir,
                 eps=1e-5,
                 device='cpu',
                 dtype=torch.float32):

        self.x_min = x_min
        self.x_max = x_max
        self.t_min = t_min
        self.t_max = t_max
        self.test_dir = test_dir
        self.eps = eps
        self.device = device
        self.dtype = dtype


    def _generate_random_numbers(self, min_, max_, N):
        return min_ + (max_ - min_) * torch.rand(size=(N,), dtype=self.dtype)


    # *** Create in-domain points ***
    def sample_domain(self, N_domain, x_min, x_max, t_min, t_max):
        # Random Grid
        x_domain = self._generate_random_numbers(x_min, x_max, N_domain)
        t_domain = self._generate_random_numbers(t_min, t_max, N_domain)
        domain_data = torch.stack((x_domain, t_domain), dim=1)
        return torch.tensor(domain_data, dtype=self.dtype, device=self.device, requires_grad=True)


    # *** Boundary Conditions ***
    def sample_boundary(self, Nt_bound):
        # Random boundary points
        t_bound = self._generate_random_numbers(self.t_min, self.t_max, Nt_bound)
        x_left = - torch.ones(1, dtype=self.dtype)
        x_right = torch.ones(1, dtype=self.dtype)

        bound_data_left = torch.stack(torch.meshgrid(x_left, t_bound)).view(2, -1).permute(1, 0)
        bound_data_right = torch.stack(torch.meshgrid(x_right, t_bound)).view(2, -1).permute(1, 0)
        bound_data = torch.cat([bound_data_left, bound_data_right]).requires_grad_(True).to(self.device)

        u_bound = torch.zeros(len(bound_data), 1, dtype=self.dtype, device=self.device)

        return bound_data, u_bound


    # *** Initial Condition ***
    def sample_initial(self, Nx_init, x_min, x_max):
        # Random initial points
        x_init = self._generate_random_numbers(x_min, x_max, Nx_init)
        t_init = torch.zeros(1, dtype=self.dtype)
        init_data = torch.stack(torch.meshgrid(x_init, t_init)).view(2, -1).permute(1, 0).requires_grad_(True).to(self.device)

        u_init = - torch.sin(math.pi * x_init).to(self.device)

        return init_data, u_init

    # *** Test set ***
    def sample_test(self):
        test_data = pd.read_csv(self.test_dir).dropna().to_numpy()
        return torch.tensor(test_data, dtype=self.dtype, device=self.device, requires_grad=True)

## Networks

In [27]:
class MLPBase(nn.Module):
    def __init__(self, layers, activation=nn.Tanh(), weight_init=None, bias_init=None, device='cpu'):
        super().__init__()
        self.layers = layers
        self.activation = activation
        self.weight_init = weight_init
        self.bias_init = bias_init

        dense_layers = [
            self.dense_layer(in_features=self.layers[i], out_features=self.layers[i + 1])
            for i in range(len(layers) - 2)]
        dense_layers.append(nn.Linear(in_features=self.layers[-2], out_features=self.layers[-1]))

        self.mlp = nn.Sequential(*dense_layers).to(device)

    def dense_layer(self, in_features, out_features):
        dense_layer = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=out_features),
        )

        if self.weight_init is not None:
            self.weight_init(dense_layer[0].weight)

        if self.bias_init is not None:
            self.bias_init(dense_layer[0].bias)

        dense_layer.add_module("activation", self.activation)
        return dense_layer


class MLPAgent(MLPBase):
    def __init__(self, layers, activation=nn.Tanh(), weight_init=None, bias_init=None, device='cpu'):
        super().__init__(layers, activation, weight_init, bias_init, device)

    def forward(self, x):
        g_out = self.mlp(x)
        x_out = torch.tanh(g_out[:, 0].clone()).view(-1, 1)
        t_out = torch.sigmoid(g_out[:, 1].clone()).view(-1, 1)
        return torch.cat((x_out, t_out), dim=1)


class MLPPinn(MLPBase):
    def __init__(self, layers, activation=nn.Tanh(), weight_init=None, bias_init=None, device='cpu'):
        super().__init__(layers, activation, weight_init, bias_init, device)

    def forward(self, x):
        return self.mlp(x)

In [28]:
class CNNDense(nn.Module):
    def __init__(self,
                 num_batches,
                 num_filters,
                 kernel_size,
                 mlp_hidden_layers,
                 mlp_hidden_activation,
                 device='cpu',
                 dtype=torch.float32):

        super().__init__()
        self.stride = 2
        self.padding = 0
        self.dilation = 1
        self.dtype = dtype
        self.device = device

        base_layers = [
            nn.Conv1d(in_channels=5,
                      out_channels=num_filters,
                      kernel_size=kernel_size,
                      stride=self.stride),
            nn.BatchNorm1d(num_filters),
            nn.ReLU(),
            nn.Flatten()
        ]

        conv1d_output_size = int(np.floor(1 + (num_batches + 2*self.padding - self.dilation*(kernel_size - 1) - 1) / self.stride))
        mlp_in_features = conv1d_output_size * num_filters

        mlp_layers = [mlp_in_features] + mlp_hidden_layers + [2]

        self.cnn_layer = nn.Sequential(*base_layers).to(device)
        self.mlp = MLPAgent(layers=mlp_layers,
                            activation=mlp_hidden_activation,
                            device=device).to(device)


    def forward(self, x):
        cnn_output = self.cnn_layer(x)
        return self.mlp(cnn_output)

## PINN

In [40]:
class PINNBase():
    def __init__(self,
                 layers,
                 activation,
                 device):

        self.v = 0.01 / math.pi

        # Define the model
        self.model = MLPPinn(layers=layers,
                             activation=activation,
                             weight_init=lambda m: nn.init.xavier_normal_(m.data, nn.init.calculate_gain('tanh')),
                             bias_init=lambda m: nn.init.zeros_(m.data),
                             device=device)

        # Set the optimizers
        adam = torch.optim.Adam(self.model.parameters())
        lbfgs = torch.optim.LBFGS(self.model.parameters(),
                                  lr=1,
                                  max_iter=2000,
                                #   max_iter=5000,
                                  max_eval=None,
                                  tolerance_grad=1e-07,
                                  tolerance_change=1e-09,
                                  history_size=100,
                                  line_search_fn='strong_wolfe')

        self.optimizers = {"adam": adam, "lbfgs": lbfgs}

        # Set the Loss function
        self.criterion = nn.MSELoss()

        # Set the MAE criterion for test data only
        self.l1_loss = nn.L1Loss()


    def forward(self, x):
        return self.model(x)


    def grad(self, output, input):
        return torch.autograd.grad(
                    output, input,
                    grad_outputs=torch.ones_like(output),
                    retain_graph=True,
                    create_graph=True
                )[0]


    def calculate_pde_residual(self, x):
        # Forward pass
        u = self.forward(x)

        # Calculate 1st and 2nd derivatives
        du_dX = self.grad(u, x)
        du_dXX = self.grad(du_dX, x)

        # Retrieve the partial gradients
        du_dt = du_dX[:, 1].flatten()
        du_dx = du_dX[:, 0].flatten()
        du_dxx = du_dXX[:, 0].flatten()

        pde_res = du_dt + u.flatten() * du_dx - self.v * du_dxx

        return u, pde_res


    def calculate_pde_loss(self, data):
        # Calculate the domain loss
        _, self.pde_res = self.calculate_pde_residual(data)
        pde_target = torch.zeros_like(self.pde_res)
        return self.criterion(self.pde_res, pde_target)


    def calculate_total_loss(self, data):
        # Calculate boundary loss
        loss_b = self.criterion(
            self.forward(data["bound_data"]).flatten(),
            data["u_bound"].flatten()
        )
        # print(f"loss_b = {loss_b}")

        # Calculate initial loss
        loss_i = self.criterion(
            self.forward(data["init_data"]).flatten(),
            data["u_init"].flatten()
        )
        # print(f"loss_i = {loss_i}")

        # Calculate the domain loss
        domain_data = torch.cat((data["domain_data"], data["anchors"]), dim=0)
        loss_pde = self.calculate_pde_loss(domain_data)
        # print(f"loss_pde = {loss_pde}")

        # Calculate total discriminator loss
        return loss_b + loss_i + loss_pde


    def evaluate_pinn(self, test_data):
        _, pde_res = self.calculate_pde_residual(test_data)
        pde_target = torch.zeros_like(pde_res)
        return self.l1_loss(pde_res, pde_target)


    def train_step(self, data):
        loss = self.calculate_total_loss(data)
        loss.backward()
        return loss


    def closure(self):
        self.lbfgs_optimizer.zero_grad()
        return self.train_step(self.data)

In [41]:
class PINN():
    def __init__(self,
                 x_min, x_max,
                 t_min, t_max,
                 N_domain,
                 Nx_init,
                 Nt_bound,
                 test_dir,
                 general_max_episode_steps,
                 layers, activation,
                 checkpoint_path,
                 eps=1e-5,
                 device='cpu',
                 dtype=torch.float32):

        # Constants
        self.checkpoint_path = checkpoint_path
        self.device = device
        self.dtype = dtype
        self.x_min = x_min
        self.x_max = x_max
        self.t_min = t_min
        self.t_max = t_max
        self.general_max_episode_steps = general_max_episode_steps
        self.data = {}

        # Create real data
        self.data_init = Data(x_min, x_max,
                              t_min, t_max,
                              test_dir,
                              eps,
                              device,
                              dtype)

        # Create train data
        self.data["domain_data"] = self.data_init.sample_domain(N_domain, self.x_min, self.x_max, self.t_min, self.t_max)
        self.data["bound_data"], self.data["u_bound"] = self.data_init.sample_boundary(Nt_bound)
        self.data["init_data"], self.data["u_init"] = self.data_init.sample_initial(Nx_init, self.x_min, self.x_max)

        # Create test data
        self.test_data = self.data_init.sample_test()

        # Create base PINN
        self.base_pinn = PINNBase(layers, activation, device)


    def add_anchors(self, step, point):
        if step == 0:
            self.data["anchors"] = torch.empty(self.general_max_episode_steps, 2, device=self.device)
        else:
            new_anchors = torch.tensor(point, dtype=self.dtype, device=self.device, requires_grad=True).view(-1, 2)
            self.data["anchors"] = torch.cat((self.data["anchors"], new_anchors), dim=0)


    def train_with_adam(self, N_adam, data):
        optimizer = self.base_pinn.optimizers['adam']

        for epoch in range(1, N_adam + 1):
            optimizer.zero_grad()
            loss = self.base_pinn.train_step(data)
            optimizer.step()

        return loss


    def train_with_lbfgs(self, N_lbfgs, data):
        self.base_pinn.lbfgs_optimizer = self.base_pinn.optimizers["lbfgs"]
        self.base_pinn.data = data

        for epoch in range(1, N_lbfgs + 1):
            loss = self.base_pinn.lbfgs_optimizer.step(self.base_pinn.closure)

        return loss


    def checkpoint(self):
        torch.save({
            "model": self.base_pinn.model.state_dict()
        }, self.checkpoint_path)


    def format_loss(self, loss):
        if loss == 0:
            return "0.0e+00"

        # Calculate the exponent part
        exponent = int(math.log10(abs(loss)))

        # Determine the format based on the value of the loss
        if abs(loss) < 1:
            formatted_loss = f"{loss:.2e}"
        else:
            # Adjust the sign of the formatted loss
            sign = "-" if loss < 0 else ""

            # Calculate the number of decimal places
            decimal_places = 2 - exponent

            # Ensure at least two decimal places
            decimal_places = max(decimal_places, 2)

            # Format the loss with the correct sign
            formatted_loss = f"{sign}{abs(loss):.{decimal_places}e}"

        return formatted_loss


    def keep_checkpoints_and_print_losses(self, iter, patience, print_every, loss, loss_test):

        loss_str = self.format_loss(loss)
        loss_test_str = self.format_loss(loss_test)

        if iter == 1:
            self.best_val_loss = loss_test
            self.best_epoch = -1
            self.checkpoint()
            self.flag = 1
            print(f"Iteration: {iter} | loss: {loss_str} | test_mae: {loss_test_str} - *Checkpoint*")
        else:
            if loss_test < self.best_val_loss:
                self.best_val_loss = loss_test
                self.best_epoch = iter
                self.checkpoint()
                self.flag = 1
                if iter % print_every == 0:
                    print(f"Iteration: {iter} | loss: {loss_str} | test_mae: {loss_test_str} - *Checkpoint*")
            elif iter - self.best_epoch > patience:
                if iter % print_every == 0:
                    self.early_stopping_applied = 1
                    print(f"Iteration: {iter} | loss: {loss_str} | test_mae: {loss_test_str}")
                return

        if (self.flag == 0) and (iter % print_every == 0):
            print(f"Iteration: {iter} | loss: {loss_str} | test_mae: {loss_test_str}")


    def train(self, iters, patience, print_every, N_adam, N_lbfgs):
        print(f"\nPINN: {iters} iterations")
        print(f"a. PINN: {N_adam} epochs --> Adam")
        print(f"b. PINN: {N_lbfgs} epochs --> L-BFGS")

        for iter in tqdm(range(1, iters + 1)):
            self.flag = 0
            self.early_stopping_applied = 0

            # Train with adam
            print(f"\n\nTraining with ADAM...")
            loss = self.train_with_adam(N_adam, self.data)

            # Train with L-BFGS
            print(f"\nTraining with L-BFGS...")
            loss = self.train_with_lbfgs(N_lbfgs, self.data)

            # Evaluate on test
            loss_test = self.base_pinn.evaluate_pinn(self.test_data)

            # Keep check points and print losses
            self.keep_checkpoints_and_print_losses(iter, patience, print_every, loss, loss_test)
            if self.early_stopping_applied:
                print(f"\nEarly stopping applied at epoch {iter}.")
                break

        return loss_test.detach().cpu().numpy()

## Agent

In [31]:
class DeepQNetwork(nn.Module):
  def __init__(self, layers, activation, checkpoint_path):
    super(DeepQNetwork, self).__init__()

    self.checkpoint_path = checkpoint_path
    self.model = MLPAgent(layers=layers,
                          activation=activation,
                          weight_init=lambda m: nn.init.xavier_normal_(m.data),
                          bias_init=lambda m: nn.init.zeros_(m.data),
                          device=device)
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
    self.criterion = nn.MSELoss()

  def forward(self, state):
    return self.model(state)

  def save_checkpoint(self):
    torch.save(self.model.state_dict(), self.checkpoint_path)

  def load_checkpoint(self):
    self.model.load_state_dict(torch.load(self.checkpoint_path))

In [52]:
class DDQNAgent():
    def __init__(self, pinn, iterations, patience, print_every,
                 num_epochs_adam, num_epochs_lbfgs, layers,
                 activation, max_anchors, gamma, epsilon,
                 eps_min, eps_dec, replace, checkpoint_path_eval,
                 checkpoint_path_next, device='cpu', dtype=torch.float32):

        self.pinn = pinn
        self.iterations = iterations
        self.patience = patience
        self.print_every = print_every
        self.num_epochs_adam = num_epochs_adam
        self.num_epochs_lbfgs = num_epochs_lbfgs

        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_min = eps_min
        self.eps_dec = eps_dec
        self.replace_target_counter = replace
        self.max_anchors = max_anchors
        self.sampled_points = []
        self.Q_eval = DeepQNetwork(layers, activation, checkpoint_path_eval)
        self.Q_next = DeepQNetwork(layers, activation, checkpoint_path_next)

        self.step_counter = 0
        self.learn_step_counter = 0
        self.device = device
        self.dtype = dtype

    def create_random_action(self):
        x = 2 * torch.rand(1, 1, dtype=self.dtype, device=self.device) - 1
        t = torch.rand(1, 1, dtype=self.dtype, device=self.device)
        return torch.cat((x, t), dim=1)

    def choose_action(self, observation):
        return self.Q_eval.forward(observation) if np.random.random() > self.epsilon else self.create_random_action()

    def create_observation(self, action=None):
        if action is None:
            action = self.create_random_action()

        point = action.requires_grad_(True)
        u_hat, pde_res = self.pinn.base_pinn.calculate_pde_residual(point)
        norm_step_val = self.step_counter / self.max_anchors
        norm_step = norm_step_val * torch.ones(u_hat.shape[0], 1, dtype=self.dtype, device=self.device)
        return torch.cat((action, u_hat, pde_res.view(-1, 1), norm_step), dim=1).detach()

    def reset(self):
        self.step_counter = 0
        self.sampled_points = []
        self.pinn.add_anchors(step=self.step_counter, point=None)
        return self.create_observation(action=None)

    def step(self, action):
        self.step_counter += 1

        # Add sampled points to training data
        self.pinn.add_anchors(self.step_counter, action)
        self.sampled_points.append(action)

        # Generating next input
        next_obs = self.create_observation(action=action)

        # If batch is completed Then train, reward the agent and restart episode (sampling process).
        # Finally store log Else continue episode (sampling process) and no reward is provided
        if self.step_counter < self.max_anchors:
            reward = 0.0
            done = False
        else:
            self.eval_error = self.pinn.train(self.iterations,
                                              self.patience,
                                              self.print_every,
                                              self.num_epochs_adam,
                                              self.num_epochs_lbfgs)
            reward = float(self.eval_error)
            done = True

        # Return transition tuple: next observation, reward, done, truncate=False, info=None
        return next_obs, reward, done, {}


    def replace_target_network(self):
        if self.step_counter % self.replace_target_counter == 0:
            self.Q_next.load_state_dict(self.Q_eval.state_dict())


    def decrement_epsilon(self):
        self.epsilon = self.epsilon - self.eps_dec if self.epsilon > self.eps_min else self.eps_min


    def save_models(self):
        self.Q_eval.save_checkpoint()
        self.Q_next.save_checkpoint()


    def load_models(self):
        self.Q_eval.load_checkpoint()
        self.Q_next.load_checkpoint()


    def train(self, obs, reward, obs_, done):
        self.Q_eval.optimizer.zero_grad()

        self.replace_target_network()

        # indices = np.arange(self.batch_size)

        # ================== CHANGES START HERE ... =====================
        Q_pred = self.Q_eval.forward(obs)   #[indices, actions]
        Q_next = self.Q_next.forward(obs_)
        Q_eval = self.Q_eval.forward(obs_)
        # max_actions = T.argmax(Q_eval, dim=1)

        # where the episodes are reach a terminal state (done=1),
        # there target must be equal to r
        # Q_next[done] = 0.0
        if done:
            Q_target = reward + self.gamma * torch.zeros_like(Q_next)
        else:
            Q_target = reward + self.gamma * Q_next    #[indices, max_actions]
        # ================== CHANGES END HERE ... =======================

        loss = self.Q_eval.criterion(Q_target, Q_pred)  #.to(self.Q_eval.device)
        loss.backward()
        self.Q_eval.optimizer.step()
        self.step_counter += 1

        self.decrement_epsilon()
        self.learn_step_counter += 1

        return loss.item()

# MAIN

## Configuration

In [54]:
# General
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
dtype = torch.float32
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Data
x_min, x_max = -1, 1
t_min, t_max = 0, 1
N_domain = 2_500        # Number of domain training points
Nt_bound = 100           # Number of training points for x=-1 and x=1
Nx_init = 100            # Number of training points for t=0
test_dir = '/content/drive/MyDrive/test_data.csv' #'test_data.csv'

# PINN-Model
N_layers = 3
N_neurons = 20
layers = [2] + N_layers * [N_neurons] + [1]
hidden_activation = nn.Tanh()

# PINN-Training
iterations = 1
patience = iterations
print_every = 1
num_epochs_adam = 100
num_epochs_lbfgs = 1
chechpoint_path = "pinn_model.pth"

# Agent
trials = 100
max_anchors = 25
epsilon = 0.1
gamma = 0.99
eps_min = 0.01
eps_dec = 5e-7
replace = 50
checkpoint_path_eval="ddq_model_eval.pth",
checkpoint_path_next="ddq_model_next.pth"
N_agent_layers = 3
N_agent_neurons = 64
agent_layers = [5] + N_agent_layers * [N_agent_neurons] + [2]
agent_hidden_activation = nn.ReLU()

## Train

In [55]:
pinn = PINN(
    x_min, x_max, t_min, t_max,
    N_domain, Nx_init, Nt_bound,
    test_dir, max_anchors,
    layers, hidden_activation,
    chechpoint_path,
    device=device
)

agent = DDQNAgent(
    pinn, iterations, patience, print_every,
    num_epochs_adam, num_epochs_lbfgs, agent_layers,
    agent_hidden_activation, max_anchors, gamma, epsilon,
    eps_min, eps_dec, replace, checkpoint_path_eval,
    checkpoint_path_next, device, dtype
)

for trial in range(trials):
    print(f"\n******************** trial = {trial} ********************")
    obs = agent.reset().squeeze().view(-1, 5)
    score = 0
    done = False

    while not done:
        action = agent.choose_action(obs)
        obs_, reward, done, info = agent.step(action)
        score += reward
        agent_loss = agent.train(obs, reward, obs_, int(done))
        obs = obs_

    print(f"Agent loss: {agent_loss}")
    print(f"Score: {score} | epsilon: {agent.epsilon}")


******************** trial = 0 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:40<00:00, 40.76s/it]


Iteration: 1 | loss: 2.10e-01 | test_mae: 3.13e-02 - *Checkpoint*
Agent loss: 0.11416581273078918
Score: 0.03125103935599327 | epsilon: 0.0999935

******************** trial = 1 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:41<00:00, 41.27s/it]


Iteration: 1 | loss: 6.95e-04 | test_mae: 4.42e-02 - *Checkpoint*
Agent loss: 0.10534705966711044
Score: 0.044237904250621796 | epsilon: 0.09998699999999999

******************** trial = 2 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:41<00:00, 41.73s/it]


Iteration: 1 | loss: 5.45e-03 | test_mae: 6.34e-02 - *Checkpoint*
Agent loss: 0.0923684686422348
Score: 0.0633816123008728 | epsilon: 0.09998049999999999

******************** trial = 3 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:42<00:00, 42.75s/it]


Iteration: 1 | loss: 2.56e-04 | test_mae: 4.46e-02 - *Checkpoint*
Agent loss: 0.10321346670389175
Score: 0.04458501935005188 | epsilon: 0.09997399999999998

******************** trial = 4 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:40<00:00, 40.92s/it]


Iteration: 1 | loss: 4.52e-04 | test_mae: 4.12e-02 - *Checkpoint*
Agent loss: 0.09773480892181396
Score: 0.04116290062665939 | epsilon: 0.09996749999999997

******************** trial = 5 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:17<00:00, 17.10s/it]


Iteration: 1 | loss: 3.04e-05 | test_mae: 4.12e-02 - *Checkpoint*
Agent loss: 0.09503310918807983
Score: 0.04124554619193077 | epsilon: 0.09996099999999997

******************** trial = 6 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:20<00:00, 20.09s/it]


Iteration: 1 | loss: 6.16e-04 | test_mae: 3.80e-02 - *Checkpoint*
Agent loss: 0.09075099229812622
Score: 0.03798878192901611 | epsilon: 0.09995449999999996

******************** trial = 7 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.82s/it]


Iteration: 1 | loss: 3.17e-05 | test_mae: 3.89e-02 - *Checkpoint*
Agent loss: 0.08836887031793594
Score: 0.03885738551616669 | epsilon: 0.09994799999999995

******************** trial = 8 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:03<00:00,  3.51s/it]


Iteration: 1 | loss: 1.87e-05 | test_mae: 3.85e-02 - *Checkpoint*
Agent loss: 0.08581762760877609
Score: 0.038465388119220734 | epsilon: 0.09994149999999995

******************** trial = 9 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:38<00:00, 38.82s/it]


Iteration: 1 | loss: 3.75e-03 | test_mae: 3.22e-02 - *Checkpoint*
Agent loss: 0.0877247005701065
Score: 0.03222791105508804 | epsilon: 0.09993499999999994

******************** trial = 10 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:32<00:00, 32.70s/it]


Iteration: 1 | loss: 3.44e-03 | test_mae: 3.15e-02 - *Checkpoint*
Agent loss: 0.085894376039505
Score: 0.03148037567734718 | epsilon: 0.09992849999999993

******************** trial = 11 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:10<00:00, 10.70s/it]


Iteration: 1 | loss: 7.65e-05 | test_mae: 3.17e-02 - *Checkpoint*
Agent loss: 0.08443713188171387
Score: 0.03171413391828537 | epsilon: 0.09992199999999993

******************** trial = 12 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.98s/it]


Iteration: 1 | loss: 2.00e-05 | test_mae: 3.17e-02 - *Checkpoint*
Agent loss: 0.08246376365423203
Score: 0.0316615030169487 | epsilon: 0.09991549999999992

******************** trial = 13 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.51s/it]


Iteration: 1 | loss: 2.30e-05 | test_mae: 3.17e-02 - *Checkpoint*
Agent loss: 0.08037496358156204
Score: 0.03169768303632736 | epsilon: 0.09990899999999991

******************** trial = 14 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.72s/it]


Iteration: 1 | loss: 1.19e-05 | test_mae: 3.14e-02 - *Checkpoint*
Agent loss: 0.0797099843621254
Score: 0.03142845258116722 | epsilon: 0.09990249999999991

******************** trial = 15 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.17it/s]


Training with L-BFGS...
Iteration: 1 | loss: 1.71e-05 | test_mae: 3.13e-02 - *Checkpoint*
Agent loss: 0.077657550573349
Score: 0.03133383020758629 | epsilon: 0.0998959999999999

******************** trial = 16 ********************






PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:06<00:00,  6.98s/it]


Iteration: 1 | loss: 2.12e-05 | test_mae: 3.20e-02 - *Checkpoint*
Agent loss: 0.0744410902261734
Score: 0.03195364028215408 | epsilon: 0.0998894999999999

******************** trial = 17 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.08it/s]


Training with L-BFGS...
Iteration: 1 | loss: 1.47e-04 | test_mae: 3.34e-02 - *Checkpoint*
Agent loss: 0.07603879272937775
Score: 0.03343832492828369 | epsilon: 0.09988299999999989

******************** trial = 18 ********************






PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.19it/s]



Training with L-BFGS...
Iteration: 1 | loss: 2.04e-05 | test_mae: 3.24e-02 - *Checkpoint*
Agent loss: 0.07536391913890839
Score: 0.03244906663894653 | epsilon: 0.09987649999999988

******************** trial = 19 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:13<00:00, 13.47s/it]


Iteration: 1 | loss: 1.62e-04 | test_mae: 3.17e-02 - *Checkpoint*
Agent loss: 0.07266034930944443
Score: 0.031722377985715866 | epsilon: 0.09986999999999988

******************** trial = 20 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.54s/it]


Iteration: 1 | loss: 2.66e-05 | test_mae: 3.21e-02 - *Checkpoint*
Agent loss: 0.07220318168401718
Score: 0.03206069767475128 | epsilon: 0.09986349999999987

******************** trial = 21 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:00<00:00,  1.05it/s]


Iteration: 1 | loss: 6.90e-04 | test_mae: 3.65e-02 - *Checkpoint*
Agent loss: 0.0672624260187149
Score: 0.03653751313686371 | epsilon: 0.09985699999999986

******************** trial = 22 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:09<00:00,  9.16s/it]


Iteration: 1 | loss: 1.30e-04 | test_mae: 3.20e-02 - *Checkpoint*
Agent loss: 0.06606130301952362
Score: 0.03198777511715889 | epsilon: 0.09985049999999986

******************** trial = 23 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.16it/s]


Training with L-BFGS...
Iteration: 1 | loss: 3.96e-04 | test_mae: 3.24e-02 - *Checkpoint*
Agent loss: 0.06378064304590225
Score: 0.03236382082104683 | epsilon: 0.09984399999999985

******************** trial = 24 ********************






PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:11<00:00, 11.40s/it]


Iteration: 1 | loss: 6.87e-05 | test_mae: 3.06e-02 - *Checkpoint*
Agent loss: 0.06681769341230392
Score: 0.030634403228759766 | epsilon: 0.09983749999999984

******************** trial = 25 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.46s/it]


Iteration: 1 | loss: 2.78e-05 | test_mae: 3.12e-02 - *Checkpoint*
Agent loss: 0.061883799731731415
Score: 0.031219076365232468 | epsilon: 0.09983099999999984

******************** trial = 26 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.97s/it]


Iteration: 1 | loss: 2.07e-05 | test_mae: 3.11e-02 - *Checkpoint*
Agent loss: 0.06379510462284088
Score: 0.03111342154443264 | epsilon: 0.09982449999999983

******************** trial = 27 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.02s/it]


Iteration: 1 | loss: 8.12e-03 | test_mae: 3.15e-02 - *Checkpoint*
Agent loss: 0.0653640627861023
Score: 0.03147024288773537 | epsilon: 0.09981799999999982

******************** trial = 28 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.70s/it]


Iteration: 1 | loss: 8.89e-05 | test_mae: 3.11e-02 - *Checkpoint*
Agent loss: 0.06132366135716438
Score: 0.031054867431521416 | epsilon: 0.09981149999999982

******************** trial = 29 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.98s/it]


Iteration: 1 | loss: 1.26e-05 | test_mae: 3.11e-02 - *Checkpoint*
Agent loss: 0.06292292475700378
Score: 0.031063437461853027 | epsilon: 0.09980499999999981

******************** trial = 30 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:21<00:00, 21.61s/it]


Iteration: 1 | loss: 8.80e-04 | test_mae: 3.20e-02 - *Checkpoint*
Agent loss: 0.05915980413556099
Score: 0.03199789300560951 | epsilon: 0.0997984999999998

******************** trial = 31 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:27<00:00, 27.97s/it]


Iteration: 1 | loss: 1.25e-03 | test_mae: 3.07e-02 - *Checkpoint*
Agent loss: 0.05536176636815071
Score: 0.030698398128151894 | epsilon: 0.0997919999999998

******************** trial = 32 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:10<00:00, 10.25s/it]


Iteration: 1 | loss: 2.15e-05 | test_mae: 3.07e-02 - *Checkpoint*
Agent loss: 0.10108204185962677
Score: 0.030684657394886017 | epsilon: 0.09978549999999979

******************** trial = 33 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:03<00:00,  3.65s/it]


Iteration: 1 | loss: 1.21e-05 | test_mae: 3.09e-02 - *Checkpoint*
Agent loss: 0.06724505126476288
Score: 0.03089505061507225 | epsilon: 0.09977899999999978

******************** trial = 34 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.26s/it]


Iteration: 1 | loss: 1.20e-05 | test_mae: 3.08e-02 - *Checkpoint*
Agent loss: 0.05923924222588539
Score: 0.0307777039706707 | epsilon: 0.09977249999999978

******************** trial = 35 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:09<00:00,  9.93s/it]


Iteration: 1 | loss: 1.82e-05 | test_mae: 3.13e-02 - *Checkpoint*
Agent loss: 0.05770094320178032
Score: 0.03130076080560684 | epsilon: 0.09976599999999977

******************** trial = 36 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.10s/it]


Iteration: 1 | loss: 1.04e-05 | test_mae: 3.11e-02 - *Checkpoint*
Agent loss: 0.05610201507806778
Score: 0.03110714815557003 | epsilon: 0.09975949999999977

******************** trial = 37 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.37it/s]



Training with L-BFGS...
Iteration: 1 | loss: 1.33e-05 | test_mae: 3.10e-02 - *Checkpoint*
Agent loss: 0.05842294543981552
Score: 0.031013622879981995 | epsilon: 0.09975299999999976

******************** trial = 38 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.38s/it]


Iteration: 1 | loss: 1.51e-05 | test_mae: 3.10e-02 - *Checkpoint*
Agent loss: 0.05259747430682182
Score: 0.03102259151637554 | epsilon: 0.09974649999999975

******************** trial = 39 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:08<00:00,  8.87s/it]


Iteration: 1 | loss: 7.21e-05 | test_mae: 3.02e-02 - *Checkpoint*
Agent loss: 0.0538533590734005
Score: 0.03022010624408722 | epsilon: 0.09973999999999975

******************** trial = 40 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:13<00:00, 13.94s/it]


Iteration: 1 | loss: 2.55e-05 | test_mae: 2.96e-02 - *Checkpoint*
Agent loss: 0.05273367837071419
Score: 0.02955666370689869 | epsilon: 0.09973349999999974

******************** trial = 41 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:06<00:00,  6.70s/it]


Iteration: 1 | loss: 2.05e-05 | test_mae: 2.94e-02 - *Checkpoint*
Agent loss: 0.0449238121509552
Score: 0.029443059116601944 | epsilon: 0.09972699999999973

******************** trial = 42 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:23<00:00, 23.66s/it]


Iteration: 1 | loss: 9.55e-05 | test_mae: 2.89e-02 - *Checkpoint*
Agent loss: 0.07914982736110687
Score: 0.028900504112243652 | epsilon: 0.09972049999999973

******************** trial = 43 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:06<00:00,  6.44s/it]


Iteration: 1 | loss: 2.93e-05 | test_mae: 2.93e-02 - *Checkpoint*
Agent loss: 0.04419972747564316
Score: 0.02925511635839939 | epsilon: 0.09971399999999972

******************** trial = 44 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.91s/it]


Iteration: 1 | loss: 1.04e-05 | test_mae: 2.87e-02 - *Checkpoint*
Agent loss: 0.07426059246063232
Score: 0.028701290488243103 | epsilon: 0.09970749999999971

******************** trial = 45 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.29s/it]


Iteration: 1 | loss: 7.98e-06 | test_mae: 2.87e-02 - *Checkpoint*
Agent loss: 0.0646710991859436
Score: 0.028727106750011444 | epsilon: 0.0997009999999997

******************** trial = 46 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:19<00:00, 19.58s/it]


Iteration: 1 | loss: 9.57e-04 | test_mae: 2.68e-02 - *Checkpoint*
Agent loss: 0.048019662499427795
Score: 0.026840370148420334 | epsilon: 0.0996944999999997

******************** trial = 47 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:41<00:00, 41.31s/it]


Iteration: 1 | loss: 8.49e-04 | test_mae: 4.07e-02 - *Checkpoint*
Agent loss: 0.07307882606983185
Score: 0.04069210961461067 | epsilon: 0.0996879999999997

******************** trial = 48 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:40<00:00, 40.97s/it]


Iteration: 1 | loss: 1.32e-02 | test_mae: 3.56e-02 - *Checkpoint*
Agent loss: 0.06228635460138321
Score: 0.03562742471694946 | epsilon: 0.09968149999999969

******************** trial = 49 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:33<00:00, 33.64s/it]


Iteration: 1 | loss: 2.92e-03 | test_mae: 3.67e-02 - *Checkpoint*
Agent loss: 0.04869228973984718
Score: 0.03667665272951126 | epsilon: 0.09967499999999968

******************** trial = 50 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.72s/it]


Iteration: 1 | loss: 7.33e-05 | test_mae: 3.61e-02 - *Checkpoint*
Agent loss: 0.04245329275727272
Score: 0.03606879711151123 | epsilon: 0.09966849999999967

******************** trial = 51 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:42<00:00, 42.56s/it]


Iteration: 1 | loss: 5.61e-03 | test_mae: 3.76e-02 - *Checkpoint*
Agent loss: 0.04785876348614693
Score: 0.03760303929448128 | epsilon: 0.09966199999999967

******************** trial = 52 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:40<00:00, 40.25s/it]


Iteration: 1 | loss: 3.69e-03 | test_mae: 3.66e-02 - *Checkpoint*
Agent loss: 0.03555566817522049
Score: 0.03657831251621246 | epsilon: 0.09965549999999966

******************** trial = 53 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:18<00:00, 18.80s/it]


Iteration: 1 | loss: 4.38e-05 | test_mae: 3.49e-02 - *Checkpoint*
Agent loss: 0.048063039779663086
Score: 0.03492620587348938 | epsilon: 0.09964899999999965

******************** trial = 54 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:14<00:00, 14.43s/it]


Iteration: 1 | loss: 3.66e-05 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.04884807765483856
Score: 0.034529123455286026 | epsilon: 0.09964249999999965

******************** trial = 55 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.70s/it]


Iteration: 1 | loss: 1.31e-05 | test_mae: 3.47e-02 - *Checkpoint*
Agent loss: 0.047798313200473785
Score: 0.03474412113428116 | epsilon: 0.09963599999999964

******************** trial = 56 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:26<00:00, 26.27s/it]


Iteration: 1 | loss: 1.16e-03 | test_mae: 3.60e-02 - *Checkpoint*
Agent loss: 0.26950603723526
Score: 0.036011483520269394 | epsilon: 0.09962949999999964

******************** trial = 57 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:18<00:00, 18.24s/it]


Iteration: 1 | loss: 8.95e-05 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.07617199420928955
Score: 0.034488338977098465 | epsilon: 0.09962299999999963

******************** trial = 58 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:14<00:00, 14.61s/it]


Iteration: 1 | loss: 9.16e-04 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.06206526979804039
Score: 0.03454769775271416 | epsilon: 0.09961649999999962

******************** trial = 59 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:11<00:00, 11.01s/it]


Iteration: 1 | loss: 2.98e-04 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.044404659420251846
Score: 0.034541841596364975 | epsilon: 0.09960999999999962

******************** trial = 60 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:15<00:00, 15.49s/it]


Iteration: 1 | loss: 2.22e-03 | test_mae: 3.70e-02 - *Checkpoint*
Agent loss: 0.05251866951584816
Score: 0.03700410947203636 | epsilon: 0.09960349999999961

******************** trial = 61 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.97s/it]


Iteration: 1 | loss: 3.56e-05 | test_mae: 3.59e-02 - *Checkpoint*
Agent loss: 0.029134949669241905
Score: 0.035935427993535995 | epsilon: 0.0995969999999996

******************** trial = 62 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:03<00:00,  3.62s/it]


Iteration: 1 | loss: 4.50e-05 | test_mae: 3.63e-02 - *Checkpoint*
Agent loss: 0.04552380368113518
Score: 0.036331579089164734 | epsilon: 0.0995904999999996

******************** trial = 63 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:09<00:00,  9.76s/it]


Iteration: 1 | loss: 1.52e-04 | test_mae: 3.46e-02 - *Checkpoint*
Agent loss: 0.05519890785217285
Score: 0.03457047790288925 | epsilon: 0.09958399999999959

******************** trial = 64 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:08<00:00,  8.52s/it]


Iteration: 1 | loss: 2.22e-05 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.048460040241479874
Score: 0.03445565328001976 | epsilon: 0.09957749999999958

******************** trial = 65 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.19s/it]


Iteration: 1 | loss: 2.14e-05 | test_mae: 3.44e-02 - *Checkpoint*
Agent loss: 0.024640902876853943
Score: 0.034380942583084106 | epsilon: 0.09957099999999958

******************** trial = 66 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.75s/it]


Iteration: 1 | loss: 1.09e-05 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.055746808648109436
Score: 0.03453436866402626 | epsilon: 0.09956449999999957

******************** trial = 67 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:18<00:00, 18.68s/it]


Iteration: 1 | loss: 3.11e-04 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.020758006721735
Score: 0.03503144532442093 | epsilon: 0.09955799999999956

******************** trial = 68 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.32it/s]



Training with L-BFGS...
Iteration: 1 | loss: 7.60e-06 | test_mae: 3.51e-02 - *Checkpoint*
Agent loss: 0.05109862610697746
Score: 0.035062987357378006 | epsilon: 0.09955149999999956

******************** trial = 69 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.27it/s]



Training with L-BFGS...
Iteration: 1 | loss: 1.35e-05 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.01846490614116192
Score: 0.0350119024515152 | epsilon: 0.09954499999999955

******************** trial = 70 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.12s/it]


Iteration: 1 | loss: 9.94e-06 | test_mae: 3.46e-02 - *Checkpoint*
Agent loss: 0.054503995925188065
Score: 0.03464038297533989 | epsilon: 0.09953849999999954

******************** trial = 71 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:16<00:00, 16.22s/it]


Iteration: 1 | loss: 9.50e-04 | test_mae: 3.77e-02 - *Checkpoint*
Agent loss: 0.040133997797966
Score: 0.03772032633423805 | epsilon: 0.09953199999999954

******************** trial = 72 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:10<00:00, 10.40s/it]


Iteration: 1 | loss: 2.04e-05 | test_mae: 3.54e-02 - *Checkpoint*
Agent loss: 0.0723683312535286
Score: 0.03539818152785301 | epsilon: 0.09952549999999953

******************** trial = 73 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.29it/s]



Training with L-BFGS...
Iteration: 1 | loss: 1.14e-05 | test_mae: 3.55e-02 - *Checkpoint*
Agent loss: 0.02453628182411194
Score: 0.03550494462251663 | epsilon: 0.09951899999999952

******************** trial = 74 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.27it/s]



Training with L-BFGS...
Iteration: 1 | loss: 4.82e-04 | test_mae: 3.89e-02 - *Checkpoint*
Agent loss: 0.031521812081336975
Score: 0.03887586668133736 | epsilon: 0.09951249999999952

******************** trial = 75 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:12<00:00, 12.28s/it]


Iteration: 1 | loss: 1.04e-04 | test_mae: 3.56e-02 - *Checkpoint*
Agent loss: 0.019605904817581177
Score: 0.03559020534157753 | epsilon: 0.09950599999999951

******************** trial = 76 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:08<00:00,  8.43s/it]


Iteration: 1 | loss: 1.68e-04 | test_mae: 3.59e-02 - *Checkpoint*
Agent loss: 0.050028178840875626
Score: 0.03586740791797638 | epsilon: 0.0994994999999995

******************** trial = 77 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:08<00:00,  8.04s/it]


Iteration: 1 | loss: 4.29e-05 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.023973852396011353
Score: 0.035048842430114746 | epsilon: 0.0994929999999995

******************** trial = 78 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.30it/s]



Training with L-BFGS...
Iteration: 1 | loss: 1.04e-05 | test_mae: 3.51e-02 - *Checkpoint*
Agent loss: 0.0434454083442688
Score: 0.035094667226076126 | epsilon: 0.09948649999999949

******************** trial = 79 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:03<00:00,  3.50s/it]


Iteration: 1 | loss: 8.93e-06 | test_mae: 3.51e-02 - *Checkpoint*
Agent loss: 0.015123981051146984
Score: 0.035085711628198624 | epsilon: 0.09947999999999949

******************** trial = 80 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:01<00:00,  1.31s/it]


Iteration: 1 | loss: 6.63e-06 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.0760517418384552
Score: 0.035015590488910675 | epsilon: 0.09947349999999948

******************** trial = 81 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.06s/it]


Iteration: 1 | loss: 8.21e-06 | test_mae: 3.52e-02 - *Checkpoint*
Agent loss: 0.018774082884192467
Score: 0.03516482934355736 | epsilon: 0.09946699999999947

******************** trial = 82 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.99s/it]


Iteration: 1 | loss: 8.73e-06 | test_mae: 3.52e-02 - *Checkpoint*
Agent loss: 0.06327448040246964
Score: 0.035202763974666595 | epsilon: 0.09946049999999947

******************** trial = 83 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.17s/it]


Iteration: 1 | loss: 2.36e-05 | test_mae: 3.52e-02 - *Checkpoint*
Agent loss: 0.011684712953865528
Score: 0.03518917039036751 | epsilon: 0.09945399999999946

******************** trial = 84 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.34it/s]



Training with L-BFGS...
Iteration: 1 | loss: 1.13e-05 | test_mae: 3.52e-02 - *Checkpoint*
Agent loss: 0.05763080343604088
Score: 0.035160843282938004 | epsilon: 0.09944749999999945

******************** trial = 85 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.33it/s]



Training with L-BFGS...
Iteration: 1 | loss: 1.18e-05 | test_mae: 3.52e-02 - *Checkpoint*
Agent loss: 0.01153411716222763
Score: 0.035176340490579605 | epsilon: 0.09944099999999945

******************** trial = 86 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.46s/it]


Iteration: 1 | loss: 8.80e-06 | test_mae: 3.51e-02 - *Checkpoint*
Agent loss: 0.04689836874604225
Score: 0.03511472046375275 | epsilon: 0.09943449999999944

******************** trial = 87 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:04<00:00,  4.52s/it]


Iteration: 1 | loss: 4.25e-05 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.013393701985478401
Score: 0.03496132418513298 | epsilon: 0.09942799999999943

******************** trial = 88 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.00s/it]


Iteration: 1 | loss: 6.00e-06 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.04643688350915909
Score: 0.034968119114637375 | epsilon: 0.09942149999999943

******************** trial = 89 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:12<00:00, 12.67s/it]


Iteration: 1 | loss: 1.63e-04 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.009861216880381107
Score: 0.03497405722737312 | epsilon: 0.09941499999999942

******************** trial = 90 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:06<00:00,  6.64s/it]


Iteration: 1 | loss: 1.85e-05 | test_mae: 3.46e-02 - *Checkpoint*
Agent loss: 0.05187104642391205
Score: 0.03461936116218567 | epsilon: 0.09940849999999941

******************** trial = 91 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:04<00:00,  4.91s/it]


Iteration: 1 | loss: 3.99e-05 | test_mae: 3.43e-02 - *Checkpoint*
Agent loss: 0.008983273059129715
Score: 0.03426305577158928 | epsilon: 0.09940199999999941

******************** trial = 92 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...


100%|██████████| 1/1 [00:00<00:00,  1.35it/s]



Training with L-BFGS...
Iteration: 1 | loss: 6.19e-06 | test_mae: 3.43e-02 - *Checkpoint*
Agent loss: 0.04820624366402626
Score: 0.0342729426920414 | epsilon: 0.0993954999999994

******************** trial = 93 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:03<00:00,  3.80s/it]


Iteration: 1 | loss: 3.51e-05 | test_mae: 3.47e-02 - *Checkpoint*
Agent loss: 0.0073042986914515495
Score: 0.03466876968741417 | epsilon: 0.0993889999999994

******************** trial = 94 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:06<00:00,  6.40s/it]


Iteration: 1 | loss: 1.16e-05 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.05154349282383919
Score: 0.03501049801707268 | epsilon: 0.09938249999999939

******************** trial = 95 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:04<00:00,  4.06s/it]


Iteration: 1 | loss: 9.40e-06 | test_mae: 3.44e-02 - *Checkpoint*
Agent loss: 0.006887808442115784
Score: 0.034428179264068604 | epsilon: 0.09937599999999938

******************** trial = 96 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:03<00:00,  3.11s/it]


Iteration: 1 | loss: 1.15e-05 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.05046183988451958
Score: 0.035024166107177734 | epsilon: 0.09936949999999937

******************** trial = 97 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:07<00:00,  7.09s/it]


Iteration: 1 | loss: 1.71e-05 | test_mae: 3.50e-02 - *Checkpoint*
Agent loss: 0.007220123894512653
Score: 0.03497465327382088 | epsilon: 0.09936299999999937

******************** trial = 98 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:02<00:00,  2.74s/it]


Iteration: 1 | loss: 9.31e-06 | test_mae: 3.52e-02 - *Checkpoint*
Agent loss: 0.051944855600595474
Score: 0.03524667024612427 | epsilon: 0.09935649999999936

******************** trial = 99 ********************

PINN: 1 iterations
a. PINN: 100 epochs --> Adam
b. PINN: 1 epochs --> L-BFGS


  0%|          | 0/1 [00:00<?, ?it/s]



Training with ADAM...

Training with L-BFGS...


100%|██████████| 1/1 [00:05<00:00,  5.07s/it]

Iteration: 1 | loss: 3.17e-05 | test_mae: 3.45e-02 - *Checkpoint*
Agent loss: 0.007982981391251087
Score: 0.03450756147503853 | epsilon: 0.09934999999999936



