# Imports

In [6]:
import importlib
from functools import partial

from torch import nn
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
import copy

from decision_learning.data.shortest_path_grid import genData
from decision_learning.modeling.loss import SPOPlus, get_loss_function
from decision_learning.modeling.models import LinearRegression
from decision_learning.modeling.val_metrics import decision_regret
from decision_learning.modeling.train import train, calc_test_regret, init_loss_data_pretraining, filter_kwargs

# Training Loop Overview
While users can run an entire experiment pipeline using `decision_learning.modeling.pipeline`, they may sometimes want more detailed control over the training data setup (compared to the default behavior in `decision_learning.modeling.pipeline.lossfn_experiment_data_pipeline`) or over hyperparameter search (compared to the default grid search in the pipeline function `decision_learning.modeling.pipeline.lossfn_experiment_pipeline`). This may happen with a highly customized loss function or experiment process. In that case, we may still want a function that handles the PyTorch training loop, so that we can plug in the appropriate customized components without needing to delve into boilerplate PyTorch code. The `decision_learning.modeling.train.train` function provides this PyTorch training functionality.

At a high level, the train function needs the following components:
- prediction model: predicting true costs
- optimization model: linear optimization model parameterized by cost/coefficient vector for objective function, and returns the corresponding solution, objective value.
- loss function: a callable nn.Module loss function that can be used for PyTorch autograd functionality
- structured training data inputs: dictionary mapping from keys to specific data (features, true costs, true obj, true sol), for training, validation, and test sets
- val metric: a callable function used during each epoch to evaluate model on the validation set
- misc training loop parameters: device, number epochs, optimizer, learning rate, etc.

## Optimization Model (linear)

Decision-aware/focused problems require an optimization model to actually solve the decision problem. Since each decision problem is unique in terms of the modeling and solving, the user is expected to provide the optimization model function/object, which is treated like a black-box by the `pipeline`,`train`, and loss/regret functions in the code base. It could be Gurobi, Pyomo, or any user custom solver. However, to play nicely with the rest of the package, it must do the following:

- Input Argument when called:
    - costs: vector of objective function coefficients. Expected to be numpy np.ndarray or torch.tensor
- Returns 2 objects:
    - sols: solution to optimization model given the input costs. Expected to be numpy np.ndarray or torch.tensor
    - obj: objective value to optimization model given the input costs. Expected to be numpy np.ndarray or torch.tensor
 
These requirements are not restrictive: solvers generally return both the optimal solution and its objective value, and the objective function of any linear program is parameterized by a vector of costs/coefficients.


### Example Solver/Optimization Model
- Below, `shortest_path_solver` is a custom user optimization model specified in the form of a callable function, and its first input argument is the vector of costs. The rest of the input arguments size, sens, need to be pre-set before being passed to `pipeline`, `train`, or any loss function. This can be accomplished using the `partial` python function (see example below). The exact implementation is not important but mainly that it:
    - accepts a costs vector input
    - returns solution (sol) and objective value (obj) for the input cost vector
- Note that `shortest_path_solver` also has two returns: sol, obj

In [7]:
def shortest_path_solver(costs, size, sens=1e-4):
    """Solve a batch of shortest-path problems on a ``size`` x ``size`` grid.

    The grid is treated as ``2 * size - 1`` diagonal layers of nodes (widths
    1, 2, ..., size, ..., 2, 1). A forward dynamic-programming pass computes
    the cheapest cost-to-reach value of every node; a backward pass then walks
    back from the final node and marks the arcs that attain those values.

    Args:
        costs: 2-D ``torch.Tensor`` or ``np.ndarray`` of arc costs with shape
            (samples, num_arcs). Columns are grouped layer by layer; within a
            layer, even columns are the ``l_`` arcs and odd columns the ``r_``
            arcs of the code below. The layer widths add up to
            ``2 * size * (size - 1)`` columns.
        size: number of nodes along one side of the grid.
        sens: tolerance of the backward pass. An arc is marked when
            (value of its tail node + arc cost - value of its head node) is
            below ``sens``, so arcs tied within ``sens`` are all marked.

    Returns:
        sol: float32 tensor of shape (samples, num_arcs) marking selected arcs.
        obj: float32 tensor of shape (samples, 1) holding ``sum(sol * costs)``
            for each sample.
    """
    # The documented contract allows numpy input; convert once so that all
    # arithmetic below is tensor-with-tensor (a tensor is returned unchanged).
    costs = torch.as_tensor(costs)
    samples = costs.shape[0]
    # Number of diagonal node layers. This used to be hard-coded as 9, which
    # is only correct for size == 5.
    num_layers = 2 * size - 1

    # Forward Pass: V_arr holds the cost-to-reach value of every node, layer by layer
    starting_ind = 0
    starting_ind_c = 0
    V_arr = torch.zeros(samples, size ** 2)
    for i in range(num_layers - 1):
        num_nodes = min(i + 1, num_layers - i)
        num_nodes_next = min(i + 2, num_layers - i - 1)
        num_arcs = 2 * (max(num_nodes, num_nodes_next) - 1)
        V_1 = V_arr[:, starting_ind:starting_ind + num_nodes]
        layer_costs = costs[:, starting_ind_c:starting_ind_c + num_arcs]
        l_costs = layer_costs[:, 0::2]
        r_costs = layer_costs[:, 1::2]
        # inf marks "no arc of this kind enters this node"
        next_V_val_l = torch.ones(samples, num_nodes_next) * float('inf')
        next_V_val_r = torch.ones(samples, num_nodes_next) * float('inf')
        if num_nodes_next > num_nodes:
            # widening layer: every node has one l arc and one r arc leaving it
            next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
            next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
        else:
            # narrowing layer: every next node has one l arc and one r arc entering it
            next_V_val_l = V_1[:, :num_nodes_next] + l_costs
            next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
        next_V_val = torch.minimum(next_V_val_l, next_V_val_r)
        V_arr[:, starting_ind + num_nodes:starting_ind + num_nodes + num_nodes_next] = next_V_val

        starting_ind += num_nodes
        starting_ind_c += num_arcs

    # Backward Pass: start at the final node and mark arcs with (near) zero slack
    starting_ind = size ** 2
    starting_ind_c = costs.shape[1]
    prev_act = torch.ones(samples, 1)  # activity of the nodes in the current layer
    sol = torch.zeros(costs.shape)
    for i in range(num_layers - 1, 0, -1):
        num_nodes = min(i + 1, num_layers - i)
        num_nodes_next = min(i, num_layers - i + 1)
        V_1 = V_arr[:, starting_ind - num_nodes:starting_ind]
        V_2 = V_arr[:, starting_ind - num_nodes - num_nodes_next:starting_ind - num_nodes]

        num_arcs = 2 * (max(num_nodes, num_nodes_next) - 1)
        layer_costs = costs[:, starting_ind_c - num_arcs: starting_ind_c]

        if num_nodes < num_nodes_next:
            l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
            r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
            prev_act = torch.zeros(V_2.shape)
            prev_act[:, :num_nodes_next - 1] += l_cs_res
            prev_act[:, 1:num_nodes_next] += r_cs_res
        else:
            l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
            r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
            prev_act = torch.zeros(V_2.shape)
            prev_act += l_cs_res
            prev_act += r_cs_res
        # interleave the l/r results back into the column layout of costs
        cs = torch.zeros(layer_costs.shape)
        cs[:, ::2] = l_cs_res
        cs[:, 1::2] = r_cs_res
        sol[:, starting_ind_c - num_arcs: starting_ind_c] = cs

        starting_ind = starting_ind - num_nodes
        starting_ind_c = starting_ind_c - num_arcs
    # Dimension (samples, num edges)
    obj = torch.sum(sol * costs, dim=1)
    # Dimension (samples, 1)
    return sol.to(torch.float32), obj.reshape(-1, 1).to(torch.float32)

### Presetting non-cost input arguments of `shortest_path_solver`

In [8]:
# ------------optimization model------------
# Pre-set the grid size so the solver can be called with only the cost vector.
optmodel = partial(shortest_path_solver,size=5)

## Data Generation Setup
Any decision-aware/focused problem will of course need data inputs. The example below uses a pre-implemented synthetic data generator provided in the package, found in
`decision_learning.data.shortest_path_grid`, to generate shortest path problem data; the data is generated by calling the `genData` function

### Specific parameters to set up data generation
This data setup, and the synthetic data generation is in line with the paper https://arxiv.org/pdf/2402.03256 

### Create Experiments Grid
This shortest path experiment has two important settings:
- number of samples: fewer samples means higher error/more noise, more samples means lower error/less noise
- epsilon: noise level on edge costs, can be uniformly distributed multiplicative noise, or normally distributed additive noise
- This example below creates 100 trials for 8 different settings

In [9]:
# seed PyTorch's RNG so the two permutations below are reproducible
torch.manual_seed(105)
indices_arr = torch.randperm(100000)
indices_arr_test = torch.randperm(100000)

n_arr = [200, 400, 800, 1600]  # numbers of samples to run experiments with
ep_arr = ['unif', 'normal']  # noise types
trials = 100  # number of trials per (number of samples, noise type) setting

# one [number of samples, noise type, trial number] entry per experiment run,
# ordered by number of samples, then noise type, then trial number
exp_arr = [[n, ep, t] for n in n_arr for ep in ep_arr for t in range(trials)]

In [10]:
# setup
sim = 0  # simulation trial number, only show one experiment run for demonstration purposes
exp = exp_arr[sim]  # current experiment: [number of samples, noise type, trial number]
num_data, ep_type, trial = exp  # number of training data, noise type, trial number

# shortest path problem data generation parameters - https://arxiv.org/pdf/2402.03256
grid = (5, 5)  # grid size
num_feat = 5  # size of feature
deg = 6  # polynomial degree in edge cost function
e = 0.3  # noise width/amount of noise

# path planting for shortest path example - see page 9, subsection "Harder Example with Planted Arcs" in section 4.2 of paper https://arxiv.org/pdf/2402.03256
# Each dict describes a two-segment piecewise linear cost function:
# (slope0, int0) are the slope/intercept of the first segment, (slope1, int1) of the second.
# cost function for the planted "good" edge
planted_good_pwl_params = {
    'slope0': 0,
    'int0': 2,
    'slope1': 0,
    'int1': 2,
}
# cost function for the planted "bad" edge
planted_bad_pwl_params = {
    'slope0': 4,
    'int0': 0,
    'slope1': 0,
    'int1': 2.2,
}
plant_edge = True  # to plant edges in shortest path experiment or not

print(f'current experiment setting: number of data points {num_data}, epsilon type {ep_type}, trial number {trial}')

current experiment setting: number of data points 200, epsilon type unif, trial number 0


### Calling `genData` from `decision_learning.data.shortest_path_grid`

In [11]:
# ------------DATA------------
# generation settings shared by both genData calls below
shared_gen_kwargs = {
    'num_features': num_feat,  # number of features
    'grid': grid,  # grid shape
    'deg': deg,  # polynomial degree
    'noise_type': ep_type,  # epsilon noise type
    'noise_width': e,  # amount of noise
    'plant_edges': plant_edge,  # to plant edges or not
    'planted_good_pwl_params': planted_good_pwl_params,  # cost function for good edges
    'planted_bad_pwl_params': planted_bad_pwl_params,  # cost function for bad edges
}

# training data: num_data points plus 200 extra points
# (200 points are later held out by the train/val split)
generated_data = genData(
    num_data=num_data + 200,  # number of data points to generate
    seed=indices_arr[trial],  # seed the randomness
    **shared_gen_kwargs,
)

# testing data
generated_data_test = genData(
    num_data=10000,  # number of data points to generate for test set
    seed=indices_arr_test[trial],  # seed the randomness
    **shared_gen_kwargs,
)

### Split Data into train, val and create structured data inputs
For many decision aware loss function experiment processes, we need the following four:
- X: features
- true_cost: true cost associated with each X
- true_sol: true solution of LP given true_cost
- true_obj: true objective of LP given true_cost

In this case, we will need to get the `true_sol` and `true_obj` for each data sample/`true_cost` vector by plugging into our optimization solver. The code block below shows this.

Furthermore, this package's training loop, in order to flexibly handle different loss function signatures and behavior, requires data to be organized as dictionaries mapping key->data where the keys correspond to named arguments in the loss function. This way, the training loop can flexibly inject or remove the correct named arguments to each loss function

In [12]:
# training data - plug the generated costs into the solver to get true_sol, true_obj
sol, obj = shortest_path_solver(costs=generated_data['cost'], size=5)
# structured data: keys are the named arguments the training loop works with
final_data = {
    'X': generated_data['feat'],
    'true_cost': generated_data['cost'],
    'true_sol': sol,
    'true_obj': obj,
}

# ------------------TRAIN/VAL SPLIT--------------------
# Every value of final_data is split with sklearn train_test_split using the
# same test_size and random_state, so each key is split the same way across
# indices (the original author notes this behavior has been checked/tested).
splits = {
    key: train_test_split(value, test_size=200, random_state=42)
    for key, value in final_data.items()
}
train_dict = {key: parts[0] for key, parts in splits.items()}
val_dict = {key: parts[1] for key, parts in splits.items()}

# test data - get true_sol, true_obj and structured data form
sol_test, obj_test = shortest_path_solver(costs=generated_data_test['cost_true'], size=5)
final_data_test = {
    'X': generated_data_test['feat'],
    'true_cost': generated_data_test['cost_true'],
    'true_sol': sol_test,
    'true_obj': obj_test,
}

  next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
  next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
  next_V_val_l = V_1[:, :num_nodes_next] + l_costs
  next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
  l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
  r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
  l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
  r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
  obj = torch.sum(sol * costs, axis=1)


## Prediction Model
- Any decision-aware/focused problem will of course need a prediction model to predict the cost/coefficient vector given contextual input/features. This example uses a simple `LinearRegression` object implemented within `decision_learning.modeling.models`. 
- The package expects the prediction model to be a PyTorch model since PyTorch offers convenient autograd functionality/allows user to specify custom losses/backwards passes that are found within many decision-aware/focused works.

In [8]:
# ------------prediction model------------
# input width = number of feature columns, output width = number of cost columns
pred_model = LinearRegression(input_dim=generated_data['feat'].shape[1],
                 output_dim=generated_data['cost'].shape[1])

### Loss Function
Below, we use the preimplemented `SPOPlus` loss from `decision_learning.modeling.loss`, which requires an optimization model as input in order to solve for sol, obj under the current predicted costs when computing the loss and backpropagating each epoch

In [9]:
# loss function - SPOPlus is given the optimization model (solver with size pre-set)
loss_fn = SPOPlus(optmodel=optmodel)

# Training Loop

Initialize Inputs to Training Loop: the components in the code block below were already instantiated above; they are copied here as separate components for ease of reading

In [10]:
# Prediction Model - input/output widths are read from the training features and costs
pred_model = LinearRegression(input_dim=train_dict['X'].shape[1],
                 output_dim=train_dict['true_cost'].shape[1])

# optimization solver
optmodel = partial(shortest_path_solver,size=5)

# training, validation data
# NOTE: these names are aliases of train_dict / val_dict, not copies
train_data_dict = train_dict
val_data_dict = val_dict

In [31]:
# run the training loop; returns the per-epoch metrics and the trained model
metrics, trained_model = train(pred_model=pred_model, # prediction model
                optmodel=optmodel, # optimization model
                loss_fn=loss_fn, # loss function
                train_data_dict=train_data_dict, # training data dictionary
                val_data_dict=val_data_dict, # validation data dictionary
                test_data_dict=final_data_test, # test data dictionary
                dataloader_params={'batch_size':200, 'shuffle':True}, # pytorch dataloader configuration
                num_epochs=100, # number of epochs to train for
                lr=0.01, # learning rate
                scheduler_params=None, # learning rate scheduler - example: {'step_size': 10, 'gamma': 0.1}, None means no scheduler
                minimization=True, # minimization problem
                verbose=True) # output training loop details - better to turn off sometimes if no console output is wanted for many experiments

Training Loader: Epoch 1/100: 100%|██████████| 1/1 [00:00<00:00, 139.97it/s]
Validation Loader: Epoch 1/100: 100%|██████████| 1/1 [00:00<00:00, 342.00it/s]
2024-12-03 09:06:29,849 - decision_learning.modeling.train - INFO - epoch: 1, train_loss: 3.611985206604004, val_metric: 0.19141055642391308, test_regret: 0.03251729040452759
Training Loader: Epoch 2/100: 100%|██████████| 1/1 [00:00<00:00, 152.10it/s]
Validation Loader: Epoch 2/100: 100%|██████████| 1/1 [00:00<00:00, 351.66it/s]
2024-12-03 09:06:29,884 - decision_learning.modeling.train - INFO - epoch: 2, train_loss: 3.6669788360595703, val_metric: 0.2009515321695429, test_regret: 0.03291050814130325
Training Loader: Epoch 3/100: 100%|██████████| 1/1 [00:00<00:00, 152.41it/s]
Validation Loader: Epoch 3/100: 100%|██████████| 1/1 [00:00<00:00, 355.60it/s]
2024-12-03 09:06:29,919 - decision_learning.modeling.train - INFO - epoch: 3, train_loss: 3.640791654586792, val_metric: 0.2049022209053301, test_regret: 0.03295568888434176
Training

In [18]:
# display the per-epoch metrics returned by train (epoch, train_loss, val_metric, test_regret)
metrics

Unnamed: 0,epoch,train_loss,val_metric,test_regret
0,0,13.636871,0.352304,0.321175
1,1,13.056755,0.351924,0.305195
2,2,12.505816,0.347754,0.288633
3,3,11.984661,0.324476,0.272795
4,4,11.486254,0.332619,0.256549
...,...,...,...,...
95,95,4.242315,0.220304,0.046371
96,96,4.210351,0.213829,0.046720
97,97,4.248733,0.211432,0.046008
98,98,4.131743,0.227005,0.046019


# Evaluation Regret
After training model, we may want to evaluate the model on a separate performance metric like normalized regret on a test dataset. While this is automatically done as part of the `train` function, here we explicitly show it for clarity

In [32]:
# evaluate the trained model on the held-out test set with the same solver
test_regret = calc_test_regret(pred_model=trained_model,
                               test_data_dict=final_data_test,
                               optmodel=optmodel)
print(test_regret)

0.03301574175260439


# Further Examples
Since we may want to try different loss functions for decision aware problems, below are two more examples of preimplemented loss functions. The key is to ensure the training data dictionary named keys match the named arguments of the specific loss function. (Note we don't need to do this for val, test data dictionaries since those are not passed to loss function, but rather the validation metric)

## MSE

In [38]:
# Prediction Model
pred_model = LinearRegression(input_dim=train_dict['X'].shape[1],
                 output_dim=train_dict['true_cost'].shape[1])

# optimization solver
optmodel = partial(shortest_path_solver,size=5)

# loss function
loss_fn = nn.MSELoss()

# training, validation data - the training data dict needs an extra 'target'
# key to match the (input, target) arguments of nn.MSELoss.
# Build a new dict instead of updating train_dict in place: train_data_dict
# would otherwise alias train_dict, and the extra key would leak into every
# other experiment that reuses train_dict (cells may be run in any order).
# The copy is shallow, so the underlying data is shared, not duplicated.
train_data_dict = {**train_dict, 'target': train_dict['true_cost']}

val_data_dict = val_dict

In [39]:
# train with the MSE loss; no test_data_dict is passed here
# (the log below reports test_regret as nan, presumably for that reason)
metrics, trained_model = train(pred_model=pred_model,
                optmodel=optmodel,
                loss_fn=loss_fn,
                train_data_dict=train_data_dict,
                val_data_dict=val_data_dict,
                num_epochs=100,
                lr=0.1,
                scheduler_params={'step_size': 10, 'gamma': 0.1},
                minimization=True)

2025-01-22 08:46:32,271 - decision_learning.modeling.train - INFO - Training on device: cpu
Training Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 1085.28it/s]
Validation Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 2340.38it/s]
2025-01-22 08:46:32,287 - decision_learning.modeling.train - INFO - epoch: 1, train_loss: 6.2497804164886475, val_metric: 0.4441674166544082, test_regret: nan
Training Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 1506.73it/s]
Validation Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 3465.14it/s]
2025-01-22 08:46:32,299 - decision_learning.modeling.train - INFO - epoch: 2, train_loss: 2.1314265557697842, val_metric: 0.36343905870346865, test_regret: nan
Training Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 1960.74it/s]
Validation Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 3442.79it/s]
2025-01-22 08:46:32,307 - decision_learning.modeling.train - INFO - epoch: 3, train_loss: 1.365099651472909, val_me

In [40]:
# test-set regret of the MSE-trained model
test_regret = calc_test_regret(pred_model=trained_model,
                               test_data_dict=final_data_test,
                               optmodel=optmodel)
print(test_regret)

0.05886764105361766


## Cosine Loss

In [36]:
# Prediction Model
pred_model = LinearRegression(input_dim=train_dict['X'].shape[1],
                 output_dim=train_dict['true_cost'].shape[1])

# optimization solver
optmodel = partial(shortest_path_solver,size=5)

# loss function
loss_fn = nn.CosineEmbeddingLoss()

# training, validation data - nn.CosineEmbeddingLoss takes (input1, input2, target),
# so the training data dict needs the extra keys 'input2' (the true costs) and
# 'target' (all ones, i.e. each prediction/true-cost pair should be similar).
# Build a new dict instead of updating train_dict in place: train_data_dict
# would otherwise alias train_dict, and the extra keys would leak into every
# other experiment that reuses train_dict (cells may be run in any order).
# The copy is shallow, so the underlying data is shared, not duplicated.
train_data_dict = {**train_dict,
                   'input2': train_dict['true_cost'],
                   'target': torch.ones(train_dict['true_cost'].shape[0])}

val_data_dict = val_dict

In [37]:
# train with the cosine embedding loss; no test_data_dict is passed here
# (the log below reports test_regret as nan, presumably for that reason)
metrics, trained_model = train(pred_model=pred_model,
                optmodel=optmodel,
                loss_fn=loss_fn,
                train_data_dict=train_data_dict,
                val_data_dict=val_data_dict,
                num_epochs=100,
                lr=0.1,
                scheduler_params={'step_size': 10, 'gamma': 0.1},
                minimization=True)

Training Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 525.73it/s]
Validation Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 1882.06it/s]
2024-12-03 09:09:43,731 - decision_learning.modeling.train - INFO - epoch: 1, train_loss: 0.3974379396864346, val_metric: 0.3210879526955124, test_regret: nan
Training Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 562.25it/s]
Validation Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 1906.63it/s]
2024-12-03 09:09:43,755 - decision_learning.modeling.train - INFO - epoch: 2, train_loss: 0.04923589580825397, val_metric: 0.2230186014717382, test_regret: nan
Training Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 561.80it/s]
Validation Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 1889.08it/s]
2024-12-03 09:09:43,779 - decision_learning.modeling.train - INFO - epoch: 3, train_loss: 0.03814942921910967, val_metric: 0.16545680539990607, test_regret: nan
Training Loader: Epoch 4/100: 100%|██████████| 7/

In [38]:
# test-set regret of the model trained with the cosine embedding loss
test_regret = calc_test_regret(pred_model=trained_model,
                               test_data_dict=final_data_test,
                               optmodel=optmodel)
print(test_regret)

0.030652094580479548


## CosineSurrogate

In [None]:
# Reload the loss module and re-import from it so the names below are bound to
# the reloaded definitions (presumably used while editing the module during development).
import importlib

import decision_learning.modeling.loss
importlib.reload(decision_learning.modeling.loss)
from decision_learning.modeling.loss import get_loss_function, CosineSurrogateDotProdMSE, CosineSurrogateDotProdVecMag

In [56]:
# Prediction Model
pred_model = LinearRegression(input_dim=train_dict['X'].shape[1],
                 output_dim=train_dict['true_cost'].shape[1])

# optimization solver
optmodel = partial(shortest_path_solver,size=5)

# loss function
loss_fn = CosineSurrogateDotProdVecMag(alpha=5)

# training, validation data - extra input keys for the loss function: 'input2'
# (the true costs) and 'target' (all ones).
# Build a new dict instead of updating train_dict in place: train_data_dict
# would otherwise alias train_dict, and the extra keys would leak into every
# other experiment that reuses train_dict (cells may be run in any order).
# The copy is shallow, so the underlying data is shared, not duplicated.
train_data_dict = {**train_dict,
                   'input2': train_dict['true_cost'],
                   'target': torch.ones(train_dict['true_cost'].shape[0])}

val_data_dict = val_dict

In [57]:
# train with the cosine surrogate loss; no test_data_dict is passed here
# (the log below reports test_regret as nan, presumably for that reason)
metrics, trained_model = train(pred_model=pred_model,
                optmodel=optmodel,
                loss_fn=loss_fn,
                train_data_dict=train_data_dict,
                val_data_dict=val_data_dict,
                num_epochs=100,
                lr=0.1,
                scheduler_params={'step_size': 10, 'gamma': 0.1},
                minimization=True)

2025-01-22 08:51:27,517 - decision_learning.modeling.train - INFO - Training on device: cpu
Training Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 999.26it/s]
Validation Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 2384.09it/s]
2025-01-22 08:51:27,534 - decision_learning.modeling.train - INFO - epoch: 1, train_loss: 18.78922372630664, val_metric: 0.35656546827679825, test_regret: nan
Training Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 1419.05it/s]
Validation Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 3413.17it/s]
2025-01-22 08:51:27,545 - decision_learning.modeling.train - INFO - epoch: 2, train_loss: 0.9444219555173602, val_metric: 0.2952087447237551, test_regret: nan
Training Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 1889.33it/s]
Validation Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 3961.70it/s]
2025-01-22 08:51:27,554 - decision_learning.modeling.train - INFO - epoch: 3, train_loss: -9.093532834734235, val_met

In [58]:
# test-set regret of the model trained with the cosine surrogate loss
test_regret = calc_test_regret(pred_model=trained_model,
                               test_data_dict=final_data_test,
                               optmodel=optmodel)
print(test_regret)

0.04870700092000947
