# Imports

In [1]:
import importlib
from functools import partial

from torch import nn
import torch
import pandas as pd
import numpy as np

import decision_learning.modeling.pipeline
import decision_learning.data.shortest_path_grid

from decision_learning.utils import handle_solver
from decision_learning.modeling.models import LinearRegression
from decision_learning.modeling.pipeline import lossfn_experiment_pipeline, lossfn_hyperparam_grid
from decision_learning.data.shortest_path_grid import genData

# Pipeline Setup Overview
To run pipeline function in `decision_learning.modeling.pipeline`, we need these components:
- data (features, true costs), and appropriate train-test splits
- prediction model: predicting true costs
- optimization model: linear optimization model parameterized by cost/coefficient vector for objective function, and returns the corresponding solution, objective value.
- existing loss functions (hyperparameter configs): loss functions already implemented within `decision_learning.modeling.loss` that train the prediction model against true costs as labels. Each is specified by its loss name as a string, with the hyperparameters to search over given as a dictionary.
- custom loss functions: user provided loss function as a callable, and data dictionary with appropriate features, and labels required by loss function
- misc params: other parameters to set for pipeline experiment function
    - val_split_params={'test_size':200, 'random_state':42},
    - training configuration: ex: {'num_epochs':100, 'dataloader_params': {'batch_size':200, 'shuffle':True}}

## Optimization Model (linear)

Decision-aware/focused problems require an optimization model to actually solve the decision problem. Since each decision problem is unique in terms of the modeling and solving, the user is expected to provide the optimization model function/object, which is treated like a black-box by the `pipeline`,`train`, and loss/regret functions in the code base. It could be Gurobi, Pyomo, or any user custom solver. However, to play nicely with the rest of the package, it must do the following:

- Input Argument when called:
    - costs: vector of objective function coefficients. Expected to be numpy np.ndarray or torch.tensor
- Returns 2 objects:
    - sols: solution to optimization model given the input costs. Expected to be numpy np.ndarray or torch.tensor
    - obj: objective value to optimization model given the input costs. Expected to be numpy np.ndarray or torch.tensor
 
Any solver generally returns an optimal solution and its objective value, and any linear program has an objective function parameterized by a vector of costs/coefficients, so these requirements should be easy to satisfy.


### Example Solver/Optimization Model
- Below, `shortest_path_solver` is a custom user optimization model specified in the form of a callable function, and its first input argument is the vector of costs. The rest of the input arguments size, sens, need to be pre-set before being passed to `pipeline`, `train`, or any loss function. This can be accomplished using the `partial` python function (see example below). The exact implementation is not important but mainly that it:
    - accepts a costs vector input
    - returns solution (sol) and objective value (obj) for the input cost vector
- Note that `shortest_path_solver` also has two returns: sol, obj

In [2]:
def shortest_path_solver(costs, size, sens = 1e-4):
    """Batched shortest-path solver on a square ``size`` x ``size`` grid.

    The grid nodes are processed in ``2 * size - 1`` layers; layer ``k`` holds
    ``min(k + 1, 2 * size - 1 - k)`` nodes. A forward pass computes the
    cheapest cost-to-reach for every node, and a backward pass walks from the
    last node to the first, marking every arc that lies on a minimum-cost path
    (within tolerance ``sens``).

    Args:
        costs: 2-D array/tensor of shape ``(samples, 2 * size * (size - 1))``
            holding the arc costs, one row per problem instance. The arcs are
            stored layer by layer, interleaved as (left, right) pairs.
            ``np.ndarray`` inputs are converted with ``torch.as_tensor``.
        size: side length of the grid. May be an int, or a numpy array /
            torch tensor whose first element is the side length.
        sens: tolerance used when testing whether an arc is tight, i.e. lies
            on a shortest path.

    Returns:
        sol: ``torch.float32`` tensor with the same shape as ``costs``; entries
            are non-zero for arcs on a shortest path. When several paths tie,
            arcs of all of them are marked and entries can exceed 1.
        obj: ``torch.float32`` tensor of shape ``(samples, 1)`` equal to
            ``sum(sol * costs)`` per row.

    Raises:
        ValueError: if ``size`` is not positive or ``costs`` is not a 2-D
            array with ``2 * size * (size - 1)`` columns.
    """
    # Normalise `size` to a plain Python int (arrays/tensors carry one value per sample).
    if isinstance(size, torch.Tensor):
        size = size.reshape(-1)[0].item()
    elif isinstance(size, np.ndarray):
        size = size.reshape(-1)[0]
    size = int(size)
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size}")

    # Accept numpy input; a tensor is returned unchanged (autograd history is kept).
    costs = torch.as_tensor(costs)
    expected_arcs = 2 * size * (size - 1)
    if costs.dim() != 2 or costs.shape[1] != expected_arcs:
        raise ValueError(
            f"costs must have shape (samples, {expected_arcs}) for a "
            f"{size}x{size} grid, got {tuple(costs.shape)}"
        )

    device = costs.device
    samples = costs.shape[0]
    # Number of node layers in the grid (this was the hard-coded 9 for size == 5).
    num_layers = 2 * size - 1

    def _layer_width(k):
        # Node count of layer k: grows by one up to `size`, then shrinks again.
        return min(k + 1, num_layers - k)

    # Forward Pass: V_arr[:, node] = cheapest cost to reach `node` from the first node
    starting_ind = 0
    starting_ind_c = 0
    V_arr = torch.zeros(samples, size ** 2, device=device)
    for i in range(0, 2 * (size - 1)):
        num_nodes = _layer_width(i)
        num_nodes_next = _layer_width(i + 1)
        num_arcs = 2 * (max(num_nodes, num_nodes_next) - 1)
        V_1 = V_arr[:, starting_ind:starting_ind + num_nodes]
        layer_costs = costs[:, starting_ind_c:starting_ind_c + num_arcs]
        l_costs = layer_costs[:, 0::2]
        r_costs = layer_costs[:, 1::2]
        next_V_val_l = torch.full((samples, num_nodes_next), float('inf'), device=device)
        next_V_val_r = torch.full((samples, num_nodes_next), float('inf'), device=device)
        if num_nodes_next > num_nodes:
            # Widening layer: node j feeds next-layer nodes j (left) and j + 1 (right).
            next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
            next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
        else:
            # Narrowing layer: next-layer node k is fed by nodes k (left) and k + 1 (right).
            next_V_val_l = V_1[:, :num_nodes_next] + l_costs
            next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
        next_V_val = torch.minimum(next_V_val_l, next_V_val_r)
        V_arr[:, starting_ind + num_nodes:starting_ind + num_nodes + num_nodes_next] = next_V_val

        starting_ind += num_nodes
        starting_ind_c += num_arcs

    # Backward Pass: starting from the last node, mark arcs that are tight
    # (V[src] + cost - V[dst] < sens) and lead into a node already on a shortest path.
    starting_ind = size ** 2
    starting_ind_c = costs.shape[1]
    prev_act = torch.ones(samples, 1, device=device)
    sol = torch.zeros(costs.shape, device=device)
    for i in range(2 * (size - 1), 0, -1):
        num_nodes = _layer_width(i)
        num_nodes_next = _layer_width(i - 1)
        V_1 = V_arr[:, starting_ind - num_nodes:starting_ind]
        V_2 = V_arr[:, starting_ind - num_nodes - num_nodes_next:starting_ind - num_nodes]

        num_arcs = 2 * (max(num_nodes, num_nodes_next) - 1)
        layer_costs = costs[:, starting_ind_c - num_arcs: starting_ind_c]

        if num_nodes < num_nodes_next:
            l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
            r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
            prev_act = torch.zeros_like(V_2)
            prev_act[:, :num_nodes_next - 1] += l_cs_res
            prev_act[:, 1:num_nodes_next] += r_cs_res
        else:
            l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
            r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
            prev_act = torch.zeros_like(V_2)
            prev_act += l_cs_res
            prev_act += r_cs_res
        # Re-interleave the left/right activations into the arc layout used by `costs`.
        cs = torch.zeros(layer_costs.shape, device=device)
        cs[:, ::2] = l_cs_res
        cs[:, 1::2] = r_cs_res
        sol[:, starting_ind_c - num_arcs: starting_ind_c] = cs

        starting_ind = starting_ind - num_nodes
        starting_ind_c = starting_ind_c - num_arcs
    # Dimension (samples, num edges)
    obj = torch.sum(sol * costs, axis=1)
    # Dimension (samples, 1)
    sol = sol.to(torch.float32)
    obj = obj.reshape(-1,1).to(torch.float32)
    return sol, obj

### Presetting non-cost input arguments of `shortest_path_solver`

In [3]:
# ------------optimization model------------
# Wrap the raw solver in the package's `handle_solver` adapter with its options fixed up front,
# leaving a callable that the pipeline can use as the optimization model.
optmodel = partial(
    handle_solver,
    optmodel=shortest_path_solver,
    solver_batch_solve=True,
    detach_tensor=False,
)

## Data Generation Setup
Any decision-aware/focused problem needs data inputs. The example below uses a pre-implemented synthetic data generator from the package, found in
`decision_learning.data.shortest_path_grid`, which generates shortest path problem data when the `genData` function is called.

### Specific parameters to set up data generation
This data setup, and the synthetic data generation is in line with the paper https://arxiv.org/pdf/2402.03256 

### Create Experiments Grid
This shortest path experiment has two important settings:
- number of samples: fewer samples means higher error/more noise, more samples means lower error/less noise
- epsilon: noise level on edge costs, can be uniformly distributed multiplicative noise, or normally distributed additive noise
- This example below creates 100 trials for 8 different settings

In [4]:
# Fix the PyTorch RNG so the two seed permutations below are reproducible
torch.manual_seed(105)
indices_arr = torch.randperm(100000)
indices_arr_test = torch.randperm(100000)

n_arr = [200, 400, 800, 1600]  # training-set sizes to sweep over
ep_arr = ['unif', 'normal']  # noise types to sweep over
trials = 100  # repetitions of every (size, noise type) setting

# Every experiment run is one [number of samples, noise type, trial number] entry,
# ordered by sample size, then noise type, then trial number.
exp_arr = [
    [n_samples, noise, trial_idx]
    for n_samples in n_arr
    for noise in ep_arr
    for trial_idx in range(trials)
]

In [5]:
# setup
# Only one experiment run (index `sim` of the grid) is shown, for demonstration purposes
sim = 0
exp = exp_arr[sim]
# each grid entry is [number of training data, noise type, trial number]
num_data, ep_type, trial = exp

# shortest path problem data generation parameters - https://arxiv.org/pdf/2402.03256
grid = (5, 5)  # grid size
num_feat = 5  # size of feature
deg = 6  # polynomial degree in edge cost function
e = .3  # noise width/amount of noise

# path planting for shortest path example - see page 9, subsection "Harder Example with Planted Arcs" in section 4.2 of paper https://arxiv.org/pdf/2402.03256
# Each planted edge cost is a two-segment piecewise linear function, given as a (slope, intercept) pair per segment.
planted_good_pwl_params = dict(
    slope0=0, int0=2,  # first segment of the "good" planted edge cost
    slope1=0, int1=2,  # second segment of the "good" planted edge cost
)
planted_bad_pwl_params = dict(
    slope0=4, int0=0,  # first segment of the "bad" planted edge cost
    slope1=0, int1=2.2,  # second segment of the "bad" planted edge cost
)
plant_edge = True  # to plant edges in shortest path experiment or not

print(f'current experiment setting: number of data points {num_data}, epsilon type {ep_type}, trial number {trial}')

current experiment setting: number of data points 200, epsilon type unif, trial number 0


### Calling `genData` from `decision_learning.data.shortest_path_grid`

In [6]:
# ------------DATA------------
# Generator settings shared by the training and the test set, so the two cannot drift apart
data_gen_kwargs = dict(
    num_features=num_feat,  # number of features
    grid=grid,  # grid shape
    deg=deg,  # polynomial degree
    noise_type=ep_type,  # epsilon noise type
    noise_width=e,  # amount of noise
    plant_edges=plant_edge,  # to plant edges or not
    planted_good_pwl_params=planted_good_pwl_params,  # cost function for good edges
    planted_bad_pwl_params=planted_bad_pwl_params,  # cost function for bad edges
)

# training data: `num_data` training points plus 200 extra rows, which the pipeline calls below
# split off as the validation set (val_split_params={'test_size':200, ...})
generated_data = genData(num_data=num_data+200,
        seed=indices_arr[trial],  # seed the randomness
        **data_gen_kwargs)

# testing data
generated_data_test = genData(num_data=10000,  # number of data points to generate for test set
        seed=indices_arr_test[trial],  # seed the randomness
        **data_gen_kwargs)

# Per-sample `size` argument for the solver, derived from `grid` instead of a hard-coded 5.
# The solver works on a square grid, so the first dimension is used. Kept as a float array,
# matching what `np.zeros(...) + 5` produced.
train_solver_kwargs = {'size': np.full(len(generated_data['cost']), grid[0], dtype=float)}
test_solver_kwargs = {'size': np.full(len(generated_data_test['cost']), grid[0], dtype=float)}

## Prediction Model
- Any decision-aware/focused problem will of course need prediction model to predict the cost/coefficient vector given contextual input/features. This example uses a simple `LinearRegression` object implemented within `decision_learning.modeling.models`. 
- The package expects the prediction model to be a PyTorch model since PyTorch offers convenient autograd functionality/allows user to specify custom losses/backwards passes that are found within many decision-aware/focused works.

In [7]:
# ------------prediction model------------
# Linear map from the feature columns to one predicted value per cost column
pred_model = LinearRegression(
    input_dim=generated_data['feat'].shape[1],
    output_dim=generated_data['cost'].shape[1],
)

# Pipeline Function Overview
Pipeline function `lossfn_experiment_pipeline` from `decision_learning.modeling.pipeline` takes in the following arguments:
- X_train: training set features
- true_cost_train: training set true costs
- X_test: test set features
- true_cost_test: test set true costs
- predmodel: pytorch prediction model
- optmodel: optimization model
- val_split_params: how to split training data into train/val splits. Defaults to {'test_size':0.2, 'random_state':42}.
- loss_names: list of loss functions to run experiment pipeline on that are implemented already in the codebase in decision_learning.modeling.loss
- loss_configs: dictionary mapping from loss_name (key) to a dictionary of different hyperparameters that are then grid searched over.
- custom_loss_inputs: list of custom loss function configurations to run through the train function as part of the experiment pipeline
- minimize: minimization problem?
- training_configs: parameters to be passed into train function for pytorch training loop. 
- save_models: flag to save models or not.

Note when running pipeline function, we turn off training loop logging/console output for each experiment setting, however, this can still be turned on by setting `training_loop_verbose=True` flag on when calling pipeline function

# Example: Off-the-Shelf Preimplemented Loss Functions
Since we only use off-the-shelf preimplemented loss functions here, without any hyperparameter search, we only need to pass the individual loss names `['SPO+', 'MSE', 'Cosine']` to the `loss_names` argument

In [9]:
# Development convenience: re-execute the pipeline module so that edits made to its source
# are picked up without restarting the kernel, then re-bind `lossfn_experiment_pipeline`
# to the reloaded module's version.
import decision_learning.modeling.pipeline
importlib.reload(decision_learning.modeling.pipeline)
from decision_learning.modeling.pipeline import lossfn_experiment_pipeline

In [13]:
# Train one model per pre-implemented loss (no hyperparameter grid) and keep the fitted models
preimplement_loss_results, preimplement_loss_models = lossfn_experiment_pipeline(
    # data
    X_train=generated_data['feat'],
    true_cost_train=generated_data['cost'],
    X_test=generated_data_test['feat'],
    true_cost_test=generated_data_test['cost_true'],
    # prediction model and optimization model (plus the solver's extra arguments)
    predmodel=pred_model,
    optmodel=optmodel,
    train_solver_kwargs=train_solver_kwargs,
    test_solver_kwargs=test_solver_kwargs,
    # experiment configuration
    val_split_params=dict(test_size=200, random_state=42),
    loss_names=['SPO+', 'MSE', 'Cosine'],
    training_configs=dict(
        num_epochs=100,
        dataloader_params=dict(batch_size=32, shuffle=True),
    ),
    save_models=True,
)

  next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
  next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
  next_V_val_l = V_1[:, :num_nodes_next] + l_costs
  next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
  l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
  r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
  l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
  r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
  obj = torch.sum(sol * costs, axis=1)
2025-02-04 11:33:51,408 - decision_learning.modeling.pipeline - INFO - Loss number 1/3, on loss function SPO+
2025-02-04 11:33:51,408 - decision_learning.modeling.pipeline - INFO - Trial 1/1 for running loss function SPO+, current hyperparameters: {}


2025-02-04 11:33:51,410 - decision_learning.modeling.train - INFO - Training on device: cpu
2025-02-04 11:33:52,305 - decision_learning.utils - INFO - Function 'train' took 0.8952939510345459 seconds to run.
2025-02-04 11:33:52,306 - decision_learning.modeling.pipeline - INFO - Loss number 2/3, on loss function MSE
2025-02-04 11:33:52,306 - decision_learning.modeling.pipeline - INFO - Trial 1/1 for running loss function MSE, current hyperparameters: {}
2025-02-04 11:33:52,307 - decision_learning.modeling.train - INFO - Training on device: cpu
2025-02-04 11:33:52,857 - decision_learning.utils - INFO - Function 'train' took 0.5508370399475098 seconds to run.
2025-02-04 11:33:52,858 - decision_learning.modeling.pipeline - INFO - Loss number 3/3, on loss function Cosine
2025-02-04 11:33:52,858 - decision_learning.modeling.pipeline - INFO - Trial 1/1 for running loss function Cosine, current hyperparameters: {}
2025-02-04 11:33:52,859 - decision_learning.modeling.train - INFO - Training on 

In [14]:
# Rows logged at epoch 99 (the last of the 100 training epochs), one per loss function
preimplement_loss_results.loc[preimplement_loss_results['epoch'] == 99]

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
99,99,3.818337,0.068014,0.032343,SPO+,{}
199,99,0.866071,0.106883,0.063263,MSE,{}
299,99,0.021646,0.061828,0.029458,Cosine,{}


### Saved Down Models
models saved as dictionary

In [15]:
# Models returned by the pipeline call above, which was run with save_models=True
preimplement_loss_models

{'SPO+_{}': LinearRegression(
   (linear): Linear(in_features=6, out_features=40, bias=True)
 ),
 'MSE_{}': LinearRegression(
   (linear): Linear(in_features=6, out_features=40, bias=True)
 ),
 'Cosine_{}': LinearRegression(
   (linear): Linear(in_features=6, out_features=40, bias=True)
 )}

# Providing Hyperparameter Search Example
Here we are still using off-the-shelf preimplemented loss functions, but since the PG loss accepts two hyperparameters (`'h'`: width size, and `'finite_diff_type'`: finite difference scheme), we can search over them by passing them to the `loss_configs` argument, as in the example below: `{'PG': {'h':[num_data**-.125, num_data**-.25, num_data**-.5, num_data**-1], 'finite_diff_type': ['B', 'C', 'F']}}`. The pipeline function uses the helper function `lossfn_hyperparam_grid` to take the Cartesian product of the `h` and `finite_diff_type` arrays.

In [16]:
# Grid-search the PG loss over every (h, finite_diff_type) combination
PG_results, PG_models = lossfn_experiment_pipeline(
    # data
    X_train=generated_data['feat'],
    true_cost_train=generated_data['cost'],
    X_test=generated_data_test['feat'],
    true_cost_test=generated_data_test['cost_true'],
    # prediction model and optimization model (plus the solver's extra arguments)
    predmodel=pred_model,
    optmodel=optmodel,
    train_solver_kwargs=train_solver_kwargs,
    test_solver_kwargs=test_solver_kwargs,
    # experiment configuration
    val_split_params=dict(test_size=200, random_state=42),
    loss_names=['PG'],
    loss_configs={
        'PG': {
            # widths shrink with the training-set size: n^-1/8, n^-1/4, n^-1/2, n^-1
            'h': [num_data ** -power for power in (.125, .25, .5, 1)],
            'finite_diff_type': ['B', 'C', 'F'],
        },
    },
    training_configs=dict(
        num_epochs=100,
        dataloader_params=dict(batch_size=32, shuffle=True),
    ),
    save_models=False,
)

  next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
  next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
  next_V_val_l = V_1[:, :num_nodes_next] + l_costs
  next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
  l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
  r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
  l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
  r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
  obj = torch.sum(sol * costs, axis=1)
2025-02-04 11:34:09,449 - decision_learning.modeling.pipeline - INFO - Loss number 1/1, on loss function PG
2025-02-04 11:34:09,451 - decision_learning.modeling.pipeline - INFO - Trial 1/12 for running loss function PG, current hyperparameters: {'h': 0.5156692688606229, 'finite_diff_type': 'B'}
2025-02-04 11:34:09,453 - decision_learning.modeling.train - INFO 

2025-02-04 11:34:10,665 - decision_learning.utils - INFO - Function 'train' took 1.2116978168487549 seconds to run.
2025-02-04 11:34:10,665 - decision_learning.modeling.pipeline - INFO - Trial 2/12 for running loss function PG, current hyperparameters: {'h': 0.5156692688606229, 'finite_diff_type': 'C'}
2025-02-04 11:34:10,666 - decision_learning.modeling.train - INFO - Training on device: cpu
2025-02-04 11:34:11,813 - decision_learning.utils - INFO - Function 'train' took 1.1472151279449463 seconds to run.
2025-02-04 11:34:11,813 - decision_learning.modeling.pipeline - INFO - Trial 3/12 for running loss function PG, current hyperparameters: {'h': 0.5156692688606229, 'finite_diff_type': 'F'}
2025-02-04 11:34:11,814 - decision_learning.modeling.train - INFO - Training on device: cpu
2025-02-04 11:34:12,957 - decision_learning.utils - INFO - Function 'train' took 1.143430233001709 seconds to run.
2025-02-04 11:34:12,958 - decision_learning.modeling.pipeline - INFO - Trial 4/12 for running

In [17]:
# Rows logged at epoch 99 (the last of the 100 training epochs), one per PG hyperparameter setting
PG_results.loc[PG_results['epoch'] == 99]

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
99,99,14.842943,0.115022,0.069211,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
199,99,14.161848,0.044928,0.007162,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
299,99,13.717442,0.088385,0.046018,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
399,99,14.951248,0.116574,0.070373,PG,"{'h': 0.26591479484724945, 'finite_diff_type':..."
499,99,14.634395,0.088882,0.042735,PG,"{'h': 0.26591479484724945, 'finite_diff_type':..."
599,99,14.012732,0.053794,0.012122,PG,"{'h': 0.26591479484724945, 'finite_diff_type':..."
699,99,15.130478,0.14769,0.098016,PG,"{'h': 0.07071067811865475, 'finite_diff_type':..."
799,99,14.809798,0.125931,0.075467,PG,"{'h': 0.07071067811865475, 'finite_diff_type':..."
899,99,15.013995,0.144541,0.091452,PG,"{'h': 0.07071067811865475, 'finite_diff_type':..."
999,99,16.233646,0.246358,0.191938,PG,"{'h': 0.005, 'finite_diff_type': 'B'}"


In [18]:
# The pipeline call above was run with save_models=False; the output below is an empty dict
PG_models

{}

### Additional Parameter Tuning Example - CosineSurrogateDotProdVecMag

In [19]:
# Sweep the `alpha` hyperparameter of the CosineSurrogateDotProdVecMag loss
cos_surr_results, cos_surr_models = lossfn_experiment_pipeline(
    # data
    X_train=generated_data['feat'],
    true_cost_train=generated_data['cost'],
    X_test=generated_data_test['feat'],
    true_cost_test=generated_data_test['cost_true'],
    # prediction model and optimization model (plus the solver's extra arguments)
    predmodel=pred_model,
    optmodel=optmodel,
    train_solver_kwargs=train_solver_kwargs,
    test_solver_kwargs=test_solver_kwargs,
    # experiment configuration
    val_split_params=dict(test_size=200, random_state=42),
    loss_names=['CosineSurrogateDotProdVecMag'],
    loss_configs={
        'CosineSurrogateDotProdVecMag': {'alpha': [0.01, 0.1, 1, 2.5, 5, 7.5, 10]},
    },
    training_configs=dict(
        num_epochs=100,
        dataloader_params=dict(batch_size=32, shuffle=True),
    ),
    save_models=False,
)

  next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
  next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
  next_V_val_l = V_1[:, :num_nodes_next] + l_costs
  next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
  l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
  r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
  l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
  r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
  obj = torch.sum(sol * costs, axis=1)
2025-02-04 11:34:48,352 - decision_learning.modeling.pipeline - INFO - Loss number 1/1, on loss function CosineSurrogateDotProdVecMag


2025-02-04 11:34:48,353 - decision_learning.modeling.pipeline - INFO - Trial 1/7 for running loss function CosineSurrogateDotProdVecMag, current hyperparameters: {'alpha': 0.01}
2025-02-04 11:34:48,354 - decision_learning.modeling.train - INFO - Training on device: cpu
2025-02-04 11:34:48,918 - decision_learning.utils - INFO - Function 'train' took 0.5641160011291504 seconds to run.
2025-02-04 11:34:48,919 - decision_learning.modeling.pipeline - INFO - Trial 2/7 for running loss function CosineSurrogateDotProdVecMag, current hyperparameters: {'alpha': 0.1}
2025-02-04 11:34:48,919 - decision_learning.modeling.train - INFO - Training on device: cpu
2025-02-04 11:34:49,421 - decision_learning.utils - INFO - Function 'train' took 0.5013349056243896 seconds to run.
2025-02-04 11:34:49,421 - decision_learning.modeling.pipeline - INFO - Trial 3/7 for running loss function CosineSurrogateDotProdVecMag, current hyperparameters: {'alpha': 1}
2025-02-04 11:34:49,422 - decision_learning.modeling.t

In [20]:
# Rows logged at epoch 99 (the last of the 100 training epochs), one per alpha value
cos_surr_results.loc[cos_surr_results['epoch'] == 99]

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
99,99,-1590.639073,0.318395,0.230097,CosineSurrogateDotProdVecMag,{'alpha': 0.01}
199,99,-690.318542,0.241694,0.173082,CosineSurrogateDotProdVecMag,{'alpha': 0.1}
299,99,-76.590943,0.091228,0.049137,CosineSurrogateDotProdVecMag,{'alpha': 1}
399,99,-32.698903,0.088055,0.050155,CosineSurrogateDotProdVecMag,{'alpha': 2.5}
499,99,-15.149711,0.091727,0.054726,CosineSurrogateDotProdVecMag,{'alpha': 5}
599,99,-10.697785,0.081171,0.049264,CosineSurrogateDotProdVecMag,{'alpha': 7.5}
699,99,-7.673268,0.091153,0.055243,CosineSurrogateDotProdVecMag,{'alpha': 10}


Original

In [None]:
# NOTE(review): unexecuted duplicate of the previous cell. The table shown beneath it differs from
# the one above and appears to be kept from an earlier ("Original") run for comparison -
# confirm, and either label it clearly or remove it, since re-running would overwrite that output.
cos_surr_results[cos_surr_results.epoch == 99]

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
99,99,-262.287231,0.391855,0.25006,CosineSurrogateDotProdVecMag,{'alpha': 0.01}
199,99,-227.810394,0.478831,0.267152,CosineSurrogateDotProdVecMag,{'alpha': 0.1}
299,99,-73.14035,0.299098,0.129629,CosineSurrogateDotProdVecMag,{'alpha': 1}
399,99,-30.2407,0.258759,0.075556,CosineSurrogateDotProdVecMag,{'alpha': 2.5}
499,99,-15.257957,0.204101,0.068355,CosineSurrogateDotProdVecMag,{'alpha': 5}
599,99,-10.171493,0.242172,0.106509,CosineSurrogateDotProdVecMag,{'alpha': 7.5}
699,99,-7.603749,0.249935,0.125792,CosineSurrogateDotProdVecMag,{'alpha': 10}


# Specific Model Initialization Example
Here we will use the pre-trained `SPO+` model as initialization point for PG loss example from above and observe the improvement in test_regret

In [26]:
# Same PG grid search as before, but starting from the model already trained with SPO+
PG_init_results, PG_init_models = lossfn_experiment_pipeline(
    # data
    X_train=generated_data['feat'],
    true_cost_train=generated_data['cost'],
    X_test=generated_data_test['feat'],
    true_cost_test=generated_data_test['cost_true'],
    # initial prediction model: the saved SPO+ model from the first pipeline run
    predmodel=preimplement_loss_models['SPO+_{}'],
    optmodel=optmodel,
    train_solver_kwargs=train_solver_kwargs,
    test_solver_kwargs=test_solver_kwargs,
    # experiment configuration
    val_split_params=dict(test_size=200, random_state=42),
    loss_names=['PG'],
    loss_configs={
        'PG': {
            # widths shrink with the training-set size: n^-1/8, n^-1/4, n^-1/2, n^-1
            'h': [num_data ** -power for power in (.125, .25, .5, 1)],
            'finite_diff_type': ['B', 'C', 'F'],
        },
    },
    training_configs=dict(
        num_epochs=100,
        dataloader_params=dict(batch_size=32, shuffle=True),
    ),
    save_models=False,
)

  next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
  next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
  next_V_val_l = V_1[:, :num_nodes_next] + l_costs
  next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
  l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
  r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
  l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
  r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
  obj = torch.sum(sol * costs, axis=1)
2025-02-04 11:35:33,321 - decision_learning.modeling.pipeline - INFO - Loss number 1/1, on loss function PG
2025-02-04 11:35:33,322 - decision_learning.modeling.pipeline - INFO - Trial 1/12 for running loss function PG, current hyperparameters: {'h': 0.5156692688606229, 'finite_diff_type': 'B'}
2025-02-04 11:35:33,324 - decision_learning.modeling.train - INFO 

In [27]:
# Rows logged at epoch 99 (the last of the 100 training epochs), one per PG hyperparameter setting
PG_init_results.loc[PG_init_results['epoch'] == 99]

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
99,99,14.293769,0.043477,0.009471,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
199,99,13.995255,0.044939,0.008897,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
299,99,13.945991,0.107598,0.057595,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
399,99,13.933623,0.046911,0.007061,PG,"{'h': 0.26591479484724945, 'finite_diff_type':..."
499,99,14.274433,0.046529,0.008756,PG,"{'h': 0.26591479484724945, 'finite_diff_type':..."
599,99,14.178854,0.051247,0.008525,PG,"{'h': 0.26591479484724945, 'finite_diff_type':..."
699,99,14.361191,0.050816,0.010623,PG,"{'h': 0.07071067811865475, 'finite_diff_type':..."
799,99,13.986787,0.045326,0.007424,PG,"{'h': 0.07071067811865475, 'finite_diff_type':..."
899,99,14.519786,0.049164,0.015538,PG,"{'h': 0.07071067811865475, 'finite_diff_type':..."
999,99,14.314454,0.068014,0.032343,PG,"{'h': 0.005, 'finite_diff_type': 'B'}"


# Custom Loss Function Example
Simple example using `nn.CosineEmbeddingLoss`, which takes different input arguments than our existing decision-aware loss functions

In [28]:
# ------------custom loss function inputs------------
# One dictionary per custom loss; every key below is necessary
custom_loss_inputs = [
    dict(
        # name of loss function - used just for final metric logging purposes
        loss_name='cosine',
        # callable loss
        loss=nn.CosineEmbeddingLoss,
        # data input for loss function: the features must be stored under the key X for the
        # pipeline function; the remaining keys are whatever labels/arguments the loss needs
        # (input2 and target are arguments used by nn.CosineEmbeddingLoss)
        data=dict(
            X=generated_data['feat'],
            input2=generated_data['cost'],
            target=torch.ones(generated_data['cost'].shape[0]),
        ),
    ),
]


Here we also set `training_loop_verbose=True` to showcase console/logging output when we allow for training loop outputs

In [29]:
# Development convenience: re-execute the pipeline module so that edits made to its source
# are picked up without restarting the kernel, then re-bind `lossfn_experiment_pipeline`
# to the reloaded module's version.
import decision_learning.modeling.pipeline
importlib.reload(decision_learning.modeling.pipeline)
from decision_learning.modeling.pipeline import lossfn_experiment_pipeline

In [30]:
# Run only the user-supplied loss, with the per-epoch training log switched on
custom_results, custom_models = lossfn_experiment_pipeline(
    # data
    X_train=generated_data['feat'],
    true_cost_train=generated_data['cost'],
    X_test=generated_data_test['feat'],
    true_cost_test=generated_data_test['cost_true'],
    # prediction model and optimization model (plus the solver's extra arguments)
    predmodel=pred_model,
    optmodel=optmodel,
    train_solver_kwargs=train_solver_kwargs,
    test_solver_kwargs=test_solver_kwargs,
    # experiment configuration
    val_split_params=dict(test_size=200, random_state=42),
    custom_loss_inputs=custom_loss_inputs,
    training_configs=dict(
        num_epochs=100,
        dataloader_params=dict(batch_size=32, shuffle=True),
    ),
    save_models=False,
    training_loop_verbose=True,
)

  next_V_val_l[:, :num_nodes_next - 1] = V_1 + l_costs
  next_V_val_r[:, 1:num_nodes_next] = V_1 + r_costs
  next_V_val_l = V_1[:, :num_nodes_next] + l_costs
  next_V_val_r = V_1[:, 1:num_nodes_next + 1] + r_costs
  l_cs_res = ((V_2[:, :num_nodes_next - 1] - V_1 + layer_costs[:, ::2]) < sens) * prev_act
  r_cs_res = ((V_2[:, 1:num_nodes_next] - V_1 + layer_costs[:, 1::2]) < sens) * prev_act
  l_cs_res = ((V_2 - V_1[:, :num_nodes - 1] + layer_costs[:, ::2]) < sens) * prev_act[:, :num_nodes - 1]
  r_cs_res = ((V_2 - V_1[:, 1:num_nodes] + layer_costs[:, 1::2]) < sens) * prev_act[:, 1:num_nodes]
  obj = torch.sum(sol * costs, axis=1)
2025-02-04 11:35:57,724 - decision_learning.modeling.pipeline - INFO - Trial 1/1 for custom loss functions, current loss function: cosine
2025-02-04 11:35:57,730 - decision_learning.modeling.train - INFO - Training on device: cpu


Training Loader: Epoch 1/100: 100%|██████████| 7/7 [00:00<00:00, 1119.25it/s]
2025-02-04 11:35:57,748 - decision_learning.modeling.train - INFO - epoch: 1, train_loss: 0.7574476259095329, val_metric: 0.5229720056566004, test_regret: 0.47177226737189426
Training Loader: Epoch 2/100: 100%|██████████| 7/7 [00:00<00:00, 1422.21it/s]
2025-02-04 11:35:57,764 - decision_learning.modeling.train - INFO - epoch: 2, train_loss: 0.5515320684228625, val_metric: 0.5253658028558674, test_regret: 0.4616907710520226
Training Loader: Epoch 3/100: 100%|██████████| 7/7 [00:00<00:00, 1426.22it/s]
2025-02-04 11:35:57,778 - decision_learning.modeling.train - INFO - epoch: 3, train_loss: 0.393504981483732, val_metric: 0.5197530947844433, test_regret: 0.4482718987633784
Training Loader: Epoch 4/100: 100%|██████████| 7/7 [00:00<00:00, 1796.17it/s]
2025-02-04 11:35:57,791 - decision_learning.modeling.train - INFO - epoch: 4, train_loss: 0.2704984779868807, val_metric: 0.4983010696238059, test_regret: 0.427218404

In [31]:
# Row logged at epoch 99 (the last of the 100 training epochs) for the custom loss
custom_results.loc[custom_results['epoch'] == 99]

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
99,99,0.020716,0.064011,0.029448,cosine,


# Combine results from the pre-implemented, custom, and SPO+-initialized PG examples

In [32]:
# Stack the per-experiment result tables into one frame with a fresh 0..N-1 index
combined_results = pd.concat(
    (preimplement_loss_results, custom_results, PG_init_results),
    ignore_index=True,
)

## Find test regret using validation regret for hyperparameter selection

In [33]:
# For every loss, pick the row (epoch and hyperparameters) with the lowest validation metric,
# then rank the losses by the test regret observed at that row
best_val_rows = combined_results.groupby('loss_name')['val_metric'].idxmin()
combined_results.loc[best_val_rows].sort_values(by='test_regret')

Unnamed: 0,epoch,train_loss,val_metric,test_regret,loss_name,hyperparameters
497,97,14.291678,0.042557,0.009365,PG,"{'h': 0.5156692688606229, 'finite_diff_type': ..."
276,76,0.021567,0.058663,0.025965,Cosine,{}
379,79,0.021325,0.05757,0.026365,cosine,
70,70,4.137239,0.063587,0.031507,SPO+,{}
189,89,0.772511,0.104221,0.064851,MSE,{}
