# ERM agents

> Newsvendor agents based on Empirical Risk Minimization (ERM) principles.

In [None]:
#| default_exp agents.newsvendor.erm

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export

import logging

from abc import ABC, abstractmethod
from typing import Union, Optional, List, Tuple, Literal, Callable, Dict
import numpy as np
import os
from tqdm import tqdm
import time
from IPython import get_ipython

from ddopnew.envs.base import BaseEnvironment
from ddopnew.agents.base import BaseAgent
from ddopnew.utils import MDPInfo, Parameter, DatasetWrapper, DatasetWrapperMeta
from ddopnew.torch_utils.loss_functions import TorchQuantileLoss, TorchPinballLoss
from ddopnew.obsprocessors import FlattenTimeDimNumpy
from ddopnew.dataloaders.base import BaseDataLoader
from ddopnew.ml_utils import LRSchedulerPerStep

import torch

from torchinfo import summary

In [None]:
#| export

class SGDBaseAgent(BaseAgent):

    """
    Base class for Agents that are trained using Stochastic Gradient Descent (SGD) on PyTorch models.

    Subclasses have to implement ```set_model``` (must assign ```self.model```) and
    ```set_loss_function``` (must assign ```self.loss_function``` and may assign
    ```self.loss_function_params```).
    """

    # TODO: Remove input shapes as input and get from MDPInfo

    train_mode = "epochs_fit"
    
    def __init__(self, 
            environment_info: MDPInfo,
            dataloader: BaseDataLoader,
            input_shape: Tuple,
            output_shape: Tuple,
            dataset_params: Optional[dict] = None, # parameters needed to convert the dataloader to a torch dataset
            dataloader_params: Optional[dict] = None, # default: {"batch_size": 32, "shuffle": True}
            optimizer_params: Optional[dict] = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
            learning_rate_scheduler_params: Dict | None = None, # default: None. If dict, then first key is "scheduler" and the rest are the parameters
            obsprocessors: Optional[List] = None,     # default: []
            device: str = "cpu", # "cuda" or "cpu"
            agent_name: str | None = None,
            test_batch_size: int = 1024,
            receive_batch_dim: bool = False,
            ):

        # Initialize default values for mutable arguments
        optimizer_params = optimizer_params or {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
        dataloader_params = dataloader_params or {"batch_size": 32, "shuffle": True}
        dataset_params = dataset_params or {}

        self.device = self.set_device(device)
        
        self.set_dataloader(dataloader, dataset_params, dataloader_params)

        # Order matters: the optimizer needs the model parameters and the
        # learning rate scheduler needs the optimizer.
        self.set_model(input_shape, output_shape)
        self.loss_function_params = None # default, may be overwritten by set_loss_function
        self.set_loss_function()
        self.set_optimizer(optimizer_params)
        self.set_learning_rate_scheduler(learning_rate_scheduler_params)
        self.test_batch_size = test_batch_size

        super().__init__(environment_info = environment_info, obsprocessors = obsprocessors, agent_name = agent_name, receive_batch_dim = receive_batch_dim)

        logging.info("Network architecture:")
        if logging.getLogger().isEnabledFor(logging.INFO):

            # Build a dummy input with batch size 1 to let torchinfo trace the model.
            batch_dim = 1
            self.model.eval()
            if any(isinstance(obsprocessor, FlattenTimeDimNumpy) for obsprocessor in self.obsprocessors):
                # The observation is flattened before reaching the model
                input_size = (batch_dim, int(np.prod(input_shape)))
            else:
                input_size = (batch_dim, *input_shape)

            input_tensor = torch.randn(*input_size).to(self.device)
            input_tuple = (input_tensor,)

            # Inside IPython the summary object is printed explicitly
            if get_ipython() is not None:
                print(summary(self.model, input_data=input_tuple, device=self.device))
            else:
                summary(self.model, input_data=input_tuple, device=self.device)
            time.sleep(0.2)

        self.to(self.device)

    def set_device(self, device: str):

        """ Set the device for the model. Falls back to "cpu" if "cuda" is requested but not available. """

        if device == "cuda":
            if torch.cuda.is_available():
                return "cuda"
            else:
                logging.warning("CUDA is not available. Using CPU instead.")
                return "cpu"
        elif device == "cpu":
            return "cpu"
        else:
            raise ValueError(f"Device {device} currently not supported, use 'cuda' or 'cpu'")


    def set_dataloader(self,
                        dataloader: BaseDataLoader,
                        dataset_params: dict,
                        dataloader_params: dict, # dict with keys: batch_size, shuffle
                        ) -> None: 

        """
        Set the dataloader for the agent by wrapping it into a Torch Dataset
        
        """

        # Do nothing if the instance already has a dataloader attribute
        if not hasattr(self, 'dataloader'):

            dataset = DatasetWrapper(dataloader, **dataset_params)
            self.dataloader = torch.utils.data.DataLoader(dataset, **dataloader_params)

    @abstractmethod
    def set_loss_function(self):
        """ Set loss function for the model """
        pass

    @abstractmethod
    def set_model(self, input_shape: Tuple, output_shape: Tuple):
        """ Set the model for the agent """
        pass

    def set_optimizer(self, optimizer_params: dict): # dict with keys: optimizer, lr, weight_decay
        
        """ Set the optimizer for the model """

        # Do nothing if the instance already has an optimizer attribute
        if not hasattr(self, 'optimizer'):
            
            # Work on a copy such that the dict of the caller is not modified
            optimizer_params_copy = optimizer_params.copy()
            optimizer = optimizer_params_copy.pop("optimizer")

            if optimizer == "Adam":
                self.optimizer = torch.optim.Adam(self.model.parameters(), **optimizer_params_copy)
            elif optimizer == "SGD":
                self.optimizer = torch.optim.SGD(self.model.parameters(), **optimizer_params_copy)
            elif optimizer == "RMSprop":
                self.optimizer = torch.optim.RMSprop(self.model.parameters(), **optimizer_params_copy)
            else:
                raise ValueError(f"Optimizer {optimizer} not supported")
        
    def set_learning_rate_scheduler(self, learning_rate_scheduler_params: Optional[Dict]): # dict with key "scheduler" plus the scheduler parameters, or None
        """ Set learning rate scheduler (can be None) """

        if learning_rate_scheduler_params is not None:

            # Work on a copy such that the dict of the caller is not modified
            params = learning_rate_scheduler_params.copy()
            scheduler_type = params.pop("scheduler")
            if scheduler_type == "LRSchedulerPerStep":
                self.learning_rate_scheduler = LRSchedulerPerStep(self.optimizer, **params)
            else:
                raise ValueError(f"Learning rate scheduler {scheduler_type} not supported")

        else:
            self.learning_rate_scheduler = None

    def fit_epoch(self):

        """
        Fit the model for one epoch using the dataloader. Returns the sum of
        the loss values of all batches. The model is left in eval mode.
        """

        device = next(self.model.parameters()).device
        self.model.train()
        total_loss = 0

        for batch in tqdm(self.dataloader):
            
            # The dataloader may optionally provide batch-specific loss function parameters
            if len(batch) == 3:
                X, y, loss_function_params = batch
            else:
                X, y = batch
                loss_function_params = None

            # convert X and y to float32
            X = X.type(torch.float32)
            y = y.type(torch.float32)
            
            X, y = X.to(device), y.to(device)

            self.optimizer.zero_grad()

            y_pred = self.model(X)

            # Parameters from the batch take precedence over the agent-level parameters
            if loss_function_params is not None:
                loss = self.loss_function(y_pred, y, **loss_function_params)
            elif self.loss_function_params is not None:
                loss = self.loss_function(y_pred, y, **self.loss_function_params)
            else:
                loss = self.loss_function(y_pred, y)

            loss.backward()
            self.optimizer.step()

            # The scheduler is stepped once per batch, not once per epoch
            if self.learning_rate_scheduler is not None:
                self.learning_rate_scheduler.step()
        
            total_loss += loss.item()
        
        self.model.eval()
        
        return total_loss

    def draw_action_(self, observation: np.ndarray) -> np.ndarray: #
        
        """ 
        Draw an action based on the fitted model (see predict method)
        """
        
        action = self.predict(observation)
        
        return action
    
    @staticmethod
    def split_into_batches(X: np.ndarray, batch_size: int) -> List[np.ndarray]: #
        """ Split the input into batches of the specified size (the last batch may be smaller) """
        return [X[i:i+batch_size] for i in range(0, len(X), batch_size)]

    def predict(self, X: np.ndarray) -> np.ndarray: #
        """
        Run the model in eval mode on X (in batches of size test_batch_size)
        and return the concatenated predictions. Raises a ValueError if any
        prediction is not finite.
        """

        device = next(self.model.parameters()).device
        self.model.eval()

        batches = self.split_into_batches(X, self.test_batch_size)

        y_pred_full = []
        for batch in batches:

            X_batch = torch.tensor(batch, dtype=torch.float32)
            X_batch = X_batch.to(device)

            with torch.no_grad():

                y_pred = self.model(X_batch)

                # check if y_pred is not finite:
                if not torch.all(torch.isfinite(y_pred)):

                    logging.error("Non-finite predictions: %s", y_pred)

                    # check if the input is the reason for the non-finite predictions:
                    if not torch.all(torch.isfinite(X_batch)):

                        logging.error("X is not finite")
                        logging.error("total X_shape: %s", X_batch.shape)
                        logging.error("non-finite indices: %s", torch.nonzero(~torch.isfinite(X_batch)))
                        logging.error("%s", X_batch)

                    raise ValueError("Predicted values are not finite")

            y_pred_full.append(y_pred.cpu().numpy())
        
        y_pred_full = np.concatenate(y_pred_full, axis=0)

        return y_pred_full

    def train(self):
        """set the internal state of the agent and its model to train"""
        self.mode = "train"
        self.model.train()

    def eval(self):
        """set the internal state of the agent and its model to eval"""
        self.mode = "eval"
        self.model.eval()

    def to(self, device: str): #
        """Move the model to the specified device"""
        self.model.to(device)

    def save(self,
                path: str, # The directory where the file will be saved.
                overwrite: bool=True): # Allow overwriting; if False, a FileExistsError will be raised if the file exists.
        
        """
        Save the PyTorch model to a file in the specified directory.

        """
        
        if not hasattr(self, 'model') or self.model is None:
            raise AttributeError("Model is not defined in the class.")

        # Create the directory path if it does not exist
        os.makedirs(path, exist_ok=True)

        # Construct the file path using os.path.join for better cross-platform compatibility
        full_path = os.path.join(path, "model.pth")

        if os.path.exists(full_path):
            if not overwrite:
                raise FileExistsError(f"The file {full_path} already exists and will not be overwritten.")
            else:
                logging.debug(f"Overwriting file {full_path}") # Only log with debug as during training we will continuously overwrite the model
        
        # Save the model's state_dict using torch.save
        torch.save(self.model.state_dict(), full_path)
        logging.debug(f"Model saved successfully to {full_path}")

    def load(self, path: str): # Only the path to the folder is needed, not the file itself
 
        """
        Load the PyTorch model from a file.
        """
        
        if not hasattr(self, 'model') or self.model is None:
            raise AttributeError("Model is not defined in the class.")

        # Construct the file path
        full_path = os.path.join(path, "model.pth")

        if not os.path.exists(full_path):
            raise FileNotFoundError(f"The file {full_path} does not exist.")

        try:
            # Load the model's state_dict using torch.load
            if self.device == "cuda":
                self.model.load_state_dict(torch.load(full_path))
            else:
                # Remap the tensors such that a model saved on a GPU can be loaded on a CPU-only machine
                self.model.load_state_dict(torch.load(full_path, map_location=torch.device('cpu')))
            logging.debug(f"Model loaded successfully from {full_path}")
        except Exception as e:
            # Chain the original exception to keep its traceback
            raise RuntimeError(f"An error occurred while loading the model: {e}") from e
    

In [None]:
show_doc(SGDBaseAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L31){target="_blank" style="float:right; font-size:smaller"}

## SGDBaseAgent

>      SGDBaseAgent (environment_info:ddopnew.utils.MDPInfo,
>                    dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                    input_shape:Tuple, output_shape:Tuple,
>                    dataset_params:Optional[dict]=None,
>                    dataloader_params:Optional[dict]=None,
>                    optimizer_params:Optional[dict]=None,
>                    learning_rate_scheduler_params:Optional[Dict]=None,
>                    obsprocessors:Optional[List]=None, device:str='cpu',
>                    agent_name:str|None=None, test_batch_size:int=1024,
>                    receive_batch_dim:bool=False)

*Base class for Agents that are trained using Stochastic Gradient Descent (SGD) on PyTorch models.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| dataset_params | Optional | None | parameters needed to convert the dataloader to a torch dataset |
| dataloader_params | Optional | None | default: {"batch_size": 32, "shuffle": True} |
| optimizer_params | Optional | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| learning_rate_scheduler_params | Optional | None | default: None. If dict, then first key is "scheduler" and the rest are the parameters |
| obsprocessors | Optional | None | default: [] |
| device | str | cpu | "cuda" or "cpu" |
| agent_name | str \| None | None |  |
| test_batch_size | int | 1024 |  |
| receive_batch_dim | bool | False |  |

### Important notes:

SGD-based agents are all agents that are trained via SGD such as Linear Models or Neural Networks. Some specific requirements are necessary to make them interface properly with the environment.

**Torch preprocessors**:

* In addition to the general Numpy-based pre-processors, we also provide pre-processors that work at the tensor level within the ```fit_epoch``` method and the ```predict``` method. They can be used in addition to the Numpy-based pre-processors or instead of them. It's important to ensure that the shape of the observations (after pre-processing) is the same for those coming from the environment and those coming from the dataloader during training.

**Dataloader**:

* As for normal supervised learning via Torch, we make use of the Torch dataloader to load the data. Instead of defining a custom dataset class, we provide a Wrapper that can be used around our dataloader to make its output and interface the same as a Torch dataset. The dataloader is then initialized when the agent is created such that the agent has access to the same dataloader as the environment.
 
**Training process**:

* The outer loop of the training process (epochs) is handled outside the agent by the ```run_experiment``` functions (or can also be customized). The agent needs to have a ```fit_epoch``` method that tells the agent what to do within an epoch. 
This includes:
    * Getting the data from the dataloader
    * Pre-processing the data
    * Forward pass
    * Loss calculation
    * Backward pass

In [None]:
show_doc(SGDBaseAgent.set_dataloader)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L112){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_dataloader

>      SGDBaseAgent.set_dataloader
>                                   (dataloader:ddopnew.dataloaders.base.BaseDat
>                                   aLoader, dataset_params:dict,
>                                   dataloader_params:dict)

*Set the dataloader for the agent by wrapping it into a Torch Dataset*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| dataloader | BaseDataLoader |  |
| dataset_params | dict |  |
| dataloader_params | dict | dict with keys: batch_size, shuffle |
| **Returns** | **None** |  |

In [None]:
show_doc(SGDBaseAgent.set_loss_function)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L130){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_loss_function

>      SGDBaseAgent.set_loss_function ()

*Set loss function for the model*

In [None]:
show_doc(SGDBaseAgent.set_model)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L135){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_model

>      SGDBaseAgent.set_model (input_shape:Tuple, output_shape:Tuple)

*Set the model for the agent*

In [None]:
show_doc(SGDBaseAgent.set_optimizer)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L139){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_optimizer

>      SGDBaseAgent.set_optimizer (optimizer_params:dict)

*Set the optimizer for the model*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| optimizer_params | dict | dict with keys: optimizer, lr, weight_decay |

In [None]:
show_doc(SGDBaseAgent.set_learning_rate_scheduler)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L158){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_learning_rate_scheduler

>      SGDBaseAgent.set_learning_rate_scheduler (learning_rate_scheduler_params)

*Set learning rate scheduler (can be None)*

|    | **Details** |
| -- | ----------- |
| learning_rate_scheduler_params |  |

In [None]:
show_doc(SGDBaseAgent.fit_epoch)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L174){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.fit_epoch

>      SGDBaseAgent.fit_epoch ()

*Fit the model for one epoch using the dataloader*

In [None]:
show_doc(SGDBaseAgent.draw_action_)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L219){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.draw_action_

>      SGDBaseAgent.draw_action_ (observation:numpy.ndarray)

*Draw an action based on the fitted model (see predict method)*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| observation | ndarray |  |
| **Returns** | **ndarray** |  |

In [None]:
show_doc(SGDBaseAgent.predict)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L234){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.predict

>      SGDBaseAgent.predict (X:numpy.ndarray)

*Do one forward pass of the model and return the prediction*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| X | ndarray |  |
| **Returns** | **ndarray** |  |

In [None]:
show_doc(SGDBaseAgent.train)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L262){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.train

>      SGDBaseAgent.train ()

*set the internal state of the agent and its model to train*

In [None]:
show_doc(SGDBaseAgent.eval)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L267){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.eval

>      SGDBaseAgent.eval ()

*set the internal state of the agent and its model to eval*

In [None]:
show_doc(SGDBaseAgent.to)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L272){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.to

>      SGDBaseAgent.to (device:str)

*Move the model to the specified device*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| device | str |  |

In [None]:
show_doc(SGDBaseAgent.save)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L276){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.save

>      SGDBaseAgent.save (path:str, overwrite:bool=True)

*Save the PyTorch model to a file in the specified directory.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| path | str |  | The directory where the file will be saved. |
| overwrite | bool | True | Allow overwriting; if False, a FileExistsError will be raised if the file exists. |

In [None]:
show_doc(SGDBaseAgent.load)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L304){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.load

>      SGDBaseAgent.load (path:str)

*Load the PyTorch model from a file.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| path | str | Only the path to the folder is needed, not the file itself |

In [None]:
#| export

class NVBaseAgent(SGDBaseAgent):

    """
    Base agent for the Newsvendor problem implementing
    the loss function for the Empirical Risk Minimization (ERM) approach
    based on quantile loss (default) or pinball loss.
    """

    def __init__(self, 
                environment_info: MDPInfo,
                dataloader: BaseDataLoader,
                cu: np.ndarray | Parameter,
                co: np.ndarray | Parameter,
                input_shape: Tuple,
                output_shape: Tuple,
                optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
                learning_rate_scheduler_params = None,  # TODO: add base class for learning rate scheduler for typing
                dataset_params: dict | None = None, # parameters needed to convert the dataloader to a torch dataset
                dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
                obsprocessors: list | None = None,      # default: []
                device: str = "cpu", # "cuda" or "cpu"
                agent_name: str | None = None,
                test_batch_size: int = 1024,
                receive_batch_dim: bool = False,
                loss_function: Literal["quantile", "pinball"] = "quantile", 
                ):

        # convert_to_numpy_array is not defined in this class (presumably inherited from BaseAgent)
        cu = self.convert_to_numpy_array(cu)
        co = self.convert_to_numpy_array(co)
        
        # Ratio cu / (cu + co), passed as the "quantile" parameter to the quantile loss
        self.sl = cu / (cu + co)
        self.cu = cu
        self.co = co

        # Remember the requested name separately: set_loss_function replaces
        # self.loss_function by the loss object, so the name would otherwise
        # be lost after the first call.
        self.loss_function_name = loss_function
        self.loss_function = loss_function


        super().__init__(
            environment_info=environment_info,
            dataloader=dataloader,
            input_shape=input_shape,
            output_shape=output_shape,
            optimizer_params=optimizer_params,
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            dataset_params=dataset_params,
            dataloader_params=dataloader_params,
            obsprocessors=obsprocessors,
            device=device,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
        )   
        
    def set_loss_function(self):
        
        """Set the loss function for the model to the quantile loss (default)
        or to the pinball loss with the specific cu and co values, depending on
        the requested loss function name. Raises a ValueError for any other name.
        The method can be called repeatedly."""

        # A string stored in self.loss_function is a (new) request by name;
        # otherwise fall back to the name remembered from an earlier call.
        if isinstance(self.loss_function, str):
            self.loss_function_name = self.loss_function
        requested = getattr(self, "loss_function_name", self.loss_function)

        if requested == "quantile":
            self.loss_function_params = {"quantile": self.sl}
            self.loss_function = TorchQuantileLoss(reduction="mean")

        elif requested == "pinball":
            self.loss_function_params = {"underage": self.cu, "overage": self.co}
            self.loss_function = TorchPinballLoss(reduction="mean")

        else:
            raise ValueError(f"Loss function {requested} not supported")

        logging.debug(f"Loss function set to {self.loss_function}")

In [None]:
show_doc(NVBaseAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L328){target="_blank" style="float:right; font-size:smaller"}

## NVBaseAgent

>      NVBaseAgent (environment_info:ddopnew.utils.MDPInfo,
>                   dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                   cu:numpy.ndarray|ddopnew.utils.Parameter,
>                   co:numpy.ndarray|ddopnew.utils.Parameter, input_shape:Tuple,
>                   output_shape:Tuple, optimizer_params:dict|None=None,
>                   learning_rate_scheduler_params=None,
>                   dataset_params:dict|None=None,
>                   dataloader_params:dict|None=None,
>                   obsprocessors:list|None=None, device:str='cpu',
>                   agent_name:str|None=None, test_batch_size:int=1024,
>                   receive_batch_dim:bool=False,
>                   loss_function:Literal['quantile','pinball']='quantile')

*Base agent for the Newsvendor problem implementing
the loss function for the Empirical Risk Minimization (ERM) approach
based on quantile loss.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| cu | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| co | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| optimizer_params | dict \| None | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| learning_rate_scheduler_params | NoneType | None | TODO: add base class for learning rate scheduler for typing |
| dataset_params | dict \| None | None | parameters needed to convert the dataloader to a torch dataset |
| dataloader_params | dict \| None | None | default: {"batch_size": 32, "shuffle": True} |
| obsprocessors | list \| None | None | default: [] |
| device | str | cpu | "cuda" or "cpu" |
| agent_name | str \| None | None |  |
| test_batch_size | int | 1024 |  |
| receive_batch_dim | bool | False |  |
| loss_function | Literal | quantile |  |

In [None]:
show_doc(NVBaseAgent.set_loss_function)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L381){target="_blank" style="float:right; font-size:smaller"}

### NVBaseAgent.set_loss_function

>      NVBaseAgent.set_loss_function ()

*Set the loss function for the model to the quantile loss. For training
the model uses quantile loss and not the pinball loss with specific cu and 
co values to ensure similar scale of the feedback signal during training.*

In [None]:
#| export

class NewsvendorlERMAgent(NVBaseAgent):

    """
    Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
    based on a linear (regression) model. Note that this implementation finds
    the optimal regression parameters via SGD.

    """

    def __init__(self, 
                environment_info: MDPInfo,
                dataloader: BaseDataLoader,
                cu: np.ndarray | Parameter,
                co: np.ndarray | Parameter,
                input_shape: Tuple,
                output_shape: Tuple,
                optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
                learning_rate_scheduler_params = None,  # TODO: add base class for learning rate scheduler for typing
                model_params: dict | None = None,  # default: {"relu_output": False}
                dataset_params: dict | None = None, # parameters needed to convert the dataloader to a torch dataset
                dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
                obsprocessors: list | None = None,  # default: []
                device: str = "cpu",  # "cuda" or "cpu"
                agent_name: str | None = "lERM",
                test_batch_size: int = 1024,
                receive_batch_dim: bool = False,
                loss_function: Literal["quantile", "pinball"] = "quantile", 
                ):

        # Handle mutable defaults unique to this class
        default_model_params = {
            "relu_output": False
            }

        # Has to be set before super().__init__, which calls set_model.
        # update_model_params is not defined in this class (presumably inherited from BaseAgent).
        self.model_params = self.update_model_params(default_model_params, model_params or {})

        super().__init__(
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            optimizer_params=optimizer_params,
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            dataloader_params=dataloader_params,
            dataset_params=dataset_params,
            obsprocessors=obsprocessors,
            device=device,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
            loss_function=loss_function,
        )

    def set_model(self, input_shape, output_shape):

        """Set the model for the agent to a linear model"""

        from ddopnew.approximators import LinearModel

        logging.debug(f"input shape {input_shape}")

        # flatten all dims of the input (e.g., time dim) into a single feature dim;
        # cast to a plain Python int since np.prod returns a NumPy integer
        input_size = int(np.prod(input_shape))
        output_size = output_shape[0]

        self.model = LinearModel(input_size=input_size, output_size=output_size, **self.model_params)

In [None]:
show_doc(NewsvendorlERMAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L401){target="_blank" style="float:right; font-size:smaller"}

## NewsvendorlERMAgent

>      NewsvendorlERMAgent (environment_info:ddopnew.utils.MDPInfo,
>                           dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                           cu:numpy.ndarray|ddopnew.utils.Parameter,
>                           co:numpy.ndarray|ddopnew.utils.Parameter,
>                           input_shape:Tuple, output_shape:Tuple,
>                           optimizer_params:dict|None=None,
>                           learning_rate_scheduler_params=None,
>                           model_params:dict|None=None,
>                           dataset_params:dict|None=None,
>                           dataloader_params:dict|None=None,
>                           obsprocessors:list|None=None, device:str='cpu',
>                           agent_name:str|None='lERM',
>                           test_batch_size:int=1024,
>                           receive_batch_dim:bool=False, loss_function:Literal[
>                           'quantile','pinball']='quantile')

*Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
based on a linear (regression) model. Note that this implementation finds
the optimal regression parameters via SGD.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| cu | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| co | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| optimizer_params | dict \| None | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| learning_rate_scheduler_params | NoneType | None | TODO: add base class for learning rate scheduler for typing |
| model_params | dict \| None | None | default: {"relu_output": False} |
| dataset_params | dict \| None | None | parameters needed to convert the dataloader to a torch dataset |
| dataloader_params | dict \| None | None | default: {"batch_size": 32, "shuffle": True} |
| obsprocessors | list \| None | None | default: [] |
| device | str | cpu | "cuda" or "cpu" |
| agent_name | str \| None | lERM |  |
| test_batch_size | int | 1024 |  |
| receive_batch_dim | bool | False |  |
| loss_function | Literal | quantile |  |

#### Further information:   
   
    References
    ----------
    
    .. [1] Gah-Yi Ban, Cynthia Rudin, "The Big Data Newsvendor: Practical Insights
        from Machine Learning", 2018.

In [None]:
show_doc(NewsvendorlERMAgent.set_model)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L455){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorlERMAgent.set_model

>      NewsvendorlERMAgent.set_model (input_shape, output_shape)

*Set the model for the agent to a linear model*

Example usage:

In [None]:
# Example: evaluate a linear ERM newsvendor agent on random data before and
# after fitting it with run_experiment.
from ddopnew.envs.inventory.single_period import NewsvendorEnv
from ddopnew.dataloaders.tabular import XYDataLoader
from ddopnew.experiment_functions import run_experiment, test_agent

# Split indices handed to XYDataLoader (names suggest: start of the validation
# and of the test range). These globals are reused by the next example cell.
val_index_start = 800 #90_000
test_index_start = 900 #100_000

# Synthetic data: 1000 samples with 2 features and 1 target column.
X = np.random.rand(1000, 2)
Y = np.random.rand(1000, 1)

dataloader = XYDataLoader(X, Y, val_index_start, test_index_start)

environment = NewsvendorEnv(
    dataloader = dataloader,
    underage_cost = 0.42857,
    overage_cost = 1.0,
    gamma = 0.999,
    horizon_train = 365,
)

# cu / co repeat the underage / overage cost given to the environment above;
# input_shape / output_shape match the column counts of X and Y.
agent = NewsvendorlERMAgent(environment.mdp_info,
                            dataloader,
                            cu=np.array([0.42857]),
                            co=np.array([1.0]),
                            input_shape=(2,),
                            output_shape=(1,),
                            optimizer_params= {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}, # other optimizers: "SGD", "RMSprop"
                            learning_rate_scheduler_params = None, # None: no learning rate scheduler is configured
                            model_params = {"relu_output": False}, # same value as the documented default
                            dataloader_params={"batch_size": 32, "shuffle": True},
                            device = "cpu", # "cuda" or "cpu"
)

# Baseline: evaluate the agent before any fitting.
environment.test()
agent.eval()

# R and J are the two values returned by test_agent
# (presumably total and discounted reward - verify against test_agent).
R, J = test_agent(agent, environment)

print(R, J)

# Third positional argument is 2 (presumably the number of epochs - the
# captured progress bar shows 2/2).
run_experiment(agent, environment, 2, run_id = "test") # fit agent via run_experiment function

# Evaluate again after fitting to compare against the baseline printed above.
environment.test()
agent.eval()

R, J = test_agent(agent, environment)

print(R, J)

INFO:root:Network architecture:


input shape (2,)
Layer (type:depth-idx)                   Output Shape              Param #
LinearModel                              [1, 1]                    --
├─Linear: 1-1                            [1, 1]                    3
├─Identity: 1-2                          [1, 1]                    --
Total params: 3
Trainable params: 3
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


INFO:root:Starting experiment
INFO:root:Initial evaluation: R=-53.74981173191295, J=-51.13917631562847
INFO:root:Starting training with epochs fit


-52.66597676734078 -50.12757372931984
Experiment directory: results/test


100%|██████████| 25/25 [00:00<00:00, 1853.75it/s]
100%|██████████| 25/25 [00:00<00:00, 1872.96it/s]
100%|██████████| 2/2 [00:00<00:00, 46.54it/s]
INFO:root:Finished training with epochs fit
INFO:root:Evaluation after training: R=-16.2603253581593, J=-15.494811098433305


-14.83217868015756 -14.120542478739525


In [None]:
#| export

class NewsvendorDLAgent(NVBaseAgent):

    """
    Newsvendor agent following the Empirical Risk Minimization (ERM) approach,
    with a deep learning model (an MLP, see `set_model`) as the model.
    """

    def __init__(
        self,
        environment_info: MDPInfo,
        dataloader: BaseDataLoader,
        cu: np.ndarray | Parameter,
        co: np.ndarray | Parameter,
        input_shape: Tuple,
        output_shape: Tuple,
        learning_rate_scheduler_params: Dict | None = None,

        # parameters in yaml file
        optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
        model_params: dict | None = None,  # default: {"hidden_layers": [64, 64], "drop_prob": 0.0, "batch_norm": False, "relu_output": False}
        dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
        dataset_params: dict | None = None,  # parameters needed to convert the dataloader to a torch dataset
        device: str = "cpu",  # "cuda" or "cpu"

        obsprocessors: list | None = None,  # default: []
        agent_name: str | None = "DLNV",
        test_batch_size: int = 1024,
        receive_batch_dim: bool = False,
        loss_function: Literal["quantile", "pinball"] = "quantile",
    ):

        # MLP defaults are created per call (no shared mutable default) and
        # combined with the caller's model_params via update_model_params.
        # The result is stored before the parent constructor runs, presumably
        # because the parent calls set_model, which reads self.model_params.
        mlp_defaults = {
            "hidden_layers": [64, 64],
            "drop_prob": 0.0,
            "batch_norm": False,
            "relu_output": False,
        }
        user_model_params = model_params or {}
        self.model_params = self.update_model_params(mlp_defaults, user_model_params)

        super().__init__(
            # problem definition
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            # training configuration
            optimizer_params=optimizer_params,
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            dataloader_params=dataloader_params,
            dataset_params=dataset_params,
            loss_function=loss_function,
            # runtime / bookkeeping
            obsprocessors=obsprocessors,
            device=device,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
        )

    def set_model(self, input_shape, output_shape):

        """Set the model for the agent to an MLP"""

        print("input shape", input_shape)

        # The MLP receives one flat input size: the product of all input dims.
        n_inputs = np.prod(input_shape)
        n_outputs = output_shape[0]

        from ddopnew.approximators import MLP
        self.model = MLP(
            input_size=n_inputs,
            output_size=n_outputs,
            **self.model_params,
        )

In [None]:
show_doc(NewsvendorDLAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L469){target="_blank" style="float:right; font-size:smaller"}

## NewsvendorDLAgent

>      NewsvendorDLAgent (environment_info:ddopnew.utils.MDPInfo,
>                         dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                         cu:numpy.ndarray|ddopnew.utils.Parameter,
>                         co:numpy.ndarray|ddopnew.utils.Parameter,
>                         input_shape:Tuple, output_shape:Tuple,
>                         learning_rate_scheduler_params:Optional[Dict]=None,
>                         optimizer_params:dict|None=None,
>                         model_params:dict|None=None,
>                         dataloader_params:dict|None=None,
>                         dataset_params:dict|None=None, device:str='cpu',
>                         obsprocessors:list|None=None,
>                         agent_name:str|None='DLNV', test_batch_size:int=1024,
>                         receive_batch_dim:bool=False, loss_function:Literal['q
>                         uantile','pinball']='quantile')

*Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
based on a deep learning model.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| cu | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| co | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| learning_rate_scheduler_params | Optional | None |  |
| optimizer_params | dict \| None | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| model_params | dict \| None | None | default: {"hidden_layers": [64, 64], "drop_prob": 0.0, "batch_norm": False, "relu_output": False} |
| dataloader_params | dict \| None | None | default: {"batch_size": 32, "shuffle": True} |
| dataset_params | dict \| None | None | parameters needed to convert the dataloader to a torch dataset |
| device | str | cpu | "cuda" or "cpu" |
| obsprocessors | list \| None | None | default: [] |
| agent_name | str \| None | DLNV |  |
| test_batch_size | int | 1024 |  |
| receive_batch_dim | bool | False |  |
| loss_function | Literal | quantile |  |

#### Further information:   
   
    References
    ----------
    
    .. [1] Afshin Oroojlooyjadid, Lawrence V. Snyder, Martin Takáč,
            "Applying Deep Learning to the Newsvendor Problem", 2018.

In [None]:
show_doc(NewsvendorDLAgent.set_model)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L528){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorDLAgent.set_model

>      NewsvendorDLAgent.set_model (input_shape, output_shape)

*Set the model for the agent to an MLP*

Example usage:

In [None]:
# Example: same workflow as the linear agent, but with the MLP-based agent.
# X, Y, val_index_start and test_index_start are reused from the previous
# example cell.
dataloader = XYDataLoader(X, Y, val_index_start, test_index_start)

environment = NewsvendorEnv(
    dataloader = dataloader,
    underage_cost = 0.42857,
    overage_cost = 1.0,
    gamma = 0.999,
    horizon_train = 365,
)

# Only the hidden layer sizes are given here; the remaining model parameters
# are left to the agent's defaults.
model_params = {
    "hidden_layers": [64, 64],
}

# cu / co repeat the underage / overage cost given to the environment above.
agent = NewsvendorDLAgent(environment.mdp_info,
                            dataloader,
                            cu=np.array([0.42857]),
                            co=np.array([1.0]),
                            input_shape=(2,),
                            output_shape=(1,),
                            optimizer_params= {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}, # other optimizers: "SGD", "RMSprop"
                            learning_rate_scheduler_params = None, # None: no learning rate scheduler is configured
                            model_params = model_params, # see dict defined above
                            dataloader_params={"batch_size": 32, "shuffle": True},
                            device = "cpu" # "cuda" or "cpu"
)

# Baseline: evaluate the agent before any fitting.
environment.test()
agent.eval()

# R and J are the two values returned by test_agent
# (presumably total and discounted reward - verify against test_agent).
R, J = test_agent(agent, environment)

print(R, J)

# Third positional argument is 2 (presumably the number of epochs - the
# captured progress bar shows 2/2).
run_experiment(agent, environment, 2, run_id = "test") # fit agent via run_experiment function

# Evaluate again after fitting to compare against the baseline printed above.
environment.test()
agent.eval()

R, J = test_agent(agent, environment)

print(R, J)

INFO:root:Network architecture:


input shape (2,)
Layer (type:depth-idx)                   Output Shape              Param #
MLP                                      [1, 1]                    --
├─Sequential: 1-1                        [1, 1]                    --
│    └─Linear: 2-1                       [1, 64]                   192
│    └─ReLU: 2-2                         [1, 64]                   --
│    └─Dropout: 2-3                      [1, 64]                   --
│    └─Linear: 2-4                       [1, 64]                   4,160
│    └─ReLU: 2-5                         [1, 64]                   --
│    └─Dropout: 2-6                      [1, 64]                   --
│    └─Linear: 2-7                       [1, 1]                    65
│    └─Identity: 2-8                     [1, 1]                    --
Total params: 4,417
Trainable params: 4,417
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.02
Estimated Total Size (MB): 

INFO:root:Starting experiment
INFO:root:Initial evaluation: R=-28.597681257306558, J=-27.220163205759015
INFO:root:Starting training with epochs fit


-26.989019584755987 -25.663410208519274
Experiment directory: results/test


100%|██████████| 25/25 [00:00<00:00, 1103.13it/s]
100%|██████████| 25/25 [00:00<00:00, 1337.54it/s]
100%|██████████| 2/2 [00:00<00:00, 30.57it/s]
INFO:root:Finished training with epochs fit
INFO:root:Evaluation after training: R=-16.16906666127553, J=-15.414261035575393


-14.93922111198494 -14.254037954757864


In [None]:
#| export
class BaseMetaAgent():

    """
    Mixin for meta agents: builds the torch dataloader from a
    `DatasetWrapperMeta` and stores it on `self.dataloader`.
    """

    def set_meta_dataloader(
        self, 
        dataloader: BaseDataLoader,
        dataset_params: dict | None = None, # parameters needed to convert the dataloader to a torch dataset
        dataloader_params: dict | None = None, # dict with keys: batch_size, shuffle. default: {"batch_size": 32, "shuffle": True}
        ) -> None:

        """
        Wrap `dataloader` in a `DatasetWrapperMeta` and store a
        `torch.utils.data.DataLoader` over it in `self.dataloader`.

        `None` is accepted for both parameter dicts because the meta agents
        forward their own `None` defaults here; unpacking `None` with `**`
        would raise a `TypeError`.
        """

        # No extra dataset arguments when none were supplied.
        if dataset_params is None:
            dataset_params = {}

        # Fall back to the default documented on the agent signatures.
        if dataloader_params is None:
            dataloader_params = {"batch_size": 32, "shuffle": True}

        dataset = DatasetWrapperMeta(dataloader, **dataset_params)

        self.dataloader = torch.utils.data.DataLoader(dataset, **dataloader_params)

In [None]:
#| export

class NewsvendorlERMMetaAgent(NewsvendorlERMAgent, BaseMetaAgent):

    """
    Linear (regression) ERM newsvendor agent that receives the sl as an
    additional input next to the features, so that it can forecast the
    optimal order quantity for different sl values. Depending on the training
    pipeline, the model can be adapted to become a full meta-learning
    algorithm cross products and cross sls.

    """

    def __init__(
        self,

        # Parameters for lERM agent
        environment_info: MDPInfo,
        dataloader: BaseDataLoader,
        cu: np.ndarray | Parameter,
        co: np.ndarray | Parameter,
        input_shape: Tuple,
        output_shape: Tuple,
        optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
        learning_rate_scheduler_params = None,  # TODO: add base class for learning rate scheduler for typing
        model_params: dict | None = None,  # default: {"relu_output": False}
        dataset_params: dict | None = None,  # parameters needed to convert the dataloader to a torch dataset
        dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
        obsprocessors: list | None = None,  # default: []
        device: str = "cpu",  # "cuda" or "cpu"
        agent_name: str | None = "lERMMeta",
        test_batch_size: int = 1024,
        receive_batch_dim: bool = False,
        loss_function: Literal["quantile", "pinball"] = "quantile",
    ):

        # The meta dataloader is installed on the instance before the parent
        # constructor runs. dataset_params is consumed here and is not
        # forwarded to the parent.
        self.set_meta_dataloader(dataloader, dataset_params, dataloader_params)

        super().__init__(
            # problem definition
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            # model and training configuration
            optimizer_params=optimizer_params,
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            model_params=model_params,
            dataloader_params=dataloader_params,
            loss_function=loss_function,
            # runtime / bookkeeping
            obsprocessors=obsprocessors,
            device=device,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
        )

In [None]:
#| export

class NewsvendorDLMetaAgent(NewsvendorDLAgent, BaseMetaAgent):

    """
    Neural-network ERM newsvendor agent that receives the sl as an additional
    input next to the features, so that it can forecast the optimal order
    quantity for different sl values. Depending on the training pipeline, the
    model can be adapted to become a full meta-learning algorithm cross
    products and cross sls.

    """

    def __init__(
        self,
        environment_info: MDPInfo,
        dataloader: BaseDataLoader,
        cu: np.ndarray | Parameter,
        co: np.ndarray | Parameter,
        input_shape: Tuple,
        output_shape: Tuple,
        learning_rate_scheduler_params = None,  # TODO: add base class for learning rate scheduler for typing

        # parameters in yaml file
        optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
        model_params: dict | None = None,  # default: {"hidden_layers": [64, 64], "drop_prob": 0.0, "batch_norm": False, "relu_output": False}
        dataset_params: dict | None = None,  # parameters needed to convert the dataloader to a torch dataset
        dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
        device: str = "cpu",  # "cuda" or "cpu"

        obsprocessors: list | None = None,  # default: []
        agent_name: str | None = "DLNV",
        test_batch_size: int = 1024,
        receive_batch_dim: bool = False,
        loss_function: Literal["quantile", "pinball"] = "quantile",
    ):

        # The meta dataloader is installed on the instance before the parent
        # constructor runs. dataset_params is consumed here and is not
        # forwarded to the parent.
        self.set_meta_dataloader(dataloader, dataset_params, dataloader_params)

        super().__init__(
            # problem definition
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            # model and training configuration
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            optimizer_params=optimizer_params,
            model_params=model_params,
            dataloader_params=dataloader_params,
            loss_function=loss_function,
            # runtime / bookkeeping
            device=device,
            obsprocessors=obsprocessors,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
        )


In [None]:
#| export

class NewsvendorDLTransformerAgent(NVBaseAgent):

    """
    Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
    based on a deep learning model with a Transformer architecture.
    """

    def __init__(self, 
                environment_info: MDPInfo,
                dataloader: BaseDataLoader,
                cu: np.ndarray | Parameter,
                co: np.ndarray | Parameter,
                input_shape: Tuple,
                output_shape: Tuple,
                learning_rate_scheduler_params: Dict | None = None,
                
                # parameters in yaml file
                optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
                model_params: dict | None = None,  # default: {"max_context_length": 128, "n_layer": 3, "n_head": 8, "n_embd_per_head": 32, "rope_scaling": None, "min_multiple": 256, "gating": True, "drop_prob": 0.0, "final_activation": "identity"}
                dataset_params: dict | None = None, # parameters needed to convert the dataloader to a torch dataset
                dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
                device: str = "cpu", # "cuda" or "cpu"

                obsprocessors: list | None = None,  # default: []
                agent_name: str | None = "DLNV",
                test_batch_size: int = 1024,
                receive_batch_dim: bool = False,
                loss_function: Literal["quantile", "pinball"] = "quantile",
                ):

        # Handle mutable defaults unique to this class
        default_model_params = {
            "max_context_length": 128,
            "n_layer": 3,
            "n_head": 8,
            "n_embd_per_head": 32,
            "rope_scaling": None,

            "min_multiple": 256,
            "gating": True,

            "drop_prob": 0.0,
            "final_activation": "identity",
            }
        
        # Combine the defaults with the caller's model_params; stored before
        # the parent constructor runs because set_model reads self.model_params.
        self.model_params = self.update_model_params(default_model_params, model_params or {})


        super().__init__(
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            optimizer_params=optimizer_params,
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            dataset_params=dataset_params,
            dataloader_params=dataloader_params,
            obsprocessors=obsprocessors,
            device=device,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
            loss_function=loss_function,
        )
         
    def set_model(self, input_shape, output_shape):
        
        """
        Set the model for the agent to a Transformer.

        Raises a ValueError if `input_shape` has fewer than two dimensions.
        """

        # Reject every shape with fewer than two dimensions, including the
        # empty shape, which a plain `== 1` comparison would let through.
        if len(input_shape) < 2:
            raise ValueError("Input shape must be at least 2D for Transformer model")

        output_size = output_shape[0]

        # Unlike the MLP agent, the full (unflattened) input shape is passed on.
        from ddopnew.approximators import Transformer
        self.model = Transformer(input_size=input_shape, output_size=output_size, **self.model_params)

In [None]:
#| export

class NewsvendorDLTransformerMetaAgent(NewsvendorDLTransformerAgent, BaseMetaAgent):

    """
    Attention-based neural-network ERM newsvendor agent that receives the sl
    as an additional input next to the features, so that it can forecast the
    optimal order quantity for different sl values. Depending on the training
    pipeline, the model can be adapted to become a full meta-learning
    algorithm cross products and cross sls.

    """

    def __init__(
        self,
        environment_info: MDPInfo,
        dataloader: BaseDataLoader,
        cu: np.ndarray | Parameter,
        co: np.ndarray | Parameter,
        input_shape: Tuple,
        output_shape: Tuple,
        learning_rate_scheduler_params: Dict | None = None,

        # parameters in yaml file
        optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
        model_params: dict | None = None,  # default: {"max_context_length": 128, "n_layer": 3, "n_head": 8, "n_embd_per_head": 32, "rope_scaling": None, "min_multiple": 256, "gating": True, "drop_prob": 0.0, "final_activation": "identity"}
        dataset_params: dict | None = None,  # parameters needed to convert the dataloader to a torch dataset
        dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
        device: str = "cpu",  # "cuda" or "cpu"

        obsprocessors: list | None = None,  # default: []
        agent_name: str | None = "DLNV",
        test_batch_size: int = 1024,
        receive_batch_dim: bool = False,
        loss_function: Literal["quantile", "pinball"] = "quantile",
    ):

        # The meta dataloader is installed on the instance before the parent
        # constructor runs. dataset_params is consumed here and is not
        # forwarded to the parent.
        self.set_meta_dataloader(dataloader, dataset_params, dataloader_params)

        super().__init__(
            # problem definition
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            # model and training configuration
            learning_rate_scheduler_params=learning_rate_scheduler_params,
            optimizer_params=optimizer_params,
            model_params=model_params,
            dataloader_params=dataloader_params,
            loss_function=loss_function,
            # runtime / bookkeeping
            device=device,
            obsprocessors=obsprocessors,
            agent_name=agent_name,
            test_batch_size=test_batch_size,
            receive_batch_dim=receive_batch_dim,
        )


In [None]:
#| hide
import nbdev; nbdev.nbdev_export() 