# ERM agents

> Newsvendor agents based on Empirical Risk Minimization (ERM) principles.

In [None]:
#| default_exp agents.newsvendor.erm

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export

import logging

from abc import ABC, abstractmethod
from typing import Union, Optional, List, Tuple
import numpy as np
import os


from ddopnew.envs.base import BaseEnvironment
from ddopnew.agents.base import BaseAgent
from ddopnew.utils import MDPInfo, Parameter, DatasetWrapper
from ddopnew.torch_utils.loss_functions import TorchQuantileLoss
from ddopnew.torch_utils.preprocessors import FlattenTimeDim

from ddopnew.dataloaders.base import BaseDataLoader

import torch

In [None]:
#| export

class SGDBaseAgent(BaseAgent):

    """
    Base class for Agents that are trained using Stochastic Gradient Descent (SGD) on PyTorch models.

    Subclasses must implement ``set_model`` (which has to assign ``self.model``) and
    ``set_loss_function`` (which has to assign ``self.loss_function`` and may assign
    ``self.loss_function_params``, a dict of extra keyword arguments for the loss).
    """

    train_mode = "epochs_fit"
    
    def __init__(self, 
            environment_info: MDPInfo,
            dataloader: BaseDataLoader,
            input_shape: Tuple,
            output_shape: Tuple,
            optimizer_params: Optional[dict] = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
            learning_rate_scheduler = None,  # TODO: add base class for learning rate scheduler for typing
            dataloader_params: Optional[dict] = None, # default: {"batch_size": 32, "shuffle": True}
            preprocessors: Optional[List] = None,     # default: []
            postprocessors: Optional[List] = None,     # default: []
            torch_preprocessors: Optional[List] = None,     # default: []
            device: str = "cpu", # "cuda" or "cpu"
            agent_name: str | None = None
            ):

        # Initialize default values for mutable arguments
        optimizer_params = optimizer_params or {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
        dataloader_params = dataloader_params or {"batch_size": 32, "shuffle": True}
        self.torch_preprocessors = torch_preprocessors or []

        # NOTE(review): the device is only stored here; this constructor itself does
        # not move the model. Use ``to`` to move the model - confirm this is intended.
        self.device = device
        
        # Order matters: the optimizer needs the parameters of the model created by set_model
        self.set_dataloader(dataloader, dataloader_params)
        self.set_model(input_shape, output_shape)
        self.set_loss_function()
        self.set_optimizer(optimizer_params)
        self.set_learning_rate_scheduler(learning_rate_scheduler)

        super().__init__(environment_info, preprocessors, postprocessors, agent_name)

    def set_dataloader(self,
                        dataloader: BaseDataLoader,
                        dataloader_params: dict, # dict with keys: batch_size, shuffle
                        ) -> None: 

        """
        Set the dataloader for the agent by wrapping it into a Torch Dataset
        
        """
        dataset = DatasetWrapper(dataloader)
        # dataloader_params are forwarded unchanged as keyword arguments to the Torch DataLoader
        self.dataloader = torch.utils.data.DataLoader(dataset, **dataloader_params)

    @abstractmethod
    def set_loss_function(self):
        """ Set loss function for the model """
        pass

    @abstractmethod
    def set_model(self, input_shape: Tuple, output_shape: Tuple):
        """ Set the model for the agent """
        pass

    def set_optimizer(self, optimizer_params: dict): # dict with keys: optimizer, lr, weight_decay
        
        """ Set the optimizer for the model """

        supported_optimizers = {
            "Adam": torch.optim.Adam,
            "SGD": torch.optim.SGD,
            "RMSprop": torch.optim.RMSprop,
        }

        optimizer = optimizer_params["optimizer"]
        # Everything except the optimizer name is passed on to the optimizer constructor;
        # the caller's dict is left untouched.
        optimizer_kwargs = {key: value for key, value in optimizer_params.items() if key != "optimizer"}

        if optimizer not in supported_optimizers:
            raise ValueError(f"Optimizer {optimizer} not supported")

        self.optimizer = supported_optimizers[optimizer](self.model.parameters(), **optimizer_kwargs)
        
    def set_learning_rate_scheduler(self, learning_rate_scheduler: None = None): #
        """ Set learning rate scheduler (can be None) """
        if learning_rate_scheduler is not None:
            raise NotImplementedError("Learning rate scheduler not implemented yet")
        else:
            self.learning_rate_scheduler = None

    def fit_epoch(self):

        """
        Fit the model for one epoch using the dataloader.

        Returns the sum of the per-batch loss values of the epoch.
        """

        device = next(self.model.parameters()).device
        self.model.train()
        total_loss = 0

        # Extra keyword arguments for the loss. A subclass may leave the attribute
        # undefined or set it to None, in which case the loss is called without any.
        loss_kwargs = getattr(self, "loss_function_params", None) or {}

        for X, y in self.dataloader:

            # convert X and y to float32
            X = X.type(torch.float32)
            y = y.type(torch.float32)

            # Tensor-level pre-processing is applied before moving the batch to the model's device
            for torch_preprocessor in self.torch_preprocessors:
                X = torch_preprocessor(X)
            
            X, y = X.to(device), y.to(device)

            self.optimizer.zero_grad()

            y_pred = self.model(X)

            loss = self.loss_function(y_pred, y, **loss_kwargs) # TODO: add reduction param when defining loss function

            loss.backward()
            self.optimizer.step()
        
            total_loss += loss.item()
        
        # Leave the model in eval mode after the epoch
        self.model.eval()
        
        return total_loss

    def draw_action_(self, observation: np.ndarray) -> np.ndarray: #
        
        """ 
        Draw an action based on the fitted model (see predict method)
        """
        
        action = self.predict(observation)
        
        return action

    def predict(self, X: np.ndarray) -> np.ndarray: #
        """ Do one forward pass of the model and return the prediction """

        # TODO handle if X is larger than some size, then split into batches

        device = next(self.model.parameters()).device
        self.model.eval()

        # Same pipeline as in fit_epoch: float32 tensor -> torch pre-processors -> model device
        X = torch.tensor(X, dtype=torch.float32)
        for torch_preprocessor in self.torch_preprocessors:
            X = torch_preprocessor(X)
        X = X.to(device)

        with torch.no_grad():
            y_pred = self.model(X)

        # Return the prediction as a numpy array on the CPU
        y_pred = y_pred.cpu().numpy()

        return y_pred

    def train(self):
        """set the internal state of the agent and its model to train"""
        self.mode = "train"
        self.model.train()

    def eval(self):
        """set the internal state of the agent and its model to eval"""
        self.mode = "eval"
        self.model.eval()

    def to(self, device: str): #
        """Move the model to the specified device"""
        self.model.to(device)

    def save(self,
                path: str, # The directory where the file will be saved.
                overwrite: bool=True): # Allow overwriting; if False, a FileExistsError will be raised if the file exists.
        
        """
        Save the PyTorch model to a file in the specified directory.

        The model's state_dict is written to "model.pth" inside `path`; the
        directory is created if it does not exist.
        """
        
        if not hasattr(self, 'model') or self.model is None:
            raise AttributeError("Model is not defined in the class.")

        # Create the directory path if it does not exist
        os.makedirs(path, exist_ok=True)

        # Construct the file path using os.path.join for better cross-platform compatibility
        full_path = os.path.join(path, "model.pth")

        if os.path.exists(full_path):
            if not overwrite:
                raise FileExistsError(f"The file {full_path} already exists and will not be overwritten.")
            else:
                logging.info(f"Overwriting file {full_path}") # Only log with info as during training we will continuously overwrite the model
        
        # Save the model's state_dict using torch.save
        torch.save(self.model.state_dict(), full_path)
        logging.info(f"Model saved successfully to {full_path}")

    def load(self, path: str): # Only the path to the folder is needed, not the file itself
 
        """
        Load the PyTorch model from a file.

        Reads "model.pth" from `path` and loads it into the existing model. The
        stored tensors are mapped onto the device the model currently lives on, so
        a checkpoint written on another device can still be loaded. Raises
        FileNotFoundError if the file is missing and RuntimeError (chained to the
        original exception) if loading fails.
        """
        
        if not hasattr(self, 'model') or self.model is None:
            raise AttributeError("Model is not defined in the class.")

        # Construct the file path
        full_path = os.path.join(path, "model.pth")

        if not os.path.exists(full_path):
            raise FileNotFoundError(f"The file {full_path} does not exist.")

        # Map the checkpoint onto the model's current device (None keeps torch's default
        # behaviour for a model without parameters)
        first_parameter = next(self.model.parameters(), None)
        map_location = first_parameter.device if first_parameter is not None else None

        try:
            # Load the model's state_dict using torch.load
            state_dict = torch.load(full_path, map_location=map_location)
            self.model.load_state_dict(state_dict)
            logging.info(f"Model loaded successfully from {full_path}")
        except Exception as e:
            raise RuntimeError(f"An error occurred while loading the model: {e}") from e
    

In [None]:
show_doc(SGDBaseAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L26){target="_blank" style="float:right; font-size:smaller"}

## SGDBaseAgent

>      SGDBaseAgent (environment_info:ddopnew.utils.MDPInfo,
>                    dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                    input_shape:int, output_shape:int,
>                    optimizer_params:Optional[dict]=None,
>                    learning_rate_scheduler=None,
>                    dataloader_params:Optional[dict]=None,
>                    preprocessors:Optional[List]=None,
>                    postprocessors:Optional[List]=None,
>                    torch_preprocessors:Optional[List]=None, device:str='cpu',
>                    agent_name:str|None=None)

*Base class for Agents that are trained using Stochastic Gradient Descent (SGD) on PyTorch models.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| input_shape | int |  |  |
| output_shape | int |  |  |
| optimizer_params | Optional | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| learning_rate_scheduler | NoneType | None | TODO: add base class for learning rate scheduler for typing |
| dataloader_params | Optional | None | default: {"batch_size": 32, "shuffle": True} |
| preprocessors | Optional | None | default: [] |
| postprocessors | Optional | None | default: [] |
| torch_preprocessors | Optional | None | default: [] |
| device | str | cpu | "cuda" or "cpu" |
| agent_name | str \| None | None |  |

### Important notes:

SGD-based agents are all agents that are trained via SGD such as Linear Models or Neural Networks. Some specific requirements are necessary to make them interface properly with the environment.

**Torch pre-processors**:

* In addition to the general Numpy-based pre-processors, we also provide pre-processors that work on tensor level within the ```fit_epoch``` method and the ```predict``` method. They can be used in addition to the Numpy-based pre-processors or instead of them. It's important to ensure that the shape of observations (after pre-processing) is the same for those from the environment and those from the dataloader during training.

**Dataloader**:

* As for normal supervised learning via Torch, we make use of the Torch dataloader to load the data. Instead of defining a custom dataset class, we provide a Wrapper that can be used around our dataloader to make its output and interface the same as a Torch dataset. The dataloader is then initialized when the agent is created such that the agent has access to the same dataloader as the environment.
 
**Training process**:

* The outer loop of the training process (epochs) is handled outside the agent by the ```run_experiment``` functions (or can also be customized). The agent needs to have a ```fit_epoch``` method that tells the agent what to do within an epoch.
This includes:
    * Getting the data from the dataloader
    * Pre-processing the data
    * Forward pass
    * Loss calculation
    * Backward pass

In [None]:
show_doc(SGDBaseAgent.set_dataloader)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L64){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_dataloader

>      SGDBaseAgent.set_dataloader
>                                   (dataloader:ddopnew.dataloaders.base.BaseDat
>                                   aLoader, dataloader_params:dict)

*Set the dataloader for the agent by wrapping it into a Torch Dataset*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| dataloader | BaseDataLoader |  |
| dataloader_params | dict | dict with keys: batch_size, shuffle |
| **Returns** | **None** |  |

In [None]:
show_doc(SGDBaseAgent.set_loss_function)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L77){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_loss_function

>      SGDBaseAgent.set_loss_function ()

*Set loss function for the model*

In [None]:
show_doc(SGDBaseAgent.set_model)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L82){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_model

>      SGDBaseAgent.set_model (input_shape:Tuple, output_shape:Tuple)

*Set the model for the agent*

In [None]:
show_doc(SGDBaseAgent.set_optimizer)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L86){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_optimizer

>      SGDBaseAgent.set_optimizer (optimizer_params:dict)

*Set the optimizer for the model*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| optimizer_params | dict | dict with keys: optimizer, lr, weight_decay |

In [None]:
show_doc(SGDBaseAgent.set_learning_rate_scheduler)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L102){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.set_learning_rate_scheduler

>      SGDBaseAgent.set_learning_rate_scheduler
>                                                (learning_rate_scheduler:None=N
>                                                one)

*Set learning rate scheduler (can be None)*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| learning_rate_scheduler | None | None |  |

In [None]:
show_doc(SGDBaseAgent.fit_epoch)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L109){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.fit_epoch

>      SGDBaseAgent.fit_epoch ()

*Fit the model for one epoch using the dataloader*

In [None]:
show_doc(SGDBaseAgent.draw_action_)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L148){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.draw_action_

>      SGDBaseAgent.draw_action_ (observation:numpy.ndarray)

*Draw an action based on the fitted model (see predict method)*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| observation | ndarray |  |
| **Returns** | **ndarray** |  |

In [None]:
show_doc(SGDBaseAgent.predict)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L158){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.predict

>      SGDBaseAgent.predict (X:numpy.ndarray)

*Do one forward pass of the model and return the prediction*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| X | ndarray |  |
| **Returns** | **ndarray** |  |

In [None]:
show_doc(SGDBaseAgent.train)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L180){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.train

>      SGDBaseAgent.train ()

*set the internal state of the agent and its model to train*

In [None]:
show_doc(SGDBaseAgent.eval)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L185){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.eval

>      SGDBaseAgent.eval ()

*set the internal state of the agent and its model to eval*

In [None]:
show_doc(SGDBaseAgent.to)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L190){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.to

>      SGDBaseAgent.to (device:str)

*Move the model to the specified device*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| device | str |  |

In [None]:
show_doc(SGDBaseAgent.save)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L194){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.save

>      SGDBaseAgent.save (path:str, overwrite:bool=True)

*Save the PyTorch model to a file in the specified directory.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| path | str |  | The directory where the file will be saved. |
| overwrite | bool | True | Allow overwriting; if False, a FileExistsError will be raised if the file exists. |

In [None]:
show_doc(SGDBaseAgent.load)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L222){target="_blank" style="float:right; font-size:smaller"}

### SGDBaseAgent.load

>      SGDBaseAgent.load (path:str)

*Load the PyTorch model from a file.*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| path | str | Only the path to the folder is needed, not the file itself |

In [None]:
#| export

class NVBaseAgent(SGDBaseAgent):

    """
    Newsvendor base agent trained via Empirical Risk Minimization (ERM).

    Provides the loss function for all newsvendor ERM agents: a quantile loss
    whose quantile is the ratio cu / (cu + co) computed from the underage
    cost `cu` and the overage cost `co`.
    """

    def __init__(self, 
                environment_info: MDPInfo,
                dataloader: BaseDataLoader,
                cu: np.ndarray | Parameter,
                co: np.ndarray | Parameter,
                input_shape: Tuple,
                output_shape: Tuple,
                optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
                learning_rate_scheduler = None,  # TODO: add base class for learning rate scheduler for typing
                dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
                preprocessors: list | None = None,      # default: []
                postprocessors: list | None = None,     # default: []
                torch_preprocessors: list | None = None,  # default: []
                device: str = "cpu", # "cuda" or "cpu"
                agent_name: str | None = None,
                ):

        # Both costs go through convert_to_numpy_array before the ratio is formed
        underage = self.convert_to_numpy_array(cu)
        overage = self.convert_to_numpy_array(co)

        # The quantile has to exist before the parent constructor runs, because
        # the parent constructor calls set_loss_function(), which reads self.sl
        self.sl = underage / (underage + overage)

        super().__init__(
            environment_info=environment_info,
            dataloader=dataloader,
            input_shape=input_shape,
            output_shape=output_shape,
            optimizer_params=optimizer_params,
            learning_rate_scheduler=learning_rate_scheduler,
            dataloader_params=dataloader_params,
            preprocessors=preprocessors,
            postprocessors=postprocessors,
            torch_preprocessors=torch_preprocessors,
            device=device,
            agent_name=agent_name,
        )


    def set_loss_function(self):

        """Use the quantile loss (mean reduction) as the training loss, evaluated
        at the quantile stored in self.sl. The quantile loss is used rather than
        the pinball loss with the specific cu and co values so that the scale of
        the feedback signal during training stays similar."""

        quantile_loss = TorchQuantileLoss(reduction="mean")

        self.loss_function = quantile_loss
        # Passed to the loss as keyword arguments on every call
        self.loss_function_params = {"quantile": self.sl}

        logging.debug(f"Loss function set to {self.loss_function}")

In [None]:
show_doc(NVBaseAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L246){target="_blank" style="float:right; font-size:smaller"}

## NVBaseAgent

>      NVBaseAgent (environment_info:ddopnew.utils.MDPInfo,
>                   dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                   cu:numpy.ndarray|ddopnew.utils.Parameter,
>                   co:numpy.ndarray|ddopnew.utils.Parameter, input_shape:Tuple,
>                   output_shape:Tuple, optimizer_params:dict|None=None,
>                   learning_rate_scheduler=None,
>                   dataloader_params:dict|None=None,
>                   preprocessors:list|None=None, postprocessors:list|None=None,
>                   torch_preprocessors:list|None=None, device:str='cpu',
>                   agent_name:str|None=None)

*Base agent for the Newsvendor problem implementing
the loss function for the Empirical Risk Minimization (ERM) approach
based on quantile loss.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| cu | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| co | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| optimizer_params | dict \| None | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| learning_rate_scheduler | NoneType | None | TODO: add base class for learning rate scheduler for typing |
| dataloader_params | dict \| None | None | default: {"batch_size": 32, "shuffle": True} |
| preprocessors | list \| None | None | default: [] |
| postprocessors | list \| None | None | default: [] |
| torch_preprocessors | list \| None | None | default: [] |
| device | str | cpu | "cuda" or "cpu" |
| agent_name | str \| None | None |  |

In [None]:
show_doc(NVBaseAgent.set_loss_function)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L280){target="_blank" style="float:right; font-size:smaller"}

### NVBaseAgent.set_loss_function

>      NVBaseAgent.set_loss_function ()

*Set the loss function for the model to the quantile loss. For training
the model uses quantile loss and not the pinball loss with specific cu and 
co values to ensure similar scale of the feedback signal during training.*

In [None]:
#| export

class NewsvendorlERMAgent(NVBaseAgent):

    """
    Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
    based on a linear (regression) model. Note that this implementation finds
    the optimal regression parameters via SGD.

    """

    def __init__(self, 
                environment_info: MDPInfo,
                dataloader: BaseDataLoader,
                cu: np.ndarray | Parameter,
                co: np.ndarray | Parameter,
                input_shape: Tuple,
                output_shape: Tuple,
                optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
                learning_rate_scheduler = None,  # TODO: add base class for learning rate scheduler for typing
                model_params: dict | None = None,  # default: {"relu_output": False}
                dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
                preprocessors: list | None = None,  # default: []
                postprocessors: list | None = None,  # default: []
                torch_preprocessors: list | None = None,  # default: [FlattenTimeDim(allow_2d=True)]
                device: str = "cpu",  # "cuda" or "cpu"
                agent_name: str | None = "lERM"
                ):

        # Handle mutable defaults unique to this class
        default_model_params = {
            "relu_output": False
            }

        # Combine the defaults with the user-supplied values via update_model_params
        # (defined outside this class). This must happen before the parent
        # constructor runs, because set_model() reads self.model_params.
        self.model_params = self.update_model_params(default_model_params, model_params or {})

        # By default automatically flatten the time dimension of data, if it is not already 2D.
        # An explicitly passed list (including an empty one) is used as given.
        if torch_preprocessors is None:
            torch_preprocessors = [FlattenTimeDim(allow_2d=True)]

        # Keyword arguments keep the long call robust against argument-order mistakes
        super().__init__(
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            optimizer_params=optimizer_params,
            learning_rate_scheduler=learning_rate_scheduler,
            dataloader_params=dataloader_params,
            preprocessors=preprocessors,
            postprocessors=postprocessors,
            torch_preprocessors=torch_preprocessors,
            device=device,
            agent_name=agent_name,
        )
    
    def set_model(self, input_shape, output_shape):

        """Set the model for the agent to a linear model"""

        from ddopnew.approximators import LinearModel

        # flatten time dim of input: the model input size is the product of all
        # input dimensions. np.prod returns a NumPy scalar (a float for an empty
        # shape), so cast to a plain Python int before passing it to the model.
        input_size = int(np.prod(input_shape))
        # Only the first entry of output_shape determines the output size
        output_size = output_shape[0]

        self.model = LinearModel(input_size=input_size, output_size=output_size, **self.model_params)

In [None]:
show_doc(NewsvendorlERMAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L292){target="_blank" style="float:right; font-size:smaller"}

## NewsvendorlERMAgent

>      NewsvendorlERMAgent (environment_info:ddopnew.utils.MDPInfo,
>                           dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                           cu:numpy.ndarray|ddopnew.utils.Parameter,
>                           co:numpy.ndarray|ddopnew.utils.Parameter,
>                           input_shape:Tuple, output_shape:Tuple,
>                           optimizer_params:dict|None=None,
>                           learning_rate_scheduler=None,
>                           model_params:dict|None=None,
>                           dataloader_params:dict|None=None,
>                           preprocessors:list|None=None,
>                           postprocessors:list|None=None,
>                           torch_preprocessors:list|None=None,
>                           device:str='cpu', agent_name:str|None='lERM')

*Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
based on a linear (regression) model. Note that this implementation finds
the optimal regression parameters via SGD.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| cu | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| co | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| optimizer_params | dict \| None | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| learning_rate_scheduler | NoneType | None | TODO: add base class for learning rate scheduler for typing |
| model_params | dict \| None | None | default: {"relu_output": False} |
| dataloader_params | dict \| None | None | default: {"batch_size": 32, "shuffle": True} |
| preprocessors | list \| None | None | default: [] |
| postprocessors | list \| None | None | default: [] |
| torch_preprocessors | list \| None | None | default: [FlattenTimeDim(allow_2d=True)] |
| device | str | cpu | "cuda" or "cpu" |
| agent_name | str \| None | lERM |  |

#### Further information:   
   
    References
    ----------
    
    .. [1] Gah-Yi Ban, Cynthia Rudin, "The Big Data Newsvendor: Practical Insights
        from Machine Learning", 2018.

In [None]:
show_doc(NewsvendorlERMAgent.set_model)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L331){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorlERMAgent.set_model

>      NewsvendorlERMAgent.set_model (input_shape, output_shape)

*Set the model for the agent to a linear model*

Example usage:

In [None]:
from ddopnew.envs.inventory import NewsvendorEnv
from ddopnew.dataloaders.tabular import XYDataLoader
from ddopnew.experiment_functions import run_experiment, test_agent

# Indices passed to the dataloader as start of the validation and test data
val_index_start = 800 #90_000
test_index_start = 900 #100_000

# Random example data: X has shape (1000, 2), Y has shape (1000, 1)
X = np.random.rand(1000, 2)
Y = np.random.rand(1000, 1)

dataloader = XYDataLoader(X, Y, val_index_start, test_index_start)

environment = NewsvendorEnv(
    dataloader = dataloader,
    underage_cost = 0.42857,
    overage_cost = 1.0,
    gamma = 0.999,
    horizon_train = 365,
)

# The agent gets the same dataloader and the same cost values (cu, co) as the environment
agent = NewsvendorlERMAgent(environment.mdp_info,
                            dataloader,
                            cu=np.array([0.42857]),
                            co=np.array([1.0]),
                            input_shape=(2,),
                            output_shape=(1,),
                            optimizer_params= {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}, # other optimizers: "SGD", "RMSprop"
                            learning_rate_scheduler = None, # TODO add base class for learning rate scheduler for typing
                            model_params = {"relu_output": False}, #
                            dataloader_params={"batch_size": 32, "shuffle": True},
                            torch_preprocessors = [],
                            device = "cpu", # "cuda" or "cpu"
)

# Evaluate the agent before it has been fitted
environment.test()
agent.eval()

R, J = test_agent(agent, environment)

print(R, J)

# NOTE(review): the third argument (2) is presumably the number of epochs - confirm against run_experiment
run_experiment(agent, environment, 2, run_id = "test") # fit agent via run_experiment function

# Evaluate again after fitting
environment.test()
agent.eval()

R, J = test_agent(agent, environment)

print(R, J)

-19.723870891879358 -18.81128201610496
-16.557284640159896 -15.787909485147676


In [None]:
#| export

class NewsvendorDLAgent(NVBaseAgent):

    """
    Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
    based on a deep learning model. 
    """

    def __init__(self, 
                environment_info: MDPInfo,
                dataloader: BaseDataLoader,
                cu: np.ndarray | Parameter,
                co: np.ndarray | Parameter,
                input_shape: Tuple,
                output_shape: Tuple,
                learning_rate_scheduler = None,  # TODO: add base class for learning rate scheduler for typing
                
                # parameters in yaml file
                optimizer_params: dict | None = None,  # default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}
                model_params: dict | None = None,  # default: {"hidden_layers": [64, 64], "drop_prob": 0.0, "batch_norm": False, "relu_output": False}
                dataloader_params: dict | None = None,  # default: {"batch_size": 32, "shuffle": True}
                device: str = "cpu", # "cuda" or "cpu"

                preprocessors: list | None = None,  # default: []
                postprocessors: list | None = None,  # default: []
                torch_preprocessors: list | None = None,  # default: [FlattenTimeDim(allow_2d=True)]
                agent_name: str | None = "DLNV",
                ):

        # Handle mutable defaults unique to this class
        default_model_params = {
            "hidden_layers": [64, 64],
            "drop_prob": 0.0,
            "batch_norm": False,
            "relu_output": False
            }

        # Combine the defaults with the user-supplied values via update_model_params
        # (defined outside this class). This must happen before the parent
        # constructor runs, because set_model() reads self.model_params.
        self.model_params = self.update_model_params(default_model_params, model_params or {})
        
        # By default flatten the time dimension of the data (2D input is allowed as is).
        # An explicitly passed list (including an empty one) is used as given.
        if torch_preprocessors is None:
            torch_preprocessors = [FlattenTimeDim(allow_2d=True)]

        # The parameter order of this signature differs from the parent's, so the
        # arguments are passed by keyword to rule out any positional mix-up.
        super().__init__(
            environment_info=environment_info,
            dataloader=dataloader,
            cu=cu,
            co=co,
            input_shape=input_shape,
            output_shape=output_shape,
            optimizer_params=optimizer_params,
            learning_rate_scheduler=learning_rate_scheduler,
            dataloader_params=dataloader_params,
            preprocessors=preprocessors,
            postprocessors=postprocessors,
            torch_preprocessors=torch_preprocessors,
            device=device,
            agent_name=agent_name,
        )
    
    def set_model(self, input_shape, output_shape):
        
        """Set the model for the agent to an MLP"""

        from ddopnew.approximators import MLP

        # flatten time dim of input: the model input size is the product of all
        # input dimensions. np.prod returns a NumPy scalar (a float for an empty
        # shape), so cast to a plain Python int before passing it to the model.
        input_size = int(np.prod(input_shape))
        # Only the first entry of output_shape determines the output size
        output_size = output_shape[0]

        self.model = MLP(input_size=input_size, output_size=output_size, **self.model_params)

In [None]:
show_doc(NewsvendorDLAgent, title_level=2)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L339){target="_blank" style="float:right; font-size:smaller"}

## NewsvendorDLAgent

>      NewsvendorDLAgent (environment_info:ddopnew.utils.MDPInfo,
>                         dataloader:ddopnew.dataloaders.base.BaseDataLoader,
>                         cu:numpy.ndarray|ddopnew.utils.Parameter,
>                         co:numpy.ndarray|ddopnew.utils.Parameter,
>                         input_shape:Tuple, output_shape:Tuple,
>                         learning_rate_scheduler=None,
>                         optimizer_params:dict|None=None,
>                         model_params:dict|None=None,
>                         dataloader_params:dict|None=None, device:str='cpu',
>                         preprocessors:list|None=None,
>                         postprocessors:list|None=None,
>                         torch_preprocessors:list|None=None,
>                         agent_name:str|None='DLNV')

*Newsvendor agent implementing Empirical Risk Minimization (ERM) approach 
based on a deep learning model.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| environment_info | MDPInfo |  |  |
| dataloader | BaseDataLoader |  |  |
| cu | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| co | numpy.ndarray \| ddopnew.utils.Parameter |  |  |
| input_shape | Tuple |  |  |
| output_shape | Tuple |  |  |
| learning_rate_scheduler | NoneType | None | TODO: add base class for learning rate scheduler for typing |
| optimizer_params | dict \| None | None | default: {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0} |
| model_params | dict \| None | None | default: {"hidden_layers": [64, 64], "drop_prob": 0.0, "batch_norm": False, "relu_output": False} |
| dataloader_params | dict \| None | None | default: {"batch_size": 32, "shuffle": True} |
| device | str | cpu | "cuda" or "cpu" |
| preprocessors | list \| None | None | default: [] |
| postprocessors | list \| None | None | default: [] |
| torch_preprocessors | list \| None | None | default: [FlattenTimeDim(allow_2d=True)] |
| agent_name | str \| None | DLNV |  |

#### Further information:   
   
    References
    ----------
    
    .. [1] Afshin Oroojlooyjadid, Lawrence V. Snyder, Martin Takáč,
            "Applying Deep Learning to the Newsvendor Problem", 2018.

In [None]:
show_doc(NewsvendorDLAgent.set_model)

---

[source](https://github.com/opimwue/ddopnew/blob/main/ddopnew/agents/newsvendor/erm.py#L381){target="_blank" style="float:right; font-size:smaller"}

### NewsvendorDLAgent.set_model

>      NewsvendorDLAgent.set_model (input_shape, output_shape)

*Set the model for the agent to an MLP*

Example usage:

In [None]:
# Reuses X, Y and the split indices defined in the previous example cell
dataloader = XYDataLoader(X, Y, val_index_start, test_index_start)

environment = NewsvendorEnv(
    dataloader = dataloader,
    underage_cost = 0.42857,
    overage_cost = 1.0,
    gamma = 0.999,
    horizon_train = 365,
)

# Only the hidden layers are specified here; the agent fills in its defaults for the remaining model parameters
model_params = {
    "hidden_layers": [64, 64],
}

# The agent gets the same dataloader and the same cost values (cu, co) as the environment
agent = NewsvendorDLAgent(environment.mdp_info,
                            dataloader,
                            cu=np.array([0.42857]),
                            co=np.array([1.0]),
                            input_shape=(2,),
                            output_shape=(1,),
                            optimizer_params= {"optimizer": "Adam", "lr": 0.01, "weight_decay": 0.0}, # other optimizers: "SGD", "RMSprop"
                            learning_rate_scheduler = None, # TODO add base class for learning rate scheduler for typing
                            model_params = model_params, #
                            dataloader_params={"batch_size": 32, "shuffle": True},
                            torch_preprocessors = [],
                            device = "cpu" # "cuda" or "cpu"
)

# Evaluate the agent before it has been fitted
environment.test()
agent.eval()

R, J = test_agent(agent, environment)

print(R, J)

# NOTE(review): the third argument (2) is presumably the number of epochs - confirm against run_experiment
run_experiment(agent, environment, 2, run_id = "test") # fit agent via run_experiment function

# Evaluate again after fitting
environment.test()
agent.eval()

R, J = test_agent(agent, environment)

print(R, J)

-21.00788921607091 -20.014243363220327
-15.843676238460636 -15.124868911930205


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()