# Test des librairies d'apprentissage par renforcement

## 1. MDPToolBox

https://pymdptoolbox.readthedocs.io/en/latest/

### Avantages

- Simple d'utilisation
- Personnalisation du MDP

### Inconvénients

- Ne fonctionne pas si on ne connait pas la dynamique de l'environnement
- Pas d'optimisation

### Algorithmes disponibles

- Finite horizon
- Policy Iteration
- Policy Iteration Modified
- QLearning
- Relative Value Iteration
- Value Iteration
- Value Iteration GS

### Intégration, modification, confiance

 - Simple à intégrer dans le module
 - Simple à modifier, presque aucune dépendance externe
 - Dernier commit 2015
 - Créé par Steven Cordwell, doctorant à l'université de Lincoln
 - Bonne documentation

### Exemple d'utilisation



In [3]:
import mdptoolbox, mdptoolbox.example

# Load the toolbox's bundled forest example as a (transitions, rewards) pair.
P, R = mdptoolbox.example.forest()

# Q-learning solver over that MDP, with a discount factor of 0.96.
agent = mdptoolbox.mdp.QLearning(transitions=P, reward=R, discount=0.96)
agent.run()

# Last expression of the cell, so the notebook displays the learned Q-table.
agent.Q

array([[68.83586282, 41.47533664],
       [72.57438227, 40.88926026],
       [77.01371452, 65.09179593]])

## 2. PyQLearning

https://code.accel-brain.com/Reinforcement-Learning/

### Avantages

### Inconvénients

- Code difficilement lisible
- Peu d'options par défaut
- Dépendance à mxnet

### Algorithmes disponibles

- EpsilonGreedyQlearning
- BoltzmannQLearning

### Intégration, modification, confiance

- Réalisé par Accel Brain (Société japonaise)
- Documentation peu lisible
- Dernier commit il y a 1 mois

### Exemple d'utilisation

In [4]:
from pyqlearning.samplabledata.policysampler._mxnet.maze_policy import MazePolicy
from accelbrainbase.computableloss._mxnet.l2_norm_loss import L2NormLoss
from accelbrainbase.controllablemodel._mxnet.dqlcontroller.dqn_controller import DQNController
from accelbrainbase.observabledata._mxnet.functionapproximator.function_approximator import FunctionApproximator
from accelbrainbase.noiseabledata._mxnet.gauss_noise import GaussNoise
from accelbrainbase.observabledata._mxnet.neural_networks import NeuralNetworks
from accelbrainbase.observabledata._mxnet.convolutional_neural_networks import ConvolutionalNeuralNetworks
from accelbrainbase.observabledata._mxnet.convolutionalneuralnetworks.mobilenet_v2 import MobileNetV2

import numpy as np
import mxnet as mx
import mxnet.ndarray as nd
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
import pandas as pd
from mxnet.gluon.nn import Conv2D
from mxnet.gluon.nn import BatchNorm
from mxnet import MXNetError
from logging import getLogger

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from logging import getLogger, StreamHandler, NullHandler, DEBUG, ERROR

# Send accelbrainbase's DEBUG-level log records to a stream handler.
logger = getLogger("accelbrainbase")
logger.setLevel(DEBUG)

# Re-running this cell must not stack a second handler (every record would
# then be emitted twice), so reuse a stream handler that is already attached.
handler = next(
    (h for h in logger.handlers if isinstance(h, StreamHandler)),
    None,
)
if handler is None:
    handler = StreamHandler()
    handler.setLevel(DEBUG)
    logger.addHandler(handler)

# Pick the compute device once and share it with every component: use the GPU
# when MXNet reports one, otherwise fall back to the CPU so the example still
# runs on machines without a GPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

# Maze environment / policy sampler, placed on the same device as the models.
policy_sampler = MazePolicy(
    batch_size=25,
    map_size=(20, 20),
    moving_max_dist=3,
    memory_num=8,
    possible_n=5,
    repeating_penalty=0.5,
    ctx=ctx,
)

# Loss object handed to the networks defined in this cell.
computable_loss = L2NormLoss()

# Output head placed on top of the CNN: a single sigmoid unit.
output_nn = NeuralNetworks(
    # Loss object: a `ComputableLoss` or an `mxnet.gluon.loss`.
    computable_loss=computable_loss,
    # Number of units in each hidden/output layer (`list` of `int`).
    units_list=[1],
    # Activation per layer (`list` of `act_type` as accepted by
    # `mxnet.ndarray.Activation` / `mxnet.symbol.Activation`).
    activation_list=["sigmoid"],
    # Dropout rate per layer (`list` of `float`).
    dropout_rate_list=[0.0],
    # One entry per hidden layer (`list` of `mxnet.gluon.nn.BatchNorm`).
    hidden_batch_norm_list=[None],
    # `bool` for using bias or not in the output layer (last hidden layer).
    output_no_bias_flag=True,
    # `bool` for using bias or not in all layers.
    all_no_bias_flag=True,
    # Whether to call `mxnet.gluon.HybridBlock.hybridize()`.
    hybridize_flag=True,
    # Device: `mx.gpu()` or `mx.cpu()`.
    ctx=ctx,
)

# Bottleneck stages of the MobileNetV2 body. Each dict holds:
#   filter_rate - `float` filter rate (presumably the expansion factor;
#                 confirm against the accelbrainbase docs)
#   filter_n    - `int` number of filters
#   block_n     - `int` number of blocks
#   stride      - `int` or `tuple` of strides
# The two stages differ only in their number of blocks.
bottleneck_specs = [
    {"filter_rate": 1, "filter_n": 32, "block_n": block_n, "stride": 1}
    for block_n in (1, 2)
]

cnn = MobileNetV2(
    # Loss object: a `ComputableLoss` or an `mxnet.gluon.loss`.
    computable_loss=computable_loss,
    # `mxnet.initializer.Initializer` for the model parameters; with `None`
    # they are drawn from the Xavier distribution.
    initializer=None,
    # Input layer: filter count, kernel size, strides and zero-padding
    # (the last three accept a `tuple` or an `int`).
    input_filter_n=32,
    input_kernel_size=(3, 3),
    input_strides=(1, 1),
    input_padding=(1, 1),
    bottleneck_dict_list=bottleneck_specs,
    # Number of filters in the hidden layers (`int`).
    hidden_filter_n=64,
    # Pooling size in the hidden layer (`tuple` or `int`); with `None` no
    # pooling layer is attached.
    pool_size=None,
    # Head network: a `NeuralNetworks` or an
    # `mxnet.gluon.block.hybridblock.HybridBlock`.
    output_nn=output_nn,
    # Name of the optimizer (`str`).
    optimizer_name="sgd",
    # Whether to call `mxnet.gluon.HybridBlock.hybridize()`.
    hybridize_flag=True,
    # Device: `mx.gpu()` or `mx.cpu()`.
    ctx=ctx,
)

# Wrap the CNN as the function approximator consumed by the DQN controller.
function_approximator = FunctionApproximator(
    model=cnn,
    initializer=None,
    hybridize_flag=True,
    scale=1.0,
    ctx=ctx,
)

DQN = DQNController(
    # What is trained, and the sampler that provides the maze policy.
    function_approximator=function_approximator,
    policy_sampler=policy_sampler,
    # Optimisation settings; the controller gets its own L2 loss instance.
    computable_loss=L2NormLoss(),
    optimizer_name="SGD",
    learning_rate=1e-06,
    # Same initializer / hybridize / scale / device values as the
    # function approximator receives.
    initializer=None,
    hybridize_flag=True,
    scale=1.0,
    ctx=ctx,
    recursive_learning_flag=False,
)
# Set on the instance after construction.
# NOTE(review): presumably the Q-learning step size; confirm in pyqlearning.
DQN.alpha_value = 0.3

# Show the maze map held by the policy sampler.
_ = plt.figure(figsize=(10, 10))
plt.imshow(DQN.policy_sampler.map_arr, cmap="gray")
# Hide ticks and tick labels on both axes. These parameters are booleans: the
# legacy string "off" is not read as False by current Matplotlib, so the ticks
# stayed visible.
plt.tick_params(labelbottom=False, bottom=False, labelleft=False, left=False)
plt.show()
plt.close()

# Run the learning loop; `iter_n` is the number of searches to perform.
DQN.learn(iter_n=500)

def moving_avg(arr, window=50):
    """Return the moving average of *arr*, trimmed at both ends.

    The series is convolved with a uniform kernel of length ``window``
    (``mode='same'``), then ``window`` samples are dropped from each end of
    the result, so the output has ``len(arr) - 2 * window`` elements (none
    when the input is too short).
    """
    uniform_kernel = np.ones(window) / window
    smoothed = np.convolve(arr, uniform_kernel, mode='same')
    return smoothed[window:-window]

# Absolute gap between the first two columns of the logged Q-values,
# smoothed with the moving average before plotting.
q_gap = np.abs(DQN.q_logs_arr[:, 0] - DQN.q_logs_arr[:, 1])

plt.figure(figsize=(20, 10))
plt.plot(moving_avg(q_gap), label="Loss of Q-Values", color="red")
plt.legend()
plt.show()
plt.close()

  bool = onp.bool


AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

## 3. Acme

![Acme logo](https://raw.githubusercontent.com/deepmind/acme/master/docs/imgs/acme.png "Acme logo")

https://github.com/deepmind/acme

https://dm-acme.readthedocs.io/en/latest/index.html

### Avantages

### Inconvénients

### Algorithmes disponibles

- Grande base d'algorithmes (https://dm-acme.readthedocs.io/en/latest/user/agents.html)

### Intégration, modification, confiance

- Construit à partir de Tensorflow et JAX
- Développé par Deepmind
- Documentation pauvre
- Dernier commit il y a 1 mois environ

### Exemple d'utilisation

https://colab.research.google.com/github/deepmind/acme/blob/master/examples/quickstart.ipynb

In [5]:
from typing import Optional

import collections
from dm_control import suite as dm_suite
import dm_env
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from acme import specs
from acme import wrappers
from acme.agents.jax import d4pg
from acme.jax import experiments
from acme.utils import loggers

def make_environment(seed: int) -> dm_env.Environment:
    """Build the wrapped dm_control cartpole "balance" environment.

    Args:
        seed: Random seed forwarded to the task so that environment
            randomness is reproducible (it was previously ignored).

    Returns:
        The environment with flattened observations, canonical action spec
        and single-precision outputs.
    """
    environment = dm_suite.load(
        'cartpole', 'balance', task_kwargs={'random': seed})

    # Make the observations be a flat vector of all concatenated features.
    environment = wrappers.ConcatObservationWrapper(environment)

    # Wrap the environment so the expected continuous action spec is [-1, 1].
    # Note: this is a no-op on 'control' tasks.
    environment = wrappers.CanonicalSpecWrapper(environment, clip=True)

    # Make sure the environment outputs single-precision floats.
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment

def network_factory(spec: specs.EnvironmentSpec) -> d4pg.D4PGNetworks:
    """Create the D4PG networks for the given environment spec.

    The policy and the critic both use an MLP with two hidden layers of
    256 units.
    """
    hidden_layer_sizes = (256, 256)
    networks = d4pg.make_networks(
        spec,
        policy_layer_sizes=hidden_layer_sizes,
        critic_layer_sizes=hidden_layer_sizes,
    )
    return networks

# One in-memory logger per name, created lazily on first lookup.
logger_dict = collections.defaultdict(loggers.InMemoryLogger)


def logger_factory(
    name: str,
    steps_key: Optional[str] = None,
    task_id: Optional[int] = None,
) -> loggers.Logger:
    """Return the in-memory logger registered under *name*.

    ``steps_key`` and ``task_id`` are accepted but deliberately unused.
    """
    return logger_dict[name]

# Build the D4PG agent builder. It was referenced below without ever being
# defined, which raised a NameError; the hyperparameters follow the Acme
# quickstart notebook.
d4pg_config = d4pg.D4PGConfig(learning_rate=3e-4, sigma=0.2)
d4pg_builder = d4pg.D4PGBuilder(d4pg_config)

# Tie together the agent builder, the environment, the networks and logging.
experiment_config = experiments.ExperimentConfig(
    builder=d4pg_builder,
    environment_factory=make_environment,
    network_factory=network_factory,
    logger_factory=logger_factory,
    seed=0,
    max_num_actor_steps=50_000)  # Each episode is 1000 steps.

# Train, running one evaluation episode every 1000 steps.
experiments.run_experiment(
    experiment=experiment_config,
    eval_every=1000,
    num_eval_episodes=1)

%matplotlib inline
# Plot the episode returns recorded by the 'evaluator' in-memory logger.
# NOTE(review): the title and labels say "training" although the data comes
# from the evaluator logger; confirm which wording is intended.
df = pd.DataFrame(logger_dict['evaluator'].data)
plt.figure(figsize=(10, 4))
plt.title('Training episodes returns')
plt.xlabel('Training episodes')
plt.ylabel('Episode return')
plt.plot(df['actor_episodes'], df['episode_return'], label='Training Episodes return')
# The curve carries a label, so draw the legend (it was never shown), then
# render the figure explicitly like the other plotting cells do.
plt.legend()
plt.show()

ModuleNotFoundError: No module named 'dm_control'

## 4. Dopamine

![Dopamine Logo](https://google.github.io/dopamine/images/dopamine_logo.png "Dopamine logo")

https://google.github.io/dopamine/docs/

### Avantages

- Dopamine est conçu pour être modifié directement depuis les sources

### Inconvénients

- Gin configuration framework

### Algorithmes disponibles

- C51,
- DQN,
- IQN,
- Quantile (JAX),
- Rainbow

### Intégration, modification, confiance

- Construit à partir de Tensorflow
- Développé par Google
- Documentation pauvre
- Dernier commit il y a 2 mois environ

### Exemple d'utilisation

https://colab.research.google.com/github/google/dopamine/blob/master/dopamine/colab/cartpole.ipynb#scrollTo=bidurBV0djGi

In [None]:
import numpy as np
import os
from dopamine.discrete_domains import run_experiment
from dopamine.colab import utils as colab_utils
from absl import flags
import gin.tf

# Root directory for this experiment's output; the DQN run gets its own
# sub-directory, which is later passed to the runner and to the log reader.
BASE_PATH = '/tmp/colab_dopamine_run'  # @param

DQN_PATH = os.path.join(BASE_PATH, 'dqn')

# Gin configuration, kept as text and parsed below. It binds the DQN agent
# hyperparameters, the Adam optimizer settings, the CartPole gym environment,
# the runner schedule and the replay buffer size. The text is consumed at run
# time by gin, so it has to stay exactly as written.
dqn_config = """
# Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters
# chosen achieve reasonable performance.
import dopamine.discrete_domains.gym_lib
import dopamine.discrete_domains.run_experiment
import dopamine.agents.dqn.dqn_agent
import dopamine.replay_memory.circular_replay_buffer
import gin.tf.external_configurables

DQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
DQNAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE
DQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
DQNAgent.network = @gym_lib.CartpoleDQNNetwork
DQNAgent.gamma = 0.99
DQNAgent.update_horizon = 1
DQNAgent.min_replay_history = 500
DQNAgent.update_period = 4
DQNAgent.target_update_period = 100
DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
DQNAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
DQNAgent.optimizer = @tf.train.AdamOptimizer()

tf.train.AdamOptimizer.learning_rate = 0.001
tf.train.AdamOptimizer.epsilon = 0.0003125

create_gym_environment.environment_name = 'CartPole'
create_gym_environment.version = 'v0'
create_agent.agent_name = 'dqn'
TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
Runner.num_iterations = 50
Runner.training_steps = 1000
Runner.evaluation_steps = 1000
Runner.max_steps_per_episode = 200  # Default max episode length.

WrappedReplayBuffer.replay_capacity = 50000
WrappedReplayBuffer.batch_size = 128
"""

# Register the bindings above with gin.
# NOTE(review): skip_unknown=False presumably makes unknown configurables an
# error rather than silently ignoring them; confirm in the gin docs.
gin.parse_config(dqn_config, skip_unknown=False)

# Create a runner that writes under DQN_PATH and uses the
# 'continuous_train' schedule, then run the experiment.
dqn_runner = run_experiment.create_runner(
    DQN_PATH,
    schedule='continuous_train',
)
print('Will train DQN agent, please be patient, may be a while...')
dqn_runner.run_experiment()
print('Done training!')

# @title Load the training logs.
data = colab_utils.read_experiment(
    DQN_PATH,
    verbose=True,
    summary_keys=['train_episode_returns'],
)

import seaborn as sns
import matplotlib.pyplot as plt

# Training episode returns per iteration, one line per agent.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(
    data=data,
    x='iteration',
    y='train_episode_returns',
    hue='agent',
    ax=ax,
)
plt.title('Cartpole')
plt.show()

## 5. RLlib

![Ray logo](./assets/ray_logo_w.svg "Ray logo")

https://docs.ray.io/en/latest/rllib/index.html

### Avantages

- Code concis et lisible

### Inconvénients

- Orienté industrie

### Algorithmes disponibles

- Nombreux algorithmes (https://docs.ray.io/en/latest/rllib/rllib-algorithms.html)

### Intégration, modification, confiance

- Construit à partir de Tensorflow et Pytorch
- Développé par Ray (société)
- Documentation bien écrite
- Dernier commit il y a 1 semaine environ

### Exemple d'utilisation

In [None]:
from ray.rllib.algorithms.ppo import PPOConfig

# 1. Configure the algorithm, one builder call at a time.
config = PPOConfig()
config = config.environment("Taxi-v3")
config = config.rollouts(num_rollout_workers=2)
config = config.framework("tf2")
config = config.training(model={"fcnet_hiddens": [64, 64]})
config = config.evaluation(evaluation_num_workers=1)

# 2. Build the algorithm.
algo = config.build()

# 3. Train it for five iterations, printing each iteration's result.
for _ in range(5):
    print(algo.train())

# 4. Evaluate it.
algo.evaluate()

## 6. TRFL

https://github.com/deepmind/trfl

### Avantages

### Inconvénients

### Algorithmes disponibles

- Nombreux algorithmes (https://github.com/deepmind/trfl/blob/master/docs/index.md#learning-updates)

### Intégration, modification, confiance

- Construit à partir de Tensorflow
- Développé par Deepmind
- Documentation bien écrite
- Dernier commit il y a 2 ans

### Exemple d'utilisation

In [None]:
import tensorflow as tf
import trfl

# Initial Q-values for the previous and next timesteps; both have shape
# [batch_size, num_actions].
q_tm1_init = [[1., 1., 0.], [1., 2., 0.]]
q_t_init = [[0., 1., 0.], [1., 2., 0.]]

q_tm1 = tf.get_variable("q_tm1", initializer=q_tm1_init, dtype=tf.float32)
q_t = tf.get_variable("q_t", initializer=q_t_init, dtype=tf.float32)

# Per-transition inputs, each of shape [batch_size]: the action taken, the
# reward received and the discount factor.
a_tm1 = tf.constant([0, 1], dtype=tf.int32)
r_t = tf.constant([1, 1], dtype=tf.float32)
pcont_t = tf.constant([0, 1], dtype=tf.float32)

# Q-learning loss, and auxiliary data.
loss, q_learning = trfl.qlearning(q_tm1, a_tm1, r_t, pcont_t, q_t)

## 7. ReAgent

![ReAgent logo](https://raw.githubusercontent.com/facebookresearch/ReAgent/main/logo/reagent_banner.png "ReAgent logo")

https://github.com/facebookresearch/ReAgent

### Avantages

- Algorithmes pour Bandits

### Inconvénients

- Installation via Docker
- Orienté recommandation
- Orienté industrie
- S'utilise en ligne de commande

### Algorithmes disponibles

- Nombreux algorithmes (https://github.com/facebookresearch/ReAgent)

### Intégration, modification, confiance

- Construit à partir de PyTorch
- Développé par Facebook
- Documentation bien écrite
- Dernier commit il y a 1 semaine

### Exemple d'utilisation

We have set up Click commands to run our RL workflow. The basic usage pattern is

    ./reagent/workflow/cli.py run <module.function> <path/to/config>

To train a model online with OpenAI Gym, simply run the Click command:

    # set the config
    export CONFIG=reagent/gym/tests/configs/cartpole/discrete_dqn_cartpole_online.yaml
    # train and evaluate model on gym environment
    ./reagent/workflow/cli.py run reagent.gym.tests.test_gym.run_test $CONFIG

To train a batch RL model, run the following commands:

    # set the config
    export CONFIG=reagent/workflow/sample_configs/discrete_dqn_cartpole_offline.yaml
    # gather some random transitions (can replace with your own)
    ./reagent/workflow/cli.py run reagent.workflow.gym_batch_rl.offline_gym_random $CONFIG
    # convert data to timeline format
    ./reagent/workflow/cli.py run reagent.workflow.gym_batch_rl.timeline_operator $CONFIG
    # train model based on timeline data
    ./reagent/workflow/cli.py run reagent.workflow.training.identify_and_train_network $CONFIG
    # evaluate the model
    ./reagent/workflow/cli.py run reagent.workflow.gym_batch_rl.evaluate_gym "$CONFIG"


## 8. SLM Lab

https://slm-lab.gitbook.io/slm-lab/

### Avantages

### Inconvénients

- N'est pas un package python

### Algorithmes disponibles

- DQN
- SARSA
- Double-DQN, Dueling-DQN, PER
- A2C (Advantage Actor-Critic) with GAE & n-step
- REINFORCE
- PPO
- SAC
- SIL
- Asynchronous version of all the above

### Intégration, modification, confiance

- Documentation bien écrite
- Dernier commit il y a 1 an

### Exemple d'utilisation

https://slm-lab.gitbook.io/slm-lab/setup/installation

https://colab.research.google.com/gist/kengz/6fd52a902129fb6d4509c721d71bda48/slm_lab_colab.ipynb#scrollTo=MfrncRH9-j-1

    python run_lab.py slm_lab/spec/demo.json dqn_cartpole dev

## 9. DeeR

https://deer.readthedocs.io/

### Avantages

### Inconvénients

### Algorithmes disponibles

- Multiple algorithmes (https://deer.readthedocs.io/en/0.4.1/modules/learning-algorithms.html)

### Intégration, modification, confiance

- Documentation bien écrite
- Dernier commit il y a 2 ans

### Exemple d'utilisation

In [None]:
import numpy as np
import copy

from deer.base_classes import Environment
import gym

class MyEnv(Environment):
    """CartPole-v0 gym environment exposed through DeeR's `Environment` API."""

    def __init__(self, rng):
        """Initialize environment.

        Arguments:
            rng - the numpy random number generator (accepted but not used
                by this environment)
        """
        # Defining the type of environment
        self.env = gym.make('CartPole-v0')
        self._last_observation = self.env.reset()
        self.is_terminal = False
        # Start in training mode (-1) so that act() can be called before the
        # first reset(); `mode` used to be set only in reset(), which made
        # such a call fail with AttributeError.
        self.mode = -1
        # The observation has 4 components and only the current observation
        # is used in the pseudo-state, hence four inputs of history size 1.
        self._input_dim = [(1,), (1,), (1,), (1,)]

    def act(self, action):
        """Simulate one time step in the environment.

        Stores the new observation and the terminal flag, renders the
        environment when in test mode, and returns the reward of the step.
        """
        self._last_observation, reward, self.is_terminal, info = self.env.step(action)
        if self.mode == 0:  # Show the policy only at test time
            self.env.render()

        return reward

    def reset(self, mode=0):
        """Reset environment for a new episode.

        Arguments:
            mode : int
                -1 corresponds to training and 0 to test

        Returns the initial observation of the new episode.
        """
        # Reset initial observation to a random x and theta
        self._last_observation = self.env.reset()
        self.is_terminal = False
        self.mode = mode

        return self._last_observation

    def inTerminalState(self):
        """Tell whether the environment reached a terminal state after the
        last transition (i.e. the last transition that occurred was terminal).
        """
        return self.is_terminal

    def inputDimensions(self):
        """Return the list of input dimensions set in __init__."""
        return self._input_dim

    def nActions(self):
        """Return the number of actions (hard-coded to 2)."""
        return 2  # Would be useful to have this directly in gym : self.env.action_space.shape

    def observe(self):
        """Return a deep copy of the last observation."""
        return copy.deepcopy(self._last_observation)
        
def main():
    """Create the environment with a fixed-seed RNG and print its current observation."""
    env = MyEnv(np.random.RandomState(123456))
    print(env.observe())


if __name__ == "__main__":
    main()

""" Pendulum environment launcher.
Same principles as run_toy_env. See the docs for more details.
Authors: Vincent Francois-Lavet, David Taralla
"""

import sys
import logging
import numpy as np

import deer.experiment.base_controllers as bc
from deer.default_parser import process_args
from deer.agent import NeuralAgent
from deer.learning_algos.q_net_keras import MyQNetwork
from pendulum_env import MyEnv as pendulum_env

class Defaults:
    # Default values for the launcher. The class is handed to
    # `process_args(sys.argv[1:], Defaults)`, and the launcher then reads the
    # resulting values through lower-case attributes (e.g.
    # `parameters.rms_decay`, `parameters.steps_per_epoch`).
    # NOTE(review): presumably each constant can be overridden from the
    # command line; confirm in `deer.default_parser`.

    # ----------------------
    # Experiment Parameters
    # ----------------------
    STEPS_PER_EPOCH = 100
    EPOCHS = 200
    STEPS_PER_TEST = 100
    PERIOD_BTW_SUMMARY_PERFS = 10

    # ----------------------
    # Environment Parameters
    # ----------------------
    FRAME_SKIP = 1

    # ----------------------
    # DQN Agent parameters:
    # ----------------------
    UPDATE_RULE = 'rmsprop'
    LEARNING_RATE = 0.0002
    LEARNING_RATE_DECAY = 0.99
    DISCOUNT = 0.9
    DISCOUNT_INC = 1.
    DISCOUNT_MAX = 0.95
    RMS_DECAY = 0.9
    RMS_EPSILON = 0.0001
    MOMENTUM = 0
    CLIP_NORM = 1.0
    EPSILON_START = 1.0
    EPSILON_MIN = 0.2
    EPSILON_DECAY = 10000
    UPDATE_FREQUENCY = 1
    REPLAY_MEMORY_SIZE = 1000000
    BATCH_SIZE = 32
    FREEZE_INTERVAL = 500
    # When True the launcher seeds its RandomState with a fixed value.
    DETERMINISTIC = True

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)

    # Fixed seed for a deterministic run, unseeded generator otherwise.
    rng = (
        np.random.RandomState(12345)
        if parameters.deterministic
        else np.random.RandomState()
    )

    # --- Instantiate environment ---
    env = pendulum_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
        env,
        parameters.rms_decay,
        parameters.rms_epsilon,
        parameters.momentum,
        parameters.clip_norm,
        parameters.freeze_interval,
        parameters.batch_size,
        parameters.update_rule,
        rng,
        double_Q=True,
    )

    # --- Instantiate agent ---
    # The fourth argument is the largest first entry among the environment's
    # input dimensions.
    agent = NeuralAgent(
        env,
        qnetwork,
        parameters.replay_memory_size,
        max(dims[0] for dims in env.inputDimensions()),
        parameters.batch_size,
        rng,
    )

    # --- Bind controllers to the agent ---
    # For comments, please refer to run_toy_env.py
    agent.attach(
        bc.VerboseController(evaluate_on='epoch', periodicity=1)
    )

    agent.attach(
        bc.TrainerController(
            evaluate_on='action',
            periodicity=parameters.update_frequency,
            show_episode_avg_V_value=False,
            show_avg_Bellman_residual=False,
        )
    )

    agent.attach(
        bc.LearningRateController(
            initial_learning_rate=parameters.learning_rate,
            learning_rate_decay=parameters.learning_rate_decay,
            periodicity=1,
        )
    )

    agent.attach(
        bc.DiscountFactorController(
            initial_discount_factor=parameters.discount,
            discount_factor_growth=parameters.discount_inc,
            discount_factor_max=parameters.discount_max,
            periodicity=1,
        )
    )

    agent.attach(
        bc.EpsilonController(
            initial_e=parameters.epsilon_start,
            e_decays=parameters.epsilon_decay,
            e_min=parameters.epsilon_min,
            evaluate_on='action',
            periodicity=1,
            reset_every='none',
        )
    )

    agent.attach(
        bc.InterleavedTestEpochController(
            id=0,
            epoch_length=parameters.steps_per_test,
            periodicity=1,
            show_score=True,
            summarize_every=parameters.period_btw_summary_perfs,
        )
    )

    # --- Run the experiment ---
    agent.run(parameters.epochs, parameters.steps_per_epoch)