In [1]:

import functools
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import os
# RecSim imports
from recsim import agent
from recsim import document
from recsim import user
from recsim.choice_model import MultinomialLogitChoiceModel
from recsim.simulator import environment
from recsim.simulator import recsim_gym
from recsim.simulator import runner_lib
from recsim.environments import interest_exploration
from recsim.agent import AbstractEpisodicRecommenderAgent
from recsim import utils

In [2]:
from recsim.environments import interest_exploration
from MusicEnv.Documents import *
from MusicEnv.Listener import *

from Agents.StaticAgent import StaticAgent
from Agents.GreedyClusterAgent import GreedyClusterAgent
from Agents.QLeaningAgent import QLearningAgent, QLearningType
from Agents.DQNAgent import DqnAgent

Using TensorFlow backend.


In [3]:
def createEnvironment(env_config, num_genres=3):
    """Assemble the music RecSim environment and wrap it as a gym environment.

    Args:
        env_config: mapping read for the keys 'seed', 'slate_size',
            'num_candidates' and 'resample_documents'.
        num_genres: number of genres. It is written to
            IEDocument.NUM_CLUSTERS and is the length of every sampled
            topic vector.

    Returns:
        A recsim_gym.RecSimGymEnv built around the Environment created here,
        using clicked_watchtime_reward and the video-cluster metric helpers
        from recsim.utils.
    """
    # Set on IEDocument itself (not on an instance), so the value persists
    # after this call returns.
    IEDocument.NUM_CLUSTERS = num_genres

    seed = env_config['seed']
    slate_size = env_config['slate_size']
    rng = np.random.default_rng(seed)

    # NOTE(review): `choice_model` is not imported by name in the visible
    # cells; presumably it arrives through one of the star imports -- confirm.
    listener_model = MusicListenerModel(
        slate_size,
        choice_model_ctor=choice_model.MultinomialProportionalChoiceModel,
        user_state_ctor=MusicListenerState,
        response_model_ctor=MusicResponse,
        seed=seed,
    )

    # Keep this draw order (dirichlet first, then uniform): both consume the
    # same generator, so swapping them changes the sampled values.
    genre_distribution = rng.dirichlet(np.ones(num_genres))
    genre_quality_mean = rng.uniform(0, 1, num_genres)
    genre_quality_stddev = np.ones(num_genres) * 0.1
    sampler = MusicDocumentSampler(
        topic_distribution=genre_distribution,
        topic_quality_mean=genre_quality_mean,
        topic_quality_stddev=genre_quality_stddev,
    )

    raw_environment = environment.Environment(
        listener_model,
        sampler,
        env_config['num_candidates'],
        slate_size,
        resample_documents=env_config['resample_documents'],
    )

    gym_environment = recsim_gym.RecSimGymEnv(
        raw_environment,
        clicked_watchtime_reward,
        utils.aggregate_video_cluster_metrics,
        utils.write_video_cluster_metrics,
    )
    return gym_environment



In [4]:
def create_agent_greedy(sess, environment, eval_mode, summary_writer=None):
    """Build a GreedyClusterAgent for the given environment.

    Only `environment` is used: its observation and action spaces are passed
    to the agent constructor. `sess`, `eval_mode` and `summary_writer` are
    accepted but ignored.
    """
    observation_space = environment.observation_space
    action_space = environment.action_space
    return GreedyClusterAgent(observation_space, action_space)

def create_agent_static(sess, environment, eval_mode, summary_writer=None):
    """Build a StaticAgent for the given environment.

    Only `environment` is used: its observation and action spaces are passed
    to the agent constructor. `sess`, `eval_mode` and `summary_writer` are
    accepted but ignored.
    """
    observation_space = environment.observation_space
    action_space = environment.action_space
    return StaticAgent(observation_space, action_space)

def create_agent_q_simple(sess, environment, eval_mode, summary_writer=None):
    """Build a QLearningAgent configured with QLearningType.SIMPLE.

    Only `environment` is used: its observation and action spaces are passed
    to the agent constructor. `sess`, `eval_mode` and `summary_writer` are
    accepted but ignored.
    """
    observation_space = environment.observation_space
    action_space = environment.action_space
    return QLearningAgent(
        observation_space,
        action_space,
        type=QLearningType.SIMPLE,
    )

def create_agent_q_mid(sess, environment, eval_mode, summary_writer=None):
    """Build a QLearningAgent configured with QLearningType.MID.

    Only `environment` is used: its observation and action spaces are passed
    to the agent constructor. `sess`, `eval_mode` and `summary_writer` are
    accepted but ignored.
    """
    observation_space = environment.observation_space
    action_space = environment.action_space
    return QLearningAgent(
        observation_space,
        action_space,
        type=QLearningType.MID,
    )

def create_agent_q_complex(sess, environment, eval_mode, summary_writer=None):
    """Build a QLearningAgent configured with QLearningType.COMPLEX.

    Only `environment` is used: its observation and action spaces are passed
    to the agent constructor. `sess`, `eval_mode` and `summary_writer` are
    accepted but ignored.
    """
    observation_space = environment.observation_space
    action_space = environment.action_space
    return QLearningAgent(
        observation_space,
        action_space,
        type=QLearningType.COMPLEX,
    )

def create_agent_dqn(sess, environment, eval_mode, summary_writer=None):
    """Build a DqnAgent for the given environment.

    Only `environment` is used: its observation and action spaces are passed
    to the agent constructor. `sess`, `eval_mode` and `summary_writer` are
    accepted but ignored.
    """
    observation_space = environment.observation_space
    action_space = environment.action_space
    return DqnAgent(observation_space, action_space)

In [5]:
# Settings read by createEnvironment().
env_config = {'slate_size': 3,
              'seed': 7,
              'num_candidates': 17,
              'resample_documents': True}

# Run label -> agent factory. The label doubles as the output sub-directory
# name. Uncomment entries to include them in the sweep; every entry ends with
# a comma so toggling lines never breaks the dict literal.
agents = {
    'DQN_4': create_agent_dqn,
    # 'Q_learning_SIMPLE' : create_agent_q_simple,
    # 'Q_learning_MID' : create_agent_q_mid,
    # 'Q_learning_COMPLEX' : create_agent_q_complex,
    # 'static' : create_agent_static,
    # 'greedy_beter' : create_agent_greedy,
    # 'Slate_q' : create_agent_slate_q,  # NOTE(review): factory not defined in the visible cells
}

experiment_name = "TEST_1"

for agent_name, create_agent in agents.items():
    # Build the path with os.path.join so the run directory nests correctly
    # on every OS (a literal backslash is not a path separator on POSIX).
    tmp_base_dir = os.path.join('tmp', experiment_name, agent_name)

    # A fresh environment per agent, built with 6 genres.
    ie_environment = createEnvironment(env_config, 6)

    runner = runner_lib.TrainRunner(
        checkpoint_frequency=200,
        base_dir=tmp_base_dir,
        create_agent_fn=create_agent,
        env=ie_environment,
        max_training_steps=100,
        max_steps_per_episode=100,
        num_iterations=4000,
    )

    runner.run_experiment()



# DQN_1 - batch 32 and a different network
# DQN_2 - batch 32 and a smaller network
# DQN_3 - batch 8 and a smaller network
# DQN_4 - batch 8 and a larger network

INFO:tensorflow:max_training_steps = 100, number_iterations = 4000,checkpoint frequency = 200 iterations.
INFO:tensorflow:max_steps_per_episode = 100

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

INFO:tensorflow:Beginning training...
INFO:tensorflow:Starting iteration 0



  self.replay_buffer.extend(zip(X_train, np.array(y).reshape(-1,1)))


[LOG] buffer lenght: 297
INFO:tensorflow:Starting iteration 1
[LOG] buffer lenght: 594
INFO:tensorflow:Starting iteration 2
[LOG] buffer lenght: 891
INFO:tensorflow:Starting iteration 3
[LOG] buffer lenght: 1188
INFO:tensorflow:Starting iteration 4
[LOG] buffer lenght: 1485
INFO:tensorflow:Starting iteration 5
[LOG] buffer lenght: 1782
INFO:tensorflow:Starting iteration 6
[LOG] buffer lenght: 2079
INFO:tensorflow:Starting iteration 7
[LOG] buffer lenght: 2376
INFO:tensorflow:Starting iteration 8
[LOG] buffer lenght: 2673
INFO:tensorflow:Starting iteration 9
[LOG] buffer lenght: 2970
[LOG]: epsilon 0.9
INFO:tensorflow:Starting iteration 10
[LOG] buffer lenght: 3267
INFO:tensorflow:Starting iteration 11
[LOG] buffer lenght: 3564
INFO:tensorflow:Starting iteration 12
[LOG] buffer lenght: 3861
INFO:tensorflow:Starting iteration 13
[LOG] buffer lenght: 4158
INFO:tensorflow:Starting iteration 14
[LOG] buffer lenght: 4455
INFO:tensorflow:Starting iteration 15
[LOG] buffer lenght: 4752
INFO:te