# Steam (Environment - Agent)

In [1]:
# Basic import
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from copy import copy, deepcopy
from scipy.stats import invgamma, gamma
from scipy.stats import t as student

## Environment

In [2]:
class Environment:
    """ 
    Contextual Multi-Armed Bandit environment

    Games and users are random points drawn uniformly in [0, 1)^context_size.
    The rating a user gives to a game is the Euclidean distance between the
    two points, divided by the largest distance of the whole matrix and
    truncated to an integer between 1 and 5 (a larger distance gives a
    higher rating). A (user, game) pair can only be played once between
    two resets.

    Parameters
    ----------
    nb_films : number of games (historical parameter name, kept so that
        existing callers keep working)
    nb_users : number of users
    context_size : dimension of the random vector describing a user / a game
    seed : seed of the private random generator (None = non reproducible)
    """
    def __init__(self, nb_films, nb_users, context_size, seed=None):
        # Only use the arguments here: the previous version read a global
        # `nb_games` and failed (or silently used another value) without it.
        self._nb_games = nb_films
        self._nb_users = nb_users
        self._p = context_size # size of user, size of game
        self._rng = np.random.RandomState(seed)
        # Draw order (games first, then users) is kept so that a given seed
        # still produces the same environment.
        self._games = self._rng.uniform(size=(self._nb_games, self._p))
        self._users = self._rng.uniform(size=(self._nb_users, self._p))
        self._reward_matrix = self._compute_reward_matrix()
        self._available_games = np.ones((self._nb_users, self._nb_games))

    def _compute_reward_matrix(self):
        """ Integer ratings (1..5) of every game by every user, shape (nb_users, nb_games) """
        distances = np.zeros((self._nb_users, self._nb_games))
        for i in range(self._nb_users):
            for j in range(self._nb_games):
                distances[i, j] = np.linalg.norm(self._games[j] - self._users[i], ord=2)
        largest = np.max(distances)
        if largest == 0:
            # Every user sits exactly on every game: avoid 0 / 0 and give
            # the rating a zero distance always maps to.
            return np.ones(distances.shape, dtype=int)
        # distance / largest is in [0, 1] -> truncated to 0..4 -> shifted to 1..5
        return (distances / largest * 4).astype(int) + 1

    def step(self):
        """ Sample a user and return it with the indices of the games it has not played yet """
        user = self._rng.randint(0, self._nb_users)
        available_games = np.where(self._available_games[user] == 1)[0]
        return user, available_games
    
    def update(self, user, game):
        """ Mark `game` as played by `user` and return the rating of that pair """
        reward = self._reward_matrix[user, game]
        self._available_games[user, game] = 0
        return reward
    
    def reset(self):
        """ 
        Draw new users (games are kept), recompute the ratings and make
        every game available again. Returns a copy of the new user vectors.
        """
        self._users = self._rng.uniform(size=(self._nb_users, self._p))
        self._reward_matrix = self._compute_reward_matrix()
        # The previous version wrote to `_available_films`, so the games
        # already played were never made available again.
        self._available_games = np.ones((self._nb_users, self._nb_games))
        return self._users.copy()

## Agent

In [3]:
class RandomAgent:
    """ 
    Baseline agent: recommends one of the available games at random
    """
    def __init__(self, seed = None):
        # Private generator, so that a given seed always yields the same picks
        self._rng = np.random.RandomState(seed)
    
    def act(self, available_games):
        """ Return one element of `available_games`, drawn by the agent's generator """
        return self._rng.choice(available_games)

## Experiment

In [4]:
# Basic parameters of the simulation
nb_users = 30 # number of simulated users
nb_games = 10 # number of games that can be recommended
context_size = 2 # dimension of the random vector drawn for each user and for each game

In [5]:
# Creating the environment (the last argument, 2020, is the random seed)
env = Environment(nb_games,nb_users,context_size,2020)
env.reset() # draws a fresh set of users and returns their vectors, which the cell displays below

array([[0.57654812, 0.17727862],
       [0.9944587 , 0.11737487],
       [0.84084483, 0.01402673],
       [0.87168748, 0.90240675],
       [0.44920447, 0.61818198],
       [0.97897813, 0.39727848],
       [0.44723083, 0.23325998],
       [0.93172938, 0.27280247],
       [0.93232779, 0.4132305 ],
       [0.16067842, 0.46880253],
       [0.26928866, 0.94426857],
       [0.29544093, 0.15155985],
       [0.2725433 , 0.86498109],
       [0.91686141, 0.74978059],
       [0.90022206, 0.02240632],
       [0.04095237, 0.89855301],
       [0.70079315, 0.66934705],
       [0.55284578, 0.72274166],
       [0.85393131, 0.60901857],
       [0.45241441, 0.29665953],
       [0.63061983, 0.50703485],
       [0.37025605, 0.13821335],
       [0.75918439, 0.9589782 ],
       [0.27902813, 0.7308044 ],
       [0.19991699, 0.64376932],
       [0.20287344, 0.05044151],
       [0.37199545, 0.01409271],
       [0.09958179, 0.99934911],
       [0.92577299, 0.81490306],
       [0.22237219, 0.09742508]])

In [6]:
# Creating the baseline agent: it picks at random among the games still
# available to the user (2020 is the random seed)
agent = RandomAgent(2020)

We run the random agent against the environment to generate historical (user, game, rating) data, which is used below to train the embedding model.

In [7]:
# Running several trials with the current agent to collect a rating history.
# `users`, `games` and `ratings` are parallel lists (one entry per trial);
# `rating_matrix` holds the same ratings as a (user, game) table, 0 = not rated.
nb_iteration = 100 #how many trials
rating_matrix = np.zeros((env._nb_users, env._nb_games))
users = []
games = []
ratings = []
for i in range(nb_iteration):
    user, available_games = env.step()
    if len(available_games) == 0:
        # The sampled user has already played every game: there is nothing
        # to recommend, and the agent would fail on an empty choice.
        continue
    choosen_game = agent.act(available_games)
    reward = env.update(user, choosen_game)
    users.append(user)
    games.append(choosen_game)
    ratings.append(reward)
    rating_matrix[user, choosen_game] = reward
    
print("rating matrix: \n", str(rating_matrix))

rating matrix: 
 [[3. 0. 0. 0. 2. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 4. 2. 4. 4. 0.]
 [0. 0. 0. 0. 0. 0. 0. 3. 0. 0.]
 [1. 3. 0. 0. 0. 0. 0. 3. 0. 0.]
 [3. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [2. 2. 0. 0. 0. 0. 0. 3. 0. 0.]
 [0. 1. 0. 1. 0. 3. 2. 0. 2. 0.]
 [3. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [2. 0. 0. 3. 1. 0. 0. 0. 3. 0.]
 [0. 0. 0. 1. 3. 2. 0. 1. 1. 1.]
 [0. 3. 3. 3. 0. 1. 0. 2. 2. 0.]
 [0. 1. 1. 0. 0. 0. 0. 2. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 2. 0.]
 [1. 0. 0. 3. 3. 0. 0. 3. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 4. 0.]
 [0. 0. 3. 0. 0. 0. 0. 0. 0. 3.]
 [0. 0. 0. 0. 2. 0. 0. 0. 2. 0.]
 [0. 0. 2. 0. 3. 0. 0. 1. 0. 0.]
 [0. 2. 0. 3. 0. 3. 0. 2. 0. 0.]
 [0. 1. 1. 0. 2. 0. 0. 2. 2. 2.]
 [0. 0. 0. 2. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 2. 0. 2. 0.]
 [1. 0. 4. 0. 3. 3. 3. 2. 3. 0.]
 [0. 0. 0. 2. 0. 0. 3. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 2.]
 [0. 2. 0. 0. 0. 0. 0. 0. 2. 0.]
 [0. 0. 0. 2. 0. 0. 0. 3. 0. 2.]
 [4. 3. 3. 0. 5. 1. 0. 2. 2. 3.]
 [1. 3. 3. 0. 0. 3. 0. 0. 3. 4.]
 [4. 0. 0. 0. 0. 0. 3. 0. 

## Deep matrix factorization and Embedding agent

In [8]:
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dropout
from tensorflow.keras.layers import Dot
from tensorflow.keras.models import Model

In [9]:
class RegressionModel(Model):
    """
    Matrix-factorization rating model: the predicted rating of a
    (user, game) pair is the dot product of the user embedding and the
    game embedding.
    """
    def __init__(self, embedding_size, max_user, max_game):
        """
        embedding_size : dimension of both embeddings (Embedding `output_dim`)
        max_user : size of the user embedding table (Embedding `input_dim`)
        max_game : size of the game embedding table (Embedding `input_dim`)
        """
        super().__init__()
        
        # The user embedding is created before the game embedding; code that
        # indexes model.get_weights() presumably relies on that order —
        # TODO confirm before reordering.
        # NOTE(review): `input_length` is reported as deprecated by recent
        # Keras releases — confirm against the installed version.
        self.user_embedding = Embedding(output_dim=embedding_size,
                                        input_dim=max_user,
                                        input_length=1,
                                        name='user_embedding')
        self.game_embedding = Embedding(output_dim=embedding_size,
                                        input_dim=max_game,
                                        input_length=1,
                                        name='game_embedding')
        
        # The following two layers don't have parameters.
        self.flatten = Flatten()
        self.dot = Dot(axes=1)
        
    def call(self, inputs):
        """
        inputs : sequence whose first element holds the user ids and whose
            second element holds the game ids
        Returns the dot product (along axis 1) of the flattened user and
        game embeddings.
        """
        user_inputs = inputs[0]
        game_inputs = inputs[1]
        
        user_vecs = self.flatten(self.user_embedding(user_inputs))
        game_vecs = self.flatten(self.game_embedding(game_inputs))
        
        y = self.dot([user_vecs, game_vecs])
        return y

# 64 is the embedding size; the embedding tables are sized by the number of
# users and the number of games
model = RegressionModel(64, nb_users, nb_games)
# Adam optimizer, 'mae' (mean absolute error) loss on the ratings
model.compile(optimizer="adam", loss='mae')

In [10]:
# Turn the collected history (parallel lists) into arrays for Keras
users = np.array(users)
games = np.array(games)
ratings = np.array(ratings)

# Learn (user, game) -> rating; validation_split=0.1 holds out 10% of the
# samples for validation
history = model.fit([users, games], ratings,
                    batch_size=64, epochs=100, validation_split=0.1,
                    shuffle=True)

# Weights of the trained model as a list of arrays; the cell creating
# EmbeddingAgent passes embeddings[0] as user embeddings and embeddings[1]
# as game embeddings
embeddings = model.get_weights()

Train on 90 samples, validate on 10 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epo

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [11]:
def cosine(x, y):
    """ Cosine similarity of two vectors: dot product divided by the product of their norms """
    numerator = np.dot(x, y)
    denominator = np.linalg.norm(x) * np.linalg.norm(y)
    return numerator / denominator

In [12]:
class EmbeddingAgent:
    """ 
    Embedding Agent

    Recommends, among the available games, the one whose embedding has the
    highest cosine similarity with the embedding of the user.

    user_embeddings : array with one row per user
    game_embeddings : array with one row per game
    """
    def __init__(self, user_embeddings, game_embeddings):
        self._game_embeddings = game_embeddings
        self._user_embeddings = user_embeddings
    
    def act(self, user, available_games):
        """ 
        Return the index of the best-ranked game of `available_games` for `user`.
        Raises IndexError when `available_games` is empty.
        """
        user_embedding = self._user_embeddings[user]
        # Cosine similarity between the user and every game
        dot_products = self._game_embeddings @ user_embedding
        user_embedding_norm = np.linalg.norm(user_embedding)
        all_item_norms = np.linalg.norm(self._game_embeddings, axis=1)
        similarities = dot_products / (user_embedding_norm * all_item_norms)
        # Game indices from most to least similar
        ranking = np.argsort(similarities)[::-1]
        # np.isin replaces np.in1d, which NumPy 2.0 deprecates
        still_available = np.isin(ranking, available_games)
        return ranking[still_available][0]

In [13]:
# Creating the embedding agent from the learned weights: embeddings[0] is
# passed as the user embeddings, embeddings[1] as the game embeddings.
# `agent` no longer refers to the random agent after this cell.
agent = EmbeddingAgent(embeddings[0], embeddings[1])

In [14]:
# Running several trials with the embedding agent, on the same environment
# (games already played during the previous trials stay unavailable)
nb_iteration = 20 #how many trials
for i in range(nb_iteration):
    user, available_games = env.step()
    if len(available_games) == 0:
        # The sampled user has already played every game: the agent has
        # nothing to rank and would raise an IndexError.
        continue
    choosen_game = agent.act(user, available_games)
    reward = env.update(user, choosen_game)
    rating_matrix[user, choosen_game] = reward
    print("user = {}, available games = {}, choosen_game = {}".format(user,available_games,choosen_game))
    print("reward = {}\n".format(reward))

user = 4, available games = [1 2 3 4 5 6 7 8 9], choosen_game = 4
reward = 3

user = 19, available games = [0 3 5 6], choosen_game = 5
reward = 2

user = 29, available games = [1 2 3 4 5 7 8 9], choosen_game = 4
reward = 3

user = 17, available games = [0 1 3 5 6 8 9], choosen_game = 0
reward = 2

user = 17, available games = [1 3 5 6 8 9], choosen_game = 9
reward = 3

user = 1, available games = [0 1 2 3 4 9], choosen_game = 3
reward = 3

user = 17, available games = [1 3 5 6 8], choosen_game = 5
reward = 2

user = 21, available games = [0 1 2 4 5 7 9], choosen_game = 7
reward = 2

user = 16, available games = [0 1 2 3 5 6 7 9], choosen_game = 5
reward = 2

user = 13, available games = [1 2 5 6 8 9], choosen_game = 5
reward = 3

user = 26, available games = [0 1 2 4 5 6 8], choosen_game = 1
reward = 2

user = 6, available games = [0 2 4 7 9], choosen_game = 7
reward = 2

user = 21, available games = [0 1 2 4 5 9], choosen_game = 5
reward = 3

user = 24, available games = [0 1 2 3 4 5 