# Test best margin on models

The input similarities are not normalized and lie in $[0, 4]$, so we test the following margin values to find the best one for each model: $\{0.5, 1, 1.5, 2, 2.5, 3, 3.5\}$.

In [1]:
# Candidate margins for the margin-comparator loss; one model is trained per value.
margins = [
    0.5, 1, 1.5, 2, 2.5, 3, 3.5,
]

In [2]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import gc
import requests 
from time import sleep
import json
import tensorflow as tf
from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, Dense, Dropout, Concatenate, Lambda, Dot

from keras.regularizers import l2

Using TensorFlow backend.


In [20]:
# For LightFM + Siamese model
from sklearn.metrics.pairwise import cosine_similarity
import tensorflow as tf
from lightfm import LightFM
from scipy.sparse import csr_matrix

In [3]:
## Importing Useful Functions predefined
%run useful_functions_modified.py
from useful_functions_modified import *

## 1st Model : only ids without new users

In [4]:
# Identifier and endpoints of the recommendation-environment API used throughout the notebook.
user_id = '0H3BRZ9M0BQP3SFPSCL3'
base_url ='http://35.180.178.243/'
url_reset = base_url+"reset"
url_predict = base_url+'predict'
params = {'user_id':user_id}

# Reset the environment: the response holds the interaction history and the first state to act on.
r = requests.get(url=url_reset,params=params)
# Fail fast with a clear HTTP error instead of an obscure JSON/KeyError further down.
r.raise_for_status()
data = r.json()
data.keys()

dict_keys(['nb_items', 'nb_users', 'action_history', 'rewards_history', 'state_history', 'next_state'])

In [5]:
# Unpack the reset payload into the globals used by the rest of the notebook.
nb_users = data['nb_users']
nb_items = data['nb_items']

action_history = data['action_history']
state_history = data['state_history']
rewards_history = data['rewards_history']
next_state = data['next_state']

# For every historical state take its first row, then the first field of each of those rows
# (the same [0][0] position that the evaluation loops below use as the user id).
users_ids = list(zip(*list(list(zip(*state_history))[0])))[0]
# Helpers come from useful_functions_modified; presumably they keep only the interactions
# with a positive reward -- verify against that module.
pos_rewards = compute_pos_rewards(rewards_history)
pos_data = create_pos_data(pos_rewards,state_history,action_history)

# Number of online evaluation rounds per model / number of training passes per model.
nb_iters = 1000
n_epochs = 50

In [6]:
# Trained id-only matching models, stored in the same order as `margins`.
models = list()

In [7]:
print("Start building models for different margins...")
for ma in margins:
    print("\tBuild model for margin = ", ma)

    # Passing `ma` as a default argument freezes this iteration's margin inside the loss.
    def margin_comparator_loss_(inputs, margin=ma):
        """Hinge loss max(negative_sim - positive_sim + margin, 0) on a (positive, negative) pair."""
        positive_pair_sim, negative_pair_sim = inputs
        return tf.maximum(negative_pair_sim - positive_pair_sim + margin, 0)

    rewards, nb_reward_pos = 0, 0
    deep_match_model, deep_triplet_model = build_models(
        nb_users, nb_items,
        user_dim=32, item_dim=15,
        n_hidden=2, hidden_size=64,
        dropout=0.1, l2_reg=0,
        loss=margin_comparator_loss_,
    )
    deep_triplet_model.compile(loss=identity_loss, optimizer='adam')
    # Placeholder targets with one entry per positive interaction, as required by `fit`.
    fake_y = np.ones_like(pos_data['user_id'])

    for epoch in range(n_epochs):
        # Draw fresh negatives so every pass sees different triplets (seeded by the pass index).
        triplet_inputs = sample_triplets(pos_data, random_seed=epoch)
        # One incremental pass over the triplets sampled for this epoch.
        deep_triplet_model.fit(triplet_inputs, fake_y, shuffle=True, batch_size=32, epochs=1, verbose=0)
    models.append(deep_match_model)
print("Finisht building models.")

Start building models for different margins...
	Build model for margin =  0.5
	Build model for margin =  1
	Build model for margin =  1.5
	Build model for margin =  2
	Build model for margin =  2.5
	Build model for margin =  3
	Build model for margin =  3.5
Finisht building models.


In [8]:
# Online evaluation of the id-only models: for each margin, play `nb_iters` rounds against the API.
# Note that `next_state` carries over from one margin to the next (no reset in between).
for margin, model in zip(margins, models):
    rewards, nb_reward_pos = 0, 0
    k, max_k = 0, int(nb_iters / 50) # progress-bar ticks printed so far / ticks in a full bar
    s = 'Test for margin: ' + str(margin)
    for i in range(nb_iters):
        is_last_iter = i == (nb_iters - 1)
        if i % 50 == 0 or is_last_iter:
            # Fix: the original only chose "\n" when i == nb_iters-1 *inside* the `i % 50 == 0`
            # branch, which never happens for nb_iters = 1000. The bar was never terminated and
            # the next print landed on the same line (the "Average reward" lines are missing
            # from the saved output). The last iteration now always prints a terminated bar.
            end_ln = "\n" if is_last_iter else "\r"
            print(s + "\t|" + k * "==" +">"+ (max_k - k - 1) * "--" +"|" + str(int(i * 100/nb_iters)) + "%", end=end_ln)
            k += 1
        sleep(0.05) # sleep to let the API breathe and allow others to call requests
        # The first field of row 0 is used as the user id for every candidate row of the state.
        next_user = np.asarray([next_state[0][0] for _ in range(len(next_state))])
        # Second field of every row: the candidate item ids.
        list_items = np.asarray(list(list(zip(*next_state))[1]))

        predictions = model.predict([next_user, list_items])
        recommended_item = np.argmax(predictions)  # position of the best-scored candidate

        params['recommended_item'] = recommended_item
        r=requests.get(url=url_predict,params=params)
        # Surface HTTP failures directly instead of as a JSON/KeyError on the next lines.
        r.raise_for_status()
        d=r.json()
        reward= d['reward'] # reward obtained for the item just recommended

        # check how many times the item recommended was actually bought
        if reward > 0 :
            nb_reward_pos+=1

        next_state = d['state']
        rewards += reward

    print('\tAverage reward: ', rewards/nb_iters)
    print('\tPercentage of positive rewards: ', 100*(nb_reward_pos/nb_iters), '%')

	Percentage of positive rewards:  33.4 %
	Percentage of positive rewards:  32.7 %
	Percentage of positive rewards:  30.9 %
	Percentage of positive rewards:  27.900000000000002 %
	Percentage of positive rewards:  23.1 %
	Percentage of positive rewards:  25.8 %
	Percentage of positive rewards:  23.3 %


$$ \begin{array}{|c|c|c|} \hline
   \textbf{margin} & \textbf{avg reward} & \textbf{\% of positive} \\ \hline
   0.5 & 132.82349842332832 & \textbf{33.4} \\ 
   1   & 115.27638840276973 & 32.7 \\
   1.5 & \textbf{154.3926711921762} &  30.9  \\
   2   & 119.7954770912971  & 27.9 \\
   2.5 & 123.47391156653173 & 23.1 \\
   3   & 137.85805247163256 & 25.8  \\
   3.5 & 129.54683952594434 & 23.3  \\ \hline
\end{array} $$


## 2nd Model : adding covariates

In [6]:
# Reset the environment so the covariate models start from a fresh history.
r = requests.get(url=url_reset,params=params)
# Fail fast with a clear HTTP error instead of an obscure JSON/KeyError further down.
r.raise_for_status()
data = r.json()
nb_users, nb_items = data['nb_users'], data['nb_items']

action_history, state_history, rewards_history = data['action_history'], data['state_history'], data['rewards_history']
next_state = data['next_state']

# First field of the first row of every historical state (same [0][0] position the
# evaluation loops use as the user id).
users_ids = list(zip(*list(list(zip(*state_history))[0])))[0]
pos_rewards = compute_pos_rewards(rewards_history)
pos_data = create_pos_data(pos_rewards,state_history,action_history)

# Evaluation rounds per model / training passes per model.
nb_iters, n_epochs = 1000, 50
# Trained covariate models, stored in the same order as `margins`.
models2 = []

In [7]:
print("Start building models for different margins...")
for ma in margins:
    print("\tBuild model for margin = ", ma)

    # The default argument captures the current margin, so each loss keeps its own value.
    def margin_comparator_loss_(inputs, margin=ma):
        """Hinge loss max(negative_sim - positive_sim + margin, 0) on a (positive, negative) pair."""
        positive_pair_sim, negative_pair_sim = inputs
        return tf.maximum(negative_pair_sim - positive_pair_sim + margin, 0)

    deep_match_model2, deep_triplet_model2 = build_models_covariates(
        nb_users, nb_items,
        user_dim=32, item_dim=15,
        n_hidden=2, hidden_size=64,
        dropout=0.1, l2_reg=0,
        loss=margin_comparator_loss_,
    )
    deep_triplet_model2.compile(loss=identity_loss, optimizer='adam')
    # Placeholder targets with one entry per positive interaction, as required by `fit`.
    fake_y = np.ones_like(pos_data['user_id'])

    for epoch in range(n_epochs):
        # Re-sample negatives (seeded by the pass index) so each pass trains on new quintuplets.
        inputs = sample_quintuplets(pos_data, state_history, random_seed=epoch)

        # One incremental pass over the quintuplets sampled for this epoch.
        deep_triplet_model2.fit(inputs, fake_y, shuffle=True, batch_size=32, epochs=1, verbose=0)
    models2.append(deep_match_model2)
print("Finisht building models.")

Start building models for different margins...
	Build model for margin =  0.5
	Build model for margin =  1
	Build model for margin =  1.5
	Build model for margin =  2
	Build model for margin =  2.5
	Build model for margin =  3
	Build model for margin =  3.5
Finisht building models.


In [8]:
# Online evaluation of the covariate models: `nb_iters` recommendation rounds per margin.
for margin, model in zip(margins, models2):
    rewards, nb_reward_pos = 0, 0
    k, max_k = 0, int(nb_iters / 50) # progress-bar ticks printed so far / ticks in a full bar
    s = 'Test for margin: ' + str(margin)
    end_ln = "\r"
    for i in range(nb_iters):
        if i % 50 == 0:
            # Terminate the line once the bar is full.
            if (max_k - k - 1 == 0): end_ln = "\n"
            bar = k * "==" + ">" + (max_k - k - 1) * "--"
            print(s + "\t|" + bar + "|" + str(int(i * 100/nb_iters)) + "%", end=end_ln)
            k += 1
        sleep(0.05) # sleep to let the API breathe and allow others to call requests

        n_candidates = len(next_state)
        # Row 0 supplies the user id and user covariates, repeated once per candidate row.
        next_user = np.asarray([next_state[0][0]] * n_candidates)
        list_items = np.asarray([row[1] for row in next_state])
        list_feat_user = np.expand_dims(np.asarray([next_state[0][3:5]] * n_candidates), axis=1)
        # NOTE(review): item covariates are also read from row 0 for every candidate; confirm
        # whether per-row features (next_state[j][5:-1]) were intended.
        list_feat_items = np.expand_dims(np.asarray([next_state[0][5:-1]] * n_candidates), axis=1)

        predictions = model.predict([next_user, list_items, list_feat_user, list_feat_items])
        recommended_item = np.argmax(predictions)  # position of the best-scored candidate

        params['recommended_item'] = recommended_item
        r = requests.get(url=url_predict, params=params)
        d = r.json()
        reward = d['reward'] # reward obtained for the item just recommended

        # Count the rounds in which the recommended item was actually bought.
        if reward > 0:
            nb_reward_pos += 1

        next_state = d['state']
        rewards += reward
    print('\tAverage reward: ', rewards/nb_iters)
    print('\tPercentage of positive rewards: ', 100*(nb_reward_pos/nb_iters), '%')

	Average reward:  91.20541694875492
	Percentage of positive rewards:  20.8 %
	Average reward:  169.34868136789063
	Percentage of positive rewards:  31.2 %
	Average reward:  86.15808849619859
	Percentage of positive rewards:  30.099999999999998 %
	Average reward:  59.38050090655821
	Percentage of positive rewards:  22.6 %
	Average reward:  93.63423404862137
	Percentage of positive rewards:  23.400000000000002 %
	Average reward:  99.31253386815727
	Percentage of positive rewards:  24.9 %
	Average reward:  129.38822175934718
	Percentage of positive rewards:  23.0 %


$$ \begin{array}{|c|c|c|} \hline
   \textbf{margin} & \textbf{avg reward} & \textbf{\% of positive} \\ \hline
   0.5 & 91.20541694875492 & 20.8 \\ 
   1   & \textbf{169.34868136789063} & \textbf{31.2} \\
   1.5 & 86.15808849619859 & 30.1  \\
   2   & 59.38050090655821 & 22.6 \\
   2.5 & 93.63423404862137 & 23.40 \\
   3   & 99.31253386815727 & 24.9  \\
   3.5 & 129.38822175934718 & 23.0  \\ \hline
\end{array} $$

## 3rd Model : Tackling Cold Start Issue (e.g. New Users)

In [9]:
from sklearn.metrics.pairwise import cosine_similarity

In [10]:
# Reset the environment so the cold-start models start from a fresh history.
r = requests.get(url=url_reset,params=params)
# Fail fast with a clear HTTP error instead of an obscure JSON/KeyError further down.
r.raise_for_status()
data = r.json()
nb_users, nb_items = data['nb_users'], data['nb_items']

action_history, state_history, rewards_history = data['action_history'], data['state_history'], data['rewards_history']
next_state = data['next_state']

# First field of the first row of every historical state (same [0][0] position the
# evaluation loops use as the user id).
users_ids = list(zip(*list(list(zip(*state_history))[0])))[0]
pos_rewards = compute_pos_rewards(rewards_history)
pos_data = create_pos_data(pos_rewards,state_history,action_history)

# Fix: the training loop of this section reads `n_epochs`; the original assigned the value to
# an unused `nb_epochs`, so training silently relied on a value left over from an earlier section.
nb_iters, n_epochs = 1000, 50
# Trained cold-start models, stored in the same order as `margins`.
models3 = []

In [11]:
print("Start building models for different margins...")
for ma in margins:
    print("\tBuild model for margin = ", ma)
    def margin_comparator_loss_(inputs, margin=ma):
        positive_pair_sim, negative_pair_sim = inputs
        return tf.maximum(negative_pair_sim - positive_pair_sim + margin, 0)

    deep_match_model3, deep_triplet_model3 = build_models_covariates(nb_users, nb_items, user_dim=32,
                                                                item_dim= 15, n_hidden =2, hidden_size=64,
                                                                dropout=0.1,l2_reg=0,loss=margin_comparator_loss_)
    deep_triplet_model3.compile(loss=identity_loss, optimizer='adam')
    fake_y = np.ones_like(pos_data['user_id'])

    for i in range(n_epochs):
        # Sample new negatives to build different triplets at each epoch
        inputs = sample_quintuplets(pos_data,state_history, random_seed=i)
    
        # Fit the model incrementally by doing a single pass over the sampled quintuplets.
        deep_triplet_model3.fit(inputs, fake_y, shuffle=True, batch_size=32, epochs=1, verbose=0)
    models3.append(deep_match_model2)
print("Finisht building models.")

Start building models for different margins...
	Build model for margin =  0.5
	Build model for margin =  1
	Build model for margin =  1.5
	Build model for margin =  2
	Build model for margin =  2.5
	Build model for margin =  3
	Build model for margin =  3.5
Finisht building models.


In [12]:
for margin, model in zip(margins, models2):
    rewards, nb_reward_pos = 0, 0
    k, max_k = 0, int(nb_iters / 50) # index just to print progrees
    s = 'Test for margin: ' + str(margin)
    end_ln = "\r"
    for i in range(nb_iters): 
        if i % 50 == 0 or i == (nb_iters - 1):
            if i == (nb_iters - 1) : end_ln = "\n"
            print(s + "\t|" + k * "==" +">"+ (max_k - k - 1) * "--" +"|" + str(int(i * 100/nb_iters)) + "%", end=end_ln)
            k += 1
        sleep(0.05) # sleep to let the API breathe and allow others to call requests
        if next_state[0][0] in pos_data.user_id.unique().tolist():
            next_user = np.asarray([next_state[0][0] for i in range(len(next_state))])
            list_feat_user = np.expand_dims(np.asarray([next_state[0][3:5] for i in range(len(next_state))]), axis=1)
        else:
            #predict items based on users' profile similarity 
            most_similar_user_id = compute_most_similar(state_history,next_state,pos_data) 
            next_user = np.asarray([most_similar_user_id for i in range(len(next_state))])
            list_feat_user = list(pos_data.loc[pos_data.user_id==most_similar_user_id,'feat_users'])[0]
            list_feat_user = np.expand_dims(np.asarray([list_feat_user for i in range(len(next_state))]), axis=1)

        list_items = np.asarray(list(list(zip(*next_state))[1]))
        list_feat_items = np.expand_dims(np.asarray([next_state[0][5:-1] for i in range(len(next_state))]), axis=1)

        predictions = model.predict([next_user, list_items, list_feat_user, list_feat_items])
        recommended_item = np.argmax(predictions)

        params['recommended_item'] = recommended_item 
        r=requests.get(url=url_predict,params=params)
        d=r.json()
        reward= d['reward'] # previous reward for the recommended item predicted

        # check how many times the item recommended was actually bought
        if reward > 0 : 
            nb_reward_pos+=1

        next_state = d['state']
        rewards += reward
    print('\tAverage reward: ', rewards/nb_iters)
    print('\tPercentage of positive rewards: ', 100*(nb_reward_pos/nb_iters), '%')

	Average reward:  117.42578061727096
	Percentage of positive rewards:  19.7 %
	Average reward:  121.62543696438043
	Percentage of positive rewards:  24.9 %
	Average reward:  169.73302930663246
	Percentage of positive rewards:  28.1 %
	Average reward:  113.18605313848596
	Percentage of positive rewards:  24.0 %
	Average reward:  111.52637722525041
	Percentage of positive rewards:  17.8 %
	Average reward:  87.66752154725002
	Percentage of positive rewards:  18.8 %
	Average reward:  107.42923167915677
	Percentage of positive rewards:  19.2 %


$$ \begin{array}{|c|c|c|} \hline
   \textbf{margin} & \textbf{avg reward} & \textbf{\% of positive} \\ \hline
   0.5 & 117.42578061727096 & 19.7  \\ 
   1   & 121.62543696438043 & 24.9 \\
   1.5 & \textbf{169.73302930663246} & \textbf{28.1}  \\
   2   & 113.18605313848596 & 24.0 \\
   2.5 & 111.52637722525041 & 17.8 \\
   3   & 87.66752154725002 & 18.8  \\
   3.5 & 107.42923167915677 & 19.2  \\ \hline
\end{array} $$

## 4th Model : Model 3 + Adding Price in Features

In [13]:
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
# Reset the environment so the price-aware models start from a fresh history.
r = requests.get(url=url_reset,params=params)
# Fail fast with a clear HTTP error instead of an obscure JSON/KeyError further down.
r.raise_for_status()
data = r.json()
nb_users, nb_items = data['nb_users'], data['nb_items']

action_history, state_history, rewards_history = data['action_history'], data['state_history'], data['rewards_history']
next_state = data['next_state']

# First field of the first row of every historical state (same [0][0] position the
# evaluation loops use as the user id).
users_ids = list(zip(*list(list(zip(*state_history))[0])))[0]
pos_rewards = compute_pos_rewards(rewards_history)
pos_data = create_pos_data(pos_rewards,state_history,action_history)

# Fix: the training loop of this section reads `n_epochs`; the original assigned the value to
# an unused `nb_epochs`, so training silently relied on a value left over from an earlier section.
nb_iters, n_epochs = 1000, 50
# Trained price-aware models, stored in the same order as `margins`.
models4 = []

In [16]:
print("Start building models for different margins...")
for ma in margins:
    print("\tBuild model for margin = ", ma)
    def margin_comparator_loss_(inputs, margin=ma):
        positive_pair_sim, negative_pair_sim = inputs
        return tf.maximum(negative_pair_sim - positive_pair_sim + margin, 0)

    deep_match_model4, deep_triplet_model4 = build_models_covariates_price(nb_users, nb_items, user_dim=32,
                                                                        item_dim= 15, n_hidden =2, hidden_size=64,
                                                                        dropout=0.1,l2_reg=0,loss=margin_comparator_loss_)
    deep_triplet_model4.compile(loss=identity_loss, optimizer='adam')
    fake_y = np.ones_like(pos_data['user_id'])

    for i in range(n_epochs):
        # Sample new negatives to build different triplets at each epoch
        inputs = sample_quintuplets_price(pos_data,state_history, random_seed=i)
    
        # Fit the model incrementally by doing a single pass over the sampled quintuplets.
        deep_triplet_model4.fit(inputs, fake_y, shuffle=True, batch_size=32, epochs=1, verbose=0)
    models4.append(deep_match_model2)
print("Finisht building models.")

Start building models for different margins...
	Build model for margin =  0.5
	Build model for margin =  1
	Build model for margin =  1.5
	Build model for margin =  2
	Build model for margin =  2.5
	Build model for margin =  3
	Build model for margin =  3.5
Finisht building models.


In [19]:
for margin, model in zip(margins, models2):
    rewards, nb_reward_pos = 0, 0
    mean,std = compute_price_norm(state_history)
    k, max_k = 0, int(nb_iters / 50) # index just to print progrees
    s = 'Test for margin: ' + str(margin)
    end_ln = "\r"
    for i in range(nb_iters): 
        if i % 50 == 0:
            if (max_k - k - 1 == 0): end_ln = "\n"
            print(s + "\t|" + k * "==" +">"+ (max_k - k - 1) * "--" +"|" + str(int(i * 100/nb_iters)) + "%", end=end_ln)
            k += 1
        sleep(0.05) # sleep to let the API breathe and allow others to call requests

        list_items = np.asarray(list(list(zip(*next_state))[1]))
        if next_state[0][0] in pos_data.user_id.unique().tolist():
            next_user = np.asarray([next_state[0][0] for i in range(len(next_state))])
            list_feat_user = np.expand_dims(np.asarray([next_state[0][3:5] for i in range(len(next_state))]), axis=1)
        else:
            #predict items based on users' profile similarity 
            most_similar_user_id = compute_most_similar(state_history,next_state,pos_data) 
            next_user = np.asarray([most_similar_user_id for i in range(len(next_state))])
            list_feat_user = list(pos_data.loc[pos_data.user_id==most_similar_user_id,'feat_users'])[0]
            list_feat_user = np.expand_dims(np.asarray([list_feat_user for i in range(len(next_state))]), axis=1)


        prices =list(list(zip(*next_state))[1])
        feat_items = [next_state[0][5:-1] for i in range(len(next_state))]
        prices_norm = [(price-mean)/std for price in prices ] 

        for i,feature in  enumerate(feat_items):
            feature.append(prices_norm[i])

        list_feat_items = np.expand_dims(np.asarray(feat_items), axis=1)

        predictions = deep_match_model4.predict([next_user, list_items, list_feat_user, list_feat_items])
        recommended_item = np.argmax(predictions) # position item 

        params['recommended_item'] = recommended_item 
        r=requests.get(url=url_predict,params=params)
        d=r.json()
        reward= d['reward'] # previous reward for the recommended item predicted

        # check how many times the item recommended was actually bought
        if reward > 0 : 
            nb_reward_pos+=1

        next_state = d['state']
        rewards += reward
    print('\tAverage reward: ', rewards/nb_iters)
    print('\tPercentage of positive rewards: ', 100*(nb_reward_pos/nb_iters), '%')

	Average reward:  157.17093943610718
	Percentage of positive rewards:  34.300000000000004 %
	Average reward:  150.76404292177008
	Percentage of positive rewards:  28.199999999999996 %
	Average reward:  121.38626228014208
	Percentage of positive rewards:  22.6 %
	Average reward:  98.01896432391887
	Percentage of positive rewards:  19.2 %
	Average reward:  80.01904241963929
	Percentage of positive rewards:  16.8 %
	Average reward:  78.6337802661393
	Percentage of positive rewards:  18.5 %
	Average reward:  95.7018348709514
	Percentage of positive rewards:  22.900000000000002 %


$$ \begin{array}{|c|c|c|} \hline
   \textbf{margin} & \textbf{avg reward} & \textbf{\% of positive} \\ \hline
   0.5 & \textbf{157.17093943610718} & \textbf{34.30}  \\ 
   1   & 150.76404292177008 & 28.2 \\
   1.5 & 121.38626228014208 & 22.6  \\
   2   & 98.01896432391887 & 19.2 \\
   2.5 & 80.01904241963929 & 16.8 \\
   3   & 78.6337802661393 & 18.5  \\
   3.5 & 95.7018348709514 & 22.90  \\ \hline
\end{array} $$

## Model Hybrid : LightFM + Covariates / Cold Start

In [29]:
user_id = '0H3BRZ9M0BQP3SFPSCL3'
# Reset the environment so the hybrid models start from a fresh history.
r = requests.get(url=url_reset,params=params)
# Fail fast with a clear HTTP error instead of an obscure JSON/KeyError further down.
r.raise_for_status()
data = r.json()
nb_users, nb_items = data['nb_users'], data['nb_items']

action_history, state_history, rewards_history = data['action_history'], data['state_history'], data['rewards_history']
next_state = data['next_state']

# First field of the first row of every historical state (same [0][0] position the
# evaluation loops use as the user id).
users_ids = list(zip(*list(list(zip(*state_history))[0])))[0]
pos_rewards = compute_pos_rewards(rewards_history)
pos_data = create_pos_data(pos_rewards,state_history,action_history)

# Fix: the Siamese training loop of this section reads `n_epochs`; the original assigned the
# value to `nb_epochs`, which the LightFM cell below redefines for its own use anyway.
nb_iters, n_epochs = 1000, 50
# Trained hybrid-section models, stored in the same order as `margins`.
models5 = []

In [30]:
# Map every raw id to a matrix index. The index is the row position in `pos_data`, so an id
# that appears several times keeps the position of its LAST occurrence.
dict_users = {uid: pos for pos, uid in enumerate(pos_data['user_id'])}
dict_items = {iid: pos for pos, iid in enumerate(pos_data['item_id'])}

rows = [dict_users[uid] for uid in pos_data['user_id']]
columns = [dict_items[iid] for iid in pos_data['item_id']]

# Largest index used on each axis; the matrix needs one more row/column than that.
M = np.max(rows)
N = np.max(columns)
# Sparse user x item matrix with a 1 for every row of `pos_data`.
c = csr_matrix((np.ones(len(pos_data)), (rows, columns)), shape=(M+1, N+1))

In [31]:
# LightFM hyper-parameters (note: `nb_epochs` here is the LightFM epoch count).
nb_threads = 2
nb_components = 30
nb_epochs = 60
alpha = 1e-5

# A WARP-loss model is used because these generally perform best.
model_lightFM = LightFM(
    loss='warp',
    learning_schedule='adagrad',
    item_alpha=alpha,
    no_components=nb_components,
)
model_lightFM.fit(c, epochs=nb_epochs, num_threads=nb_threads)

<lightfm.lightfm.LightFM at 0x2265089c978>

In [32]:
print("Start building models for different margins...")
for ma in margins:
    print("\tBuild model for margin = ", ma)
    def margin_comparator_loss_(inputs, margin=ma):
        positive_pair_sim, negative_pair_sim = inputs
        return tf.maximum(negative_pair_sim - positive_pair_sim + margin, 0)
    
    

    deep_match_model5, deep_triplet_model5 = build_models_covariates_price(nb_users, nb_items, user_dim=32,
                                                                        item_dim= 15, n_hidden =2, hidden_size=64,
                                                                        dropout=0.1,l2_reg=0,loss=margin_comparator_loss_)
    deep_triplet_model5.compile(loss=identity_loss, optimizer='adam')
    fake_y = np.ones_like(pos_data['user_id'])

    for i in range(n_epochs):
        # Sample new negatives to build different triplets at each epoch
        inputs = sample_quintuplets_price(pos_data,state_history, random_seed=i)
    
        # Fit the model incrementally by doing a single pass over the sampled quintuplets.
        deep_triplet_model5.fit(inputs, fake_y, shuffle=True, batch_size=32, epochs=1, verbose=0)
    models4.append(deep_match_model2)
print("Finisht building models.")

Start building models for different margins...
	Build model for margin =  0.5
	Build model for margin =  1
	Build model for margin =  1.5
	Build model for margin =  2
	Build model for margin =  2.5
	Build model for margin =  3
	Build model for margin =  3.5
Finisht building models.


In [34]:
for margin, model in zip(margins, models2):
    rewards, nb_reward_pos,nb_switch,switch_success=0,0,0,0
    mean,std = compute_price_norm(state_history)
    k, max_k = 0, int(nb_iters / 50) # index just to print progrees
    s = 'Test for margin: ' + str(margin)
    end_ln = "\r"
    for i in range(nb_iters): 
        if i % 50 == 0:
            if (max_k - k - 1 == 0): end_ln = "\n"
            print(s + "\t|" + k * "==" +">"+ (max_k - k - 1) * "--" +"|" + str(int(i * 100/nb_iters)) + "%", end=end_ln)
            k += 1
        sleep(0.05) # sleep to let the API breathe and allow others to call requests

        list_items = np.asarray(list(list(zip(*next_state))[1]))
        if next_state[0][0] in pos_data.user_id.unique().tolist():
            next_user = np.asarray([next_state[0][0] for i in range(len(next_state))])
            next_userLFM = dict_users[next_state[0][0]]
            list_feat_user = np.expand_dims(np.asarray([next_state[0][3:5] for i in range(len(next_state))]), axis=1)
        else:
            #predict items based on users' profile similarity 
            most_similar_user_id = compute_most_similar(state_history,next_state,pos_data) 
            next_user = np.asarray([most_similar_user_id for i in range(len(next_state))])
            next_userLFM = dict_users[most_similar_user_id]
            list_feat_user = list(pos_data.loc[pos_data.user_id==most_similar_user_id,'feat_users'])[0]
            list_feat_user = np.expand_dims(np.asarray([list_feat_user for i in range(len(next_state))]), axis=1)


        prices =list(list(zip(*next_state))[1])
        feat_items = [next_state[0][5:-1] for i in range(len(next_state))]
        prices_norm = [(price-mean)/std for price in prices ] 

        for i,feature in  enumerate(feat_items):
            feature.append(prices_norm[i])

        list_feat_items = np.expand_dims(np.asarray(feat_items), axis=1)

        predictions = deep_match_model5.predict([next_user, list_items, list_feat_user, list_feat_items])
        predictionsLFM = model_lightFM.predict(user_ids=next_userLFM, item_ids=list_items)
        recommended_item = np.argmax(predictions)
        recommended_itemLFM = np.argmax(predictionsLFM)

        ## Recommend most expensive 
        if recommended_item != recommended_itemLFM:
            if next_state[recommended_item][2] < next_state[recommended_itemLFM][2]:
                recommended_item = recommended_itemLFM
                nb_switch+=1

        params['recommended_item'] = recommended_item 
        r=requests.get(url=url_predict,params=params)
        d=r.json()
        reward= d['reward'] # previous reward for the recommended item predicted
        if reward > 0 : 
            nb_reward_pos+=1 
            if recommended_item==recommended_itemLFM:
                switch_success+=1

        next_state = d['state']
        rewards += reward
    print('\tAverage reward: ', rewards/nb_iters)
    print('\tPercentage of positive rewards: ', 100*(nb_reward_pos/nb_iters), '%')
    print('\tNb times we recommended based on LightFM results: ',nb_switch)
    print('\tNb times the switch in predictions was a success: ',100*(switch_success/nb_switch))

	Average reward:  212.89257648123777
	Percentage of positive rewards:  32.5 %
	Nb times we recommended based on LightFM results:  273
	Nb times the switch in predictions was a success:  31.135531135531135
	Average reward:  195.70383298090158
	Percentage of positive rewards:  27.0 %
	Nb times we recommended based on LightFM results:  230
	Nb times the switch in predictions was a success:  22.608695652173914
	Average reward:  168.80267799513533
	Percentage of positive rewards:  26.200000000000003 %
	Nb times we recommended based on LightFM results:  554
	Nb times the switch in predictions was a success:  27.79783393501805
	Average reward:  129.62339979739562
	Percentage of positive rewards:  23.1 %
	Nb times we recommended based on LightFM results:  772
	Nb times the switch in predictions was a success:  22.797927461139896
	Average reward:  108.08911179481099
	Percentage of positive rewards:  22.900000000000002 %
	Nb times we recommended based on LightFM results:  846
	Nb times the switc

$$ \begin{array}{|c|c|c|} \hline
   \textbf{margin} & \textbf{avg reward} & \textbf{\% of positive} \\ \hline
   0.5 & \textbf{212.89257648123777} & \textbf{32.5}  \\ 
   1   & 195.70383298090158 & 27.0 \\
   1.5 & 168.80267799513533 & 26.20  \\
   2   & 129.62339979739562 & 23.1 \\
   2.5 & 108.08911179481099 & 22.90 \\
   3   & 110.22737259594653 & 23.5  \\
   3.5 & 91.52149443484106 & 20.59  \\ \hline
\end{array} $$