# Deep Recommender RL System

This project aims to improve the performance of reinforcement-learning-based recommender systems. The report presents results on how using an actor network with an embedding layer, reducing overestimated Q-values, using several pretrained embeddings, and applying prioritized experience replay (PER) contribute to accuracy gains.

## Prerequisites

In [17]:
import pandas as pd
import numpy as np
import itertools

import logging, os

logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow as tf
import matplotlib.pyplot as plt
import plotly.express as px

import time
import tqdm


# Working directory at the time this cell runs.
ROOT_DIR = os.getcwd()
# Joining with '' yields ROOT_DIR with a trailing path separator; the .dat files are read from here.
DATA_DIR = os.path.join(ROOT_DIR, '')
# Passed to OfflineEnv and DRRAgent in the evaluation cell.
STATE_SIZE = 10

In [7]:
!git clone https://github.com/backgom2357/Recommender_system_via_deep_RL.git

Cloning into 'Recommender_system_via_deep_RL'...
remote: Enumerating objects: 385, done.[K
remote: Counting objects: 100% (385/385), done.[K
remote: Compressing objects: 100% (271/271), done.[K
remote: Total 385 (delta 203), reused 288 (delta 111), pack-reused 0[K
Receiving objects: 100% (385/385), 58.36 MiB | 27.19 MiB/s, done.
Resolving deltas: 100% (203/203), done.


In [13]:
%cd Recommender_system_via_deep_RL/

[Errno 2] No such file or directory: 'Recommender_system_via_deep_RL/'
/content/Recommender_system_via_deep_RL


In [10]:
!pip install wandb



## Download the dataset 

In [2]:
# -p: do not fail when ~/.kaggle already exists, so the cell can be re-run.
!mkdir -p ~/.kaggle

In [1]:
!pip install kaggle



In [3]:
!cp kaggle.json ~/.kaggle/

In [4]:
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets download -d odedgolden/movielens-1m-dataset

Downloading movielens-1m-dataset.zip to /content
  0% 0.00/5.83M [00:00<?, ?B/s]
100% 5.83M/5.83M [00:00<00:00, 95.4MB/s]


In [14]:
# -n: never overwrite files that were already extracted, so a re-run does not
# stop at an interactive "replace?" prompt.
!unzip -n ./movielens-1m-dataset.zip

Archive:  ./movielens-1m-dataset.zip
replace README? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace movies.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace ratings.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace users.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


## Load the dataset - lists of ratings, users, and movies

In [18]:
def _read_dat(filename, encoding=None):
    """Read a '::'-delimited .dat file from DATA_DIR.

    Returns a list with one entry per line, each entry being the list of
    string fields obtained by splitting the stripped line on '::'.
    """
    # The context manager closes the file handle even if parsing fails.
    with open(os.path.join(DATA_DIR, filename), 'r', encoding=encoding) as dat_file:
        return [line.strip().split("::") for line in dat_file]


ratings_list = _read_dat('ratings.dat')
users_list = _read_dat('users.dat')
# movies.dat is read as latin-1 (titles contain non-ASCII characters — presumably; confirm).
movies_list = _read_dat('movies.dat', encoding='latin-1')

# Build the frame from the string fields first, then convert explicitly:
# passing dtype= to the constructor with string data is not applied reliably
# across pandas versions, whereas astype parses the numeric strings.
ratings_df = pd.DataFrame(
    ratings_list, columns=['UserID', 'MovieID', 'Rating', 'Timestamp']
).astype(np.uint32)

movies_df = pd.DataFrame(movies_list, columns=['MovieID', 'Title', 'Genres'])
# Vectorized conversion of the whole column instead of a per-element apply.
movies_df['MovieID'] = pd.to_numeric(movies_df['MovieID'])

users_df = pd.DataFrame(users_list, columns=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'])

  exec(code_obj, self.user_global_ns, self.user_ns)


In [19]:
# Map each movie's first field (the MovieID, still a string here) to the list
# of its remaining fields.
movies_id_to_movies = {movie_id: details for movie_id, *details in movies_list}
len(movies_list)

3883

In [20]:
movies_df.head(5)

Unnamed: 0,MovieID,Title,Genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [21]:
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [23]:
# Genre labels, spelled as in the '|'-separated Genres column of movies_df.
genres = [
    'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
    'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

In [None]:
!python train.py

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
recommended items : 213,  epsilon : 0.875, reward : -0.5
897/5000, precision : 41%, total_reward:59.5, q_loss : 8.533352956874296e-05, mean_action : -0.044209032393441434
recommended items : 23,  epsilon : 0.875, reward : -0.5
898/5000, precision :  7%, total_reward:-5.0, q_loss : 8.480729593429714e-05, mean_action : -0.05520304368092463
recommended items : 40,  epsilon : 0.875, reward : -0.5
899/5000, precision : 33%, total_reward:5.0, q_loss : 8.121372229652479e-05, mean_action : -0.14479264005025227
recommended items : 116,  epsilon : 0.875, reward : -0.5
900/5000, precision : 27%, total_reward:10.5, q_loss : 7.793539407430217e-05, mean_action : -0.10637297760765507
recommended items : 60,  epsilon : 0.874, reward : -0.5
901/5000, precision : 24%, total_reward:-1.5, q_loss : 0.00012888821947854012, mean_action : 0.04453431379795074
recommended items : 59,  epsilon : 0.874, reward : -0.5
902/5000, precision : 30%, total

In [31]:
ratings_df.isnull().sum()

UserID       0
MovieID      0
Rating       0
Timestamp    0
dtype: int64

In [32]:
# Sanity check on the user IDs: the number of distinct IDs is compared with the
# largest ID (they match when IDs run 1..N without gaps).
unique_user_ids = set(ratings_df["UserID"])
max_user_id = max(int(user_id) for user_id in unique_user_ids)
print(len(unique_user_ids) == max_user_id)
print(max_user_id)

True
6040


In [33]:
# Convert every column to 64-bit integers in one vectorized cast.
# (DataFrame.applymap is deprecated in recent pandas and runs element by element.)
ratings_df = ratings_df.astype(np.int64)

In [34]:
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [36]:
# users_dict: every (MovieID, Rating) pair per user, in ratings_df row order.
# users_dict_for_history_len: the same pairs restricted to ratings >= 4; its
# list lengths are turned into users_history_lens in the next cell.
users_dict = {user: [] for user in set(ratings_df["UserID"])}
users_dict_for_history_len = {user: [] for user in users_dict}

# Walk the three columns in lockstep instead of using iterrows(), which builds
# a Series object for every one of the rows.
for user_id, movie_id, rating in zip(
    ratings_df["UserID"], ratings_df["MovieID"], ratings_df["Rating"]
):
    users_dict[user_id].append((movie_id, rating))
    if rating >= 4:
        users_dict_for_history_len[user_id].append((movie_id, rating))

In [37]:
# Number of ratings >= 4 per user, ordered by ascending UserID.
# The IDs are sorted explicitly because this list is sliced by position later
# and set iteration order is not guaranteed to be ascending.
users_history_lens = [len(users_dict_for_history_len[u]) for u in sorted(set(ratings_df["UserID"]))]

In [38]:
# One more than the largest UserID / MovieID found in the ratings.
users_num = int(ratings_df["UserID"].max()) + 1
items_num = int(ratings_df["MovieID"].max()) + 1

In [39]:
print(users_num, items_num)

6041 3953


In [40]:
# 80% of users_num, truncated to an integer, is used as the training-user count.
# NOTE(review): users_num is the largest UserID plus one, not the user count — confirm the split sizes are as intended.
train_users_num = int(users_num * 0.8)
# Items are not split: training sees the full item range.
train_items_num = items_num
print(train_users_num, train_items_num)

4832 3953


In [41]:
# Training split: user IDs 1..train_users_num (inclusive) and the first
# train_users_num entries of users_history_lens.
train_user_ids = range(1, train_users_num + 1)
train_users_dict = {user_id: users_dict[user_id] for user_id in train_user_ids}
train_users_history_lens = users_history_lens[:train_users_num]
print(len(train_users_dict), len(train_users_history_lens))

4832 4832


In [42]:
# 20% of users_num, truncated to an integer, is used as the evaluation-user count.
eval_users_num = int(users_num * 0.2)
# Items are not split: evaluation sees the full item range.
eval_items_num = items_num
print(eval_users_num, eval_items_num)

1208 3953


In [43]:
# Evaluation split: the last eval_users_num user IDs below users_num and the
# last eval_users_num entries of users_history_lens.
eval_user_ids = range(users_num - eval_users_num, users_num)
eval_users_dict = {user_id: users_dict[user_id] for user_id in eval_user_ids}
eval_users_history_lens = users_history_lens[-eval_users_num:]
print(len(eval_users_dict), len(eval_users_history_lens))

1208 1208


In [44]:
def evaluate(recommender, env, check_movies = False, top_k=False):
    """Run the recommender against ``env`` and score its recommendations.

    Args:
        recommender: agent providing ``embedding_network.get_layer``,
            ``srm_ave``, ``actor.network`` and ``recommend_item``.
        env: environment providing ``reset``, ``step``, ``recommended_items``,
            ``user_items`` and ``get_items_names``.
        check_movies: when true, print the initial items, each recommendation
            and the resulting metrics.
        top_k: forwarded to ``recommend_item`` and ``env.step``. When truthy,
            the step reward is treated as a sequence of per-item rewards and
            precision / nDCG are accumulated; when falsy both metrics stay 0.

    Returns:
        ``(mean_precision, mean_ndcg)`` averaged over the steps taken, or
        ``(0.0, 0.0)`` when the episode is already done right after ``reset``.
    """
    # Reset the episodic reward and metric accumulators
    episode_reward = 0
    steps = 0
    mean_precision = 0
    mean_ndcg = 0
    # Reset the environment
    user_id, items_ids, done = env.reset()
    if check_movies:
        print(f'user_id : {user_id}, rated_items_length:{len(env.user_items)}')
        print('items : \n', np.array(env.get_items_names(items_ids)))

    while not done:

        # Observe current state & find action
        ## Embedding model
        user_eb = recommender.embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = recommender.embedding_network.get_layer('movie_embedding')(np.array(items_ids))
        ## SRM state
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        ## Action (ranking score)
        action = recommender.actor.network(state)
        ## Item
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)
        if check_movies:
            print(f'recommended items ids : {recommended_item}')
            print(f'recommened items : \n {np.array(env.get_items_names(recommended_item), dtype=object)}')
        # Calculate reward & observe new state (in env)
        ## Step
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]
            # nDCG against an ideal ranking in which every position is a hit
            dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
            # idcg is 0 only when no rewards came back; count that step as 0
            mean_ndcg += dcg/idcg if idcg else 0

            # Precision: count the hits directly, so a reward list shorter
            # than top_k is not over-counted.
            correct_num = sum(correct_list)
            mean_precision += correct_num/top_k

        reward = np.sum(reward)
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1

        if check_movies:
            if top_k:
                print(f'precision : {correct_num/top_k}, dcg : {dcg:0.3f}, idcg : {idcg:0.3f}, ndcg : {dcg/idcg if idcg else 0:0.3f}, reward : {reward}')
            else:
                # Ranking metrics are only defined in top-k mode
                print(f'reward : {reward}')
            print()
        # NOTE(review): this unconditional break means only the first step of
        # the episode is ever evaluated — confirm that this is intended.
        break

    # Nothing was evaluated (episode done at reset): avoid dividing by zero
    if steps == 0:
        return 0.0, 0.0

    if check_movies:
        print(f'precision : {mean_precision/steps}, ngcg : {mean_ndcg/steps}, episode_reward : {episode_reward}')
        print()

    return mean_precision/steps, mean_ndcg/steps

def calculate_ndcg(rel, irel):
    """Return ``(dcg, idcg)`` for a ranked list and its ideal counterpart.

    Each entry of ``rel`` counts as 1 when it is > 0 and as 0 otherwise;
    entries of ``irel`` are used as given. Position ``i`` (0-based) is
    discounted by ``log2(i + 2)``. Only positions present in both sequences
    contribute. The caller computes the ratio ``dcg / idcg``.
    """
    hits = [1 if gain > 0 else 0 for gain in rel]
    dcg, idcg = 0, 0
    for rank, (hit, ideal_gain) in enumerate(zip(hits, irel), start=2):
        discount = np.log2(rank)
        dcg += hit / discount
        idcg += ideal_gain / discount
    return dcg, idcg

In [45]:
tf.keras.backend.set_floatx('float64')

In [48]:
from envs import OfflineEnv
from recommender import DRRAgent

# Running sums of the per-user metrics returned by evaluate().
sum_precision = 0
sum_ndcg = 0
TOP_K = 10  # forwarded to evaluate() as top_k

for user_id in eval_users_dict.keys():
    # A new environment is created for each evaluation user (passed as fix_user_id),
    # then a new agent whose networks are built and whose weights are loaded from ./save_weights.
    # NOTE(review): the agent is rebuilt and its weights reloaded on every iteration;
    # whether this can be hoisted out of the loop depends on how DRRAgent uses env — confirm.
    env = OfflineEnv(eval_users_dict, users_history_lens, movies_id_to_movies, STATE_SIZE, fix_user_id=user_id)
    recommender = DRRAgent(env, users_num, items_num, STATE_SIZE)
    recommender.actor.build_networks()
    recommender.critic.build_networks()
    recommender.load_model('./save_weights/actor_5000_fixed.h5', 
                           './save_weights/critic_5000_fixed.h5')
    precision, ndcg = evaluate(recommender, env, top_k=TOP_K)
    sum_precision += precision
    sum_ndcg += ndcg
    
# Report the averages over all evaluation users.
print(f'precision@{TOP_K} : {sum_precision/len(eval_users_dict)}, ndcg@{TOP_K} : {sum_ndcg/len(eval_users_dict)}')

precision@10 : 0.44006622516556254, ndcg@10 : 0.42852527230787213
