In [1]:
#Dependencies
import pandas as pd
import numpy as np
import tensorflow as tf
import itertools
import matplotlib.pyplot as plt
import time

from envs import OfflineEnv
from recommender import DRRAgent

import os

# Dataset files are looked up in an 'ml-1m/' folder under the current working directory.
ROOT_DIR = os.getcwd()
DATA_DIR = os.path.join(ROOT_DIR, 'ml-1m/')
# Passed as the last argument to both OfflineEnv and DRRAgent further down.
STATE_SIZE = 10

In [175]:
def evaluate(recommender, env, top_k=False,
             actor_weights='/home/diominor/Workspace/DRR/save_weights/actor_7000.h5',
             critic_weights='/home/diominor/Workspace/DRR/save_weights/critic_7000.h5'):
    """Run one evaluation episode with saved weights and print the results.

    The saved actor/critic weights are loaded into ``recommender``, the
    environment is reset, and the loop below recommends items until the
    environment reports ``done``. Nothing is returned; the recommendations,
    the per-step hit rate ("precision") and the accumulated reward are printed.

    Args:
        recommender: Agent exposing ``load_model(actor_path, critic_path)`` and
            an ``actor`` with ``embedding_network``, ``network`` and
            ``recommend_item``.
        env: Environment exposing ``reset()``, ``step()``, ``user_items``,
            ``recommended_items`` and ``get_items_names()``.
        top_k: Forwarded unchanged to ``actor.recommend_item`` and ``env.step``.
        actor_weights: Path of the actor weight file handed to ``load_model``.
        critic_weights: Path of the critic weight file handed to ``load_model``.
    """
    recommender.load_model(actor_weights, critic_weights)

    # Reset the per-episode statistics.
    episode_reward = 0
    correct_count = 0
    steps = 0
    # Reset the environment.
    user_id, items_ids, done = env.reset()
    print(f'user_id : {user_id}, rated_items_length:{len(env.user_items)}')
    print('items : \n', np.array(env.get_items_names(items_ids)))

    while not done:

        # Observe current state & find action.
        ## Embed the user and the current items.
        user_id = tf.convert_to_tensor(user_id)
        items_ids = tf.convert_to_tensor(items_ids)
        user_eb, items_eb = recommender.actor.embedding_network(user_id, items_ids)
        # Add leading axes of size 1 before feeding the actor network.
        user_eb = tf.reshape(user_eb, (1, 1, *user_eb.shape))
        items_eb = tf.reshape(items_eb, (1, *items_eb.shape))
        ## Compute the action (ranking scores); the second output is unused here.
        action, _ = recommender.actor.network(user_eb, items_eb)
        ## Pick the item(s) to recommend.
        recommended_item = recommender.actor.recommend_item(action, env.recommended_items, top_k=top_k, is_test=True)
        print(f'recommended items ids : {recommended_item}')
        print(f'recommened items : \n {np.array(env.get_items_names(recommended_item), dtype=object)}')
        # Calculate reward & observe new state (in env).
        ## Step
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1
        # A step counts as a hit whenever its reward is positive.
        if reward > 0:
            correct_count += 1
            print('GOT IT!!!!')
        print()

    # If the environment was already done after reset, no step ran; report 0.0
    # instead of dividing by zero.
    precision = correct_count / steps if steps else 0.0
    print(f'precision : {precision}, episode_reward : {episode_reward}')
    print()

In [3]:
# Load the '::'-delimited ratings, users and movies files from DATA_DIR.
def _read_dat(filename, encoding=None):
    """Return one list of '::'-separated string fields per line of DATA_DIR/filename."""
    # The context manager closes the file handle as soon as the lines are parsed.
    with open(os.path.join(DATA_DIR, filename), 'r', encoding=encoding) as dat_file:
        return [line.strip().split("::") for line in dat_file]


ratings_list = _read_dat('ratings.dat')
users_list = _read_dat('users.dat')
# movies.dat is decoded as latin-1, unlike the other two files.
movies_list = _read_dat('movies.dat', encoding='latin-1')
ratings_df = pd.DataFrame(ratings_list, columns = ['UserID', 'MovieID', 'Rating', 'Timestamp'], dtype = np.uint32)
movies_df = pd.DataFrame(movies_list, columns = ['MovieID', 'Title', 'Genres'])
# Convert the whole column at once rather than calling pd.to_numeric per element.
movies_df['MovieID'] = pd.to_numeric(movies_df['MovieID'])

In [4]:
# Map each movie id (first field of a movies_list row) to the list of its remaining fields.
movies_id_to_movies = {movie_id: details for movie_id, *details in movies_list}

In [5]:
# Number of rows parsed from movies.dat.
len(movies_list)

3883

In [6]:
# Preview the first five rating rows.
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [7]:
# # Check the distribution of the movies chosen by users (disabled)
# plt.figure(figsize=(20,10))
# plt.hist(ratings_df["MovieID"], bins=3883)
# plt.show()

In [8]:
# Check for missing values: count nulls in each column.
ratings_df.isnull().sum()

UserID       0
MovieID      0
Rating       0
Timestamp    0
dtype: int64

In [9]:
# Check the maximum user id: the first line prints True when the number of
# distinct ids equals the largest id, the second prints that largest id.
distinct_user_ids = set(ratings_df["UserID"])
largest_user_id = max(int(raw_id) for raw_id in distinct_user_ids)
print(len(distinct_user_ids) == largest_user_id)
print(largest_user_id)

True
6040


In [10]:
# Convert every column to 64-bit integers in one vectorised cast. This replaces
# the deprecated element-wise DataFrame.applymap(int), which ran a Python call
# for every cell.
ratings_df = ratings_df.astype('int64')

In [11]:
# Prepare an empty history list for every distinct user id; a later cell fills
# these lists with the movies each user rated, in order.
users_dict = {user : [] for user in set(ratings_df["UserID"])}
users_dict[1]

[]

In [12]:
# Sort all ratings chronologically (ascending Timestamp).
# NOTE(review): no `kind` is passed to sort_values, so rows sharing a Timestamp
# may not keep their original relative order — confirm whether that matters.
ratings_df = ratings_df.sort_values(by='Timestamp', ascending=True)
ratings_df.head(5)

Unnamed: 0,UserID,MovieID,Rating,Timestamp
1000138,6040,858,4,956703932
1000153,6040,2384,4,956703954
999873,6040,593,5,956703954
1000007,6040,1961,4,956703977
1000192,6040,2019,5,956703977


In [13]:
# Append each (movie, rating) pair to its user's history, following the current
# row order of ratings_df. Iterating one NumPy array avoids building a Series
# per row, which is what DataFrame.iterrows() does.
# NOTE(review): re-running this cell appends the same pairs again; re-create
# users_dict first if the cell has to be executed more than once.
for user_id, movie_id, rating in ratings_df[['UserID', 'MovieID', 'Rating']].to_numpy():
    users_dict[user_id].append((movie_id, rating))
users_dict[1]

[(3186, 4),
 (1721, 4),
 (1270, 5),
 (1022, 5),
 (2340, 3),
 (1836, 5),
 (3408, 4),
 (1207, 4),
 (2804, 5),
 (260, 4),
 (720, 3),
 (1193, 5),
 (919, 4),
 (608, 4),
 (2692, 4),
 (1961, 5),
 (2028, 5),
 (3105, 5),
 (938, 4),
 (1035, 5),
 (1962, 4),
 (1028, 5),
 (2018, 4),
 (150, 5),
 (1097, 4),
 (914, 3),
 (1287, 5),
 (2797, 4),
 (1246, 4),
 (2762, 4),
 (661, 3),
 (2918, 4),
 (531, 4),
 (3114, 4),
 (2791, 4),
 (1029, 5),
 (2321, 3),
 (1197, 3),
 (594, 4),
 (2398, 4),
 (1545, 4),
 (527, 5),
 (745, 3),
 (595, 5),
 (588, 4),
 (1, 5),
 (2687, 3),
 (783, 4),
 (2294, 4),
 (2355, 5),
 (1907, 4),
 (1566, 4),
 (48, 5)]

In [14]:
# Length of each user's movie history.
# NOTE(review): entries follow the iteration order of set(ratings_df["UserID"]),
# which Python does not guarantee to be sorted — confirm how OfflineEnv indexes this list.
users_history_lens = [len(users_dict[u]) for u in set(ratings_df["UserID"])]
users_history_lens[:10]

[53, 129, 51, 21, 198, 71, 31, 139, 106, 401]

In [15]:
# Show the (movie, rating) history stored for user 1.
users_dict[1]

[(3186, 4),
 (1721, 4),
 (1270, 5),
 (1022, 5),
 (2340, 3),
 (1836, 5),
 (3408, 4),
 (1207, 4),
 (2804, 5),
 (260, 4),
 (720, 3),
 (1193, 5),
 (919, 4),
 (608, 4),
 (2692, 4),
 (1961, 5),
 (2028, 5),
 (3105, 5),
 (938, 4),
 (1035, 5),
 (1962, 4),
 (1028, 5),
 (2018, 4),
 (150, 5),
 (1097, 4),
 (914, 3),
 (1287, 5),
 (2797, 4),
 (1246, 4),
 (2762, 4),
 (661, 3),
 (2918, 4),
 (531, 4),
 (3114, 4),
 (2791, 4),
 (1029, 5),
 (2321, 3),
 (1197, 3),
 (594, 4),
 (2398, 4),
 (1545, 4),
 (527, 5),
 (745, 3),
 (595, 5),
 (588, 4),
 (1, 5),
 (2687, 3),
 (783, 4),
 (2294, 4),
 (2355, 5),
 (1907, 4),
 (1566, 4),
 (48, 5)]

In [16]:
# One more than the largest user / movie id found in the ratings.
users_num = int(ratings_df["UserID"].max()) + 1
items_num = int(ratings_df["MovieID"].max()) + 1

In [17]:
# Show the two sizes that are passed to DRRAgent below.
print(users_num, items_num)

6041 3953


### 준비된것
users_dict, users_history_lens, movies_id_to_movies, users_num, items_num

In [18]:
# Make Keras use float64 as its default float type; this runs before the agent is built below.
tf.keras.backend.set_floatx('float64')

In [216]:
# Build the offline environment and the DRR agent from the objects prepared above.
env = OfflineEnv(users_dict, users_history_lens, movies_id_to_movies, STATE_SIZE)
recommender = DRRAgent(env, users_num, items_num, STATE_SIZE)
# build_networks() is called on both networks before evaluate() loads the saved
# weights — presumably this creates the model variables; TODO confirm in recommender.py.
recommender.actor.build_networks()
recommender.critic.build_networks()
# Run one evaluation episode; top_k=20 is forwarded to recommend_item and env.step.
evaluate(recommender, env, top_k=20)

user_id : 3081, rated_items_length:384
items : 
 [['Big Trouble in Little China (1986)' 'Action|Comedy']
 ["One Flew Over the Cuckoo's Nest (1975)" 'Drama']
 ['Pelican Brief, The (1993)' 'Thriller']
 ['Clockwork Orange, A (1971)' 'Sci-Fi']
 ['Rocky III (1982)' 'Action|Drama']
 ['Back to the Future (1985)' 'Comedy|Sci-Fi']
 ['Star Wars: Episode IV - A New Hope (1977)'
  'Action|Adventure|Fantasy|Sci-Fi']
 ['Godfather: Part II, The (1974)' 'Action|Crime|Drama']
 ['Godfather, The (1972)' 'Action|Crime|Drama']
 ['Raiders of the Lost Ark (1981)' 'Action|Adventure']]
recommended items ids : [3240 3400 2199 1809  999 1145 3761 1073   42    8 1366 1295 2911 1491
 2992 1832  253  569 2467 2726]
recommened items : 
 [list(['Big Tease, The (1999)', 'Comedy'])
 list(["We're Back! A Dinosaur's Story (1993)", "Animation|Children's"])
 list(['Phoenix (1998)', 'Crime|Drama'])
 list(['Hana-bi (1997)', 'Comedy|Crime|Drama'])
 list(['2 Days in the Valley (1996)', 'Crime'])
 list(['Snowriders (1996)', 'Do