In [None]:
import os
import json
import pandas as pd
from tqdm import tqdm
from script import GET_RECOMMENDATION
import time
import gc

# Wall-clock start, used for the elapsed-time report at the end of the run.
start = time.time()

# Initialize book title:
# book_id is cast to str right after loading (presumably so it matches the id
# format GET_RECOMMENDATION works with -- confirm against script.py).
books_title = pd.read_json('./data/books_title.json')
books_title['book_id'] = books_title['book_id'].astype(str)

# Directory holding one CSV file per test user.
path = './data/users/'

# Destination of the evaluation dictionary written at the end of the run.
output_path = './data/dict/top_recs.json'

# Create the output directory up front so a missing folder is reported
# immediately instead of after every user has already been processed.
os.makedirs(os.path.dirname(output_path), exist_ok = True)

# store test-train-top recommendation for each user. key = user_xxx
dictionary = {}

# os.listdir() returns entries in arbitrary order and also lists anything else
# living in the folder (hidden files, notebook checkpoints, ...). Keep only the
# CSV files and sort them so runs are reproducible.
user_files = sorted(
    name for name in os.listdir(path)
    if name.lower().endswith('.csv') and os.path.isfile(os.path.join(path, name))
)

# No explicit `total`: tqdm takes the bar length from len(user_files), so the
# progress bar stays correct for any number of users.
for file in tqdm(user_files, desc = 'Processing', mininterval = 0.2):

    # Dictionary key / user name = file name without its extension.
    user_key = os.path.splitext(file)[0]

    file_path = os.path.join(path, file)
    user_info = pd.read_csv(file_path)

    # construct model class
    recommend = GET_RECOMMENDATION(user = user_info, book_title = books_title, percentage = 35, split_size = 0.3, current_user = user_key)

    csv_book_map = recommend.book_mapping()
    filtered_users = recommend.similar_users(book_map = csv_book_map)
    interaction_list = recommend.interactions(filtered_users, csv_book_map)

    # Free RAM: the intermediate results are no longer needed once the
    # interaction list has been built.
    del csv_book_map
    del filtered_users
    gc.collect()

    top_recommendations = recommend.collab_filter(interaction_list)
    top_recommendations = top_recommendations['book_id'].to_numpy().astype(int)

    # create dictionary entry per user for evaluation purpose
    train_data = recommend.get_train_set()
    train_data = train_data['book_id'].to_numpy().astype(int)

    test_data = recommend.get_test_set()
    test_data = test_data['book_id'].to_numpy().astype(int)

    print ('Creating dictionary for ' + user_key)
    # Entry layout: [train book ids, test book ids, recommended book ids].
    # .tolist() converts the numpy arrays into plain Python ints for json.dump.
    dictionary[user_key] = [train_data.tolist(), test_data.tolist(), top_recommendations.tolist()]

    # clear variables to free up memory
    del recommend
    del interaction_list
    del top_recommendations
    del train_data
    del test_data
    gc.collect()

    # NOTE(review): pause kept from the original; its purpose is not visible
    # here (possibly to let memory settle between users) -- confirm.
    time.sleep(3)

# save dictionary as JSON file
with open(output_path, 'w') as f:
    json.dump(dictionary, f)

print ('Finished Process in {:.3f} min'.format((time.time() - start)/60.0))

In [None]:
# To load this
# with open('./data/dict/top_recs.json') as file:
#       data = json.load(file)

'''
    dictionary contains each of the 10 users with training data @ index 0, testing data @ index 1 and top N recommendations @ index 2
    We can now see how many matches we get between index 1 (testing data) and index 2 (top N recommendations).

    To improve accuracy, we can try increasing percentage (an argument of GET_RECOMMENDATION). See script.py for other possible hyperparameters.
'''