In [None]:
# Installations and Package/Data Imports

!pip install -q cornac==1.15.4
!pip install -q statistics

import cornac
import numpy as np
import pandas as pd
import pickle
from statistics import harmonic_mean
from cornac.models import WMF, EASE
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP
from cornac.eval_methods import BaseMethod
from sklearn.model_selection import train_test_split

# print(f"System version: {sys.version}")
# print(f"Cornac version: {cornac.__version__}")

SEED = 42
VERBOSE = True


### Data Pre-Processing

In [None]:
# Data Imports

recommendations_df = pd.read_csv('recommendations_filtered.csv', encoding = 'utf-8')

# Seed the shuffle so the 80/20 split, and every metric computed on it, is the
# same on each Restart & Run All. SEED is defined in the setup cell.
train, test = train_test_split(recommendations_df, test_size = 0.2, random_state = SEED)

# Cell output: number of distinct app_id values in the full (unsplit) frame.
recommendations_df['app_id'].nunique()


In [None]:
# Users that appear in at least 10 rows of the recommendations data

recommendations_df_drop = recommendations_df.loc[:, ['user_id', 'app_id']].reset_index(drop = True)

# One row per user; after count() the 'app_id' column holds that user's row count.
reccs_df_rated_gb = (
    recommendations_df_drop
    .groupby(by = ['user_id'], as_index = False, sort = False)
    .count()
)

has_min_reviews = reccs_df_rated_gb['app_id'].ge(10)
user_ids_with_atleast_10_reviews = set(reccs_df_rated_gb.loc[has_min_reviews, 'user_id'])

# Cell output: how many users qualify.
len(user_ids_with_atleast_10_reviews)


In [None]:
# Games whose row count lies between 100 and 7000 (both bounds inclusive)

irecommendations_df_drop = recommendations_df.loc[:, ['user_id', 'app_id']].reset_index(drop = True)

# One row per game; after count() the 'user_id' column holds that game's row count.
# Rows are ordered by that count, largest first.
ireccs_df_rated_gb = (
    irecommendations_df_drop
    .groupby(by = ['app_id'], as_index = False, sort = False)
    .count()
    .sort_values('user_id', ascending = False)
)

# NOTE(review): despite the "below threshold" name, this set is a two-sided window (100..7000).
within_review_window = ireccs_df_rated_gb['user_id'].between(100, 7000)
game_ids_below_reviews_threshold = set(ireccs_df_rated_gb.loc[within_review_window, 'app_id'])

# Cell output: how many games fall inside the window.
len(game_ids_below_reviews_threshold)

In [None]:
# For every qualifying user, the list of app_ids found in that user's rows

jrecommendations_df_drop = recommendations_df.loc[:, ['user_id', 'app_id']].reset_index(drop = True)

# Keep only rows that belong to users in user_ids_with_atleast_10_reviews.
is_qualifying_user = jrecommendations_df_drop['user_id'].isin(user_ids_with_atleast_10_reviews)
jreccs_df_user_games = jrecommendations_df_drop.loc[is_qualifying_user]

# user_id -> list of app_ids, in row order.
games_per_user = jreccs_df_user_games.groupby('user_id')['app_id'].apply(list)
user_ids_with_reviewed_game_ids = dict(games_per_user)

# Cell output: number of users in the mapping.
len(user_ids_with_reviewed_game_ids)

### Model Instantiation and Training

In [None]:
# Values of EASE's `lamb` parameter to compare.
lamb_values = [500, 1000, 1500, 2000]

# Ranking metrics reported for every run, all cut off at k = 20.
eval_metrics = [
    cornac.metrics.NDCG(k = 20),
    cornac.metrics.NCRR(k = 20),
    cornac.metrics.Recall(k = 20),
]

# Build the evaluation method from the pre-made train/test split.
# NOTE(review): `.values` passes the raw rows through, so this assumes the frame's columns
# are ordered (user, item, rating) and that a rating >= 4 should count as positive — confirm.
eval_method = BaseMethod.from_splits(
    train_data = train.values,
    test_data = test.values,
    rating_threshold = 4,
    exclude_unknowns = False,
    seed = SEED,        # previously unset, although SEED is defined in the setup cell
    verbose = VERBOSE,  # previously hard-coded to True, the same value VERBOSE holds
)

for lamb in lamb_values:

    ease = EASE(
        # Include lamb in the name so each run is identifiable in the result tables;
        # before, every model in the sweep was named plain "EASE".
        name = f"EASE-lamb{lamb}",
        lamb = lamb,
        posB = True,
    )

    print(f"Running EASE model with lamb = {lamb}")

    cornac.Experiment(eval_method = eval_method, models = [ease], metrics = eval_metrics).run()

# NOTE(review): once the loop ends, `ease` is the model built with the LAST value in
# lamb_values (2000). The post-processing cell uses that object, not the best-scoring
# run — confirm this is intended.


### EASE Model Output Generation/Post-Processing

In [None]:
# Lookups between raw ids and the indices of the training set attached to `ease`.
item_id2idx = dict(ease.train_set.iid_map)
user_id2idx = dict(ease.train_set.uid_map)

item_idx2id = list(ease.train_set.item_ids)
user_idx2id = list(ease.train_set.user_ids)

num_users = len(user_id2idx)
num_games = len(item_id2idx)

print(num_users, num_games)

# user id -> ranked list of game ids, best first, restricted to games that are in
# game_ids_below_reviews_threshold and not already in the user's reviewed list.
result_output = {}

for user_id in user_ids_with_atleast_10_reviews:

    # The qualifying users come from the full data, not from the model's user map,
    # so skip any user without an index there; a bare [] lookup would raise KeyError.
    user_idx = user_id2idx.get(user_id)
    if user_idx is None:
        continue

    # A set makes each membership test O(1); the stored value is a list, and it was
    # scanned once for every ranked item.
    played_game_ids = set(user_ids_with_reviewed_game_ids[user_id])

    # rank() returns a pair; element 0 is iterated as item indices, best first.
    # Map each index to its raw game id once, not twice per item.
    ranked_game_ids = (item_idx2id[item_idx] for item_idx in ease.rank(user_idx)[0])

    result_output[user_id] = [
        game_id
        for game_id in ranked_game_ids
        if game_id in game_ids_below_reviews_threshold and game_id not in played_game_ids
    ]


In [None]:
# Cell output: number of users that received a ranked recommendation list.
len(result_output)


In [None]:
# Keep only the first 10 entries of each user's ranked list.
result_output_truncated = {
    user: games[:10]
    for user, games in result_output.items()
}
    

In [None]:
# Persist the truncated recommendations (user id -> up to 10 game ids) as a pickle file.
# Only unpickle this file from a trusted location: pickle.load can execute arbitrary code.

with open("ease_result_output_truncated.pkl", mode = "wb") as pkl_file:
    pickle.dump(result_output_truncated, pkl_file)
    