In [None]:
# Installations and Package/Data Imports

!pip install -q cornac==1.15.4
!pip install -q statistics

import cornac
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from statistics import harmonic_mean
from cornac.data import Dataset, ImageModality
from cornac.data.reader import Reader
from cornac.eval_methods import BaseMethod, RatioSplit
from cornac.models import CausalRec, VBPR, VMF, BPR
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

# print(f"System version: {sys.version}")
# print(f"Cornac version: {cornac.__version__}")

# Seed value passed to the data split and to every model defined in this notebook.
SEED = 42
# Forwarded as the `verbose` argument of the evaluation method and of the models.
VERBOSE = True


### Data Imports and Pre-Processing

In [None]:
# Importing File with Ground Truth + Image Features PKL

# Interaction table: source of `uid_data` and of the per-user / per-game review counts computed in later cells.
recommendations_df = pd.read_csv('drive/MyDrive/recommendations_filtered.csv', encoding = 'utf-8')

def read_pickle(path):
    """Load every object that was pickled back-to-back into one file.

    Args:
        path: Location of the pickle file, opened in binary read mode.

    Returns:
        list: The unpickled objects in the order they appear in the file;
        an empty list when the file contains no pickled data.

    Note:
        `pickle.load` can execute arbitrary code, so only use this on files
        from a trusted source.
    """
    loaded = []

    with open(path, 'rb') as stream:

        while True:

            try:
                item = pickle.load(stream)

            except EOFError:
                # End of the stream reached: everything has been read.
                return loaded

            loaded.append(item)

# `read_pickle` returns a list with every object stored in the file; the first element is used
# as the image-vector table in a later cell.
list_10k_games_vec = read_pickle('drive/MyDrive/game_image_features.pkl')

# One plain tuple per row of the interaction table (index excluded); later passed as `data` to the split.
uid_data = list(recommendations_df.itertuples(index = False, name = None))

# Number of interaction rows (displayed as the cell output).
len(uid_data)


In [None]:
# Build the image-feature matrix and the parallel list of game ids used by the image modality

vector_df = list_10k_games_vec[0]

# Keep the rows whose `image_sub_id` contains '_0'.
# NOTE(review): `str.contains` performs a (regex) substring match, so an id such as 'x_01' would
# also be kept - confirm against the id format whether `str.endswith('_0')` was intended.
vector_df = vector_df[vector_df['image_sub_id'].str.contains('_0')]

vector_df = vector_df[['game_id', 'vector']].astype({'game_id': 'int'})
vector_df = vector_df.sort_values(by = 'game_id').reset_index(drop = True)

game_set = set(vector_df['game_id'])

# Read both columns by name. The previous version iterated over `vector_df.iloc` and indexed each
# row Series with the integers 0 / 1; integer keys on a label-indexed Series are treated as
# positions only through a deprecated fallback (FutureWarning since pandas 2.1).
# Row i of `image_features` belongs to the game id at `idx_id_map[i]`.
image_features_list = vector_df['vector'].tolist()
idx_id_map = vector_df['game_id'].tolist()

image_features = np.array(image_features_list)

print(image_features.shape, len(idx_id_map))


In [None]:
# Users that have at least 10 rows in the interaction table

recommendations_df_drop = recommendations_df.loc[:, ['user_id', 'app_id']].reset_index(drop = True)

# After count(), the `app_id` column holds the number of non-null game ids per user
reccs_df_rated_gb = (
    recommendations_df_drop
    .groupby(by = ['user_id'], as_index = False, sort = False)
    .count()
)

user_ids_with_atleast_10_reviews = set(
    reccs_df_rated_gb.loc[reccs_df_rated_gb['app_id'] >= 10, 'user_id']
)

len(user_ids_with_atleast_10_reviews)


In [None]:
# Game IDs whose review count lies between 100 and 7000 (both bounds included)

irecommendations_df_drop = recommendations_df.loc[:, ['user_id', 'app_id']].reset_index(drop = True)

# After count(), the `user_id` column holds the number of reviews per game; most-reviewed games first
ireccs_df_rated_gb = (
    irecommendations_df_drop
    .groupby(by = ['app_id'], as_index = False, sort = False)
    .count()
    .sort_values('user_id', ascending = False)
)

# Despite the variable name, this keeps games inside the [100, 7000] band, not only those below a limit
game_ids_below_reviews_threshold = set(
    ireccs_df_rated_gb.loc[ireccs_df_rated_gb['user_id'].between(100, 7000), 'app_id']
)

len(game_ids_below_reviews_threshold)


In [None]:
# For every selected user, the list of game IDs they already have an interaction row for

jrecommendations_df_drop = recommendations_df.loc[:, ['user_id', 'app_id']].reset_index(drop = True)

# Restrict the table to the users with at least 10 reviews
jreccs_df_user_games = jrecommendations_df_drop.loc[
    jrecommendations_df_drop['user_id'].isin(user_ids_with_atleast_10_reviews)
]

# user_id -> list of app_id values for that user
user_ids_with_reviewed_game_ids = dict(
    jreccs_df_user_games.groupby('user_id')['app_id'].apply(list)
)

len(user_ids_with_reviewed_game_ids)


### Dataset and Model Instantiation

In [None]:
# Evaluation Metrics / Image Modality / Evaluation Method Setup

# Metrics reported by every experiment; the top-k ones use a cut-off of 20
auc = cornac.metrics.AUC()
rec_20, ndcg_20, ncrr_20 = (
    cornac.metrics.Recall(k = 20),
    cornac.metrics.NDCG(k = 20),
    cornac.metrics.NCRR(k = 20),
)

metrics_list = [auc, rec_20, ndcg_20, ncrr_20]

# `np.array(...)` gives the modality its own copy of the feature matrix;
# `idx_id_map` supplies the game id that belongs to each feature row
item_image_modality = ImageModality(
    features = np.array(image_features),
    ids = idx_id_map,
    normalized = True,
)

# 10% of the interactions are held out for testing; the split is seeded with SEED
ratio_split = RatioSplit(
    data = np.array(uid_data),
    test_size = 0.1,
    rating_threshold = 4,
    exclude_unknowns = False,
    verbose = VERBOSE,
    seed = SEED,
    item_image = item_image_modality,
)


In [None]:
# Model Definitions (BPR, VBPR, VMF, CausalRec)

# Control model: BPR with only verbosity and seed set, every other argument left at its default
bpr = BPR(verbose = VERBOSE, seed = SEED)

# VBPR with explicitly chosen hyperparameters; `use_gpu = True` requests GPU training.
# `VBPR` is the same class as `cornac.models.VBPR` (imported at the top of the notebook).
vbpr = VBPR(
    k = 10,
    k2 = 20,
    n_epochs = 5,
    batch_size = 100,
    learning_rate = 0.005,
    lambda_w = 1,
    lambda_b = 0.01,
    lambda_e = 0.0,
    use_gpu = True,
    verbose = VERBOSE,
    seed = SEED,
)

# VMF with explicitly chosen hyperparameters; `use_gpu = True` requests GPU training.
# `VMF` is the same class as `cornac.models.VMF` (imported at the top of the notebook).
vmf = VMF(
    k = 10,
    d = 10,
    n_epochs = 5,
    batch_size = 100,
    learning_rate = 0.001,
    gamma = 0.9,
    lambda_u = 0.001,
    lambda_v = 0.001,
    lambda_p = 1.0,
    lambda_e = 10.0,
    use_gpu = True,
    verbose = VERBOSE,
    seed = SEED,
)

# CausalRec with explicitly chosen hyperparameters; `use_gpu = True` requests GPU training.
# `CausalRec` is the same class as `cornac.models.CausalRec` (imported at the top of the notebook).
# NOTE(review): `mean_feat` is the column-wise mean of the raw `image_features`, while the image
# modality in this notebook is created with `normalized = True` - confirm both are on the same scale.
causalrec = CausalRec(
    k = 32,
    k2 = 32,
    n_epochs = 3,
    batch_size = 100,
    learning_rate = 0.001,
    lambda_w = 1,
    lambda_b = 0.01,
    lambda_e = 0.0,
    mean_feat = image_features.mean(axis = 0),
    tanh = 1,
    lambda_2 = 0.8,
    use_gpu = True,
    verbose = VERBOSE,
    seed = SEED,
)


In [None]:
# Model Training (VBPR)

# Fit VBPR on the split and report the metrics in `metrics_list`
cornac.Experiment(
    eval_method = ratio_split,
    models = [vbpr],
    metrics = metrics_list,
).run()


### VBPR Model Output Generation/Post-Processing

In [None]:
# Raw id -> internal index dictionaries taken from the training set attached to the fitted VBPR model
user_id2idx = dict(vbpr.train_set.uid_map)
item_id2idx = dict(vbpr.train_set.iid_map)

# Lists used below to translate an internal index back into a raw id
user_idx2id = list(vbpr.train_set.user_ids)
item_idx2id = list(vbpr.train_set.item_ids)

num_users, num_games = len(user_id2idx), len(item_id2idx)

print(num_users, num_games)

# user id -> ranked game ids, keeping only games the user has not reviewed yet and that are
# members of `game_ids_below_reviews_threshold`
result_output = {}

for i in tqdm(user_ids_with_atleast_10_reviews):

    # NOTE(review): raises KeyError when the user is absent from the training-set id map
    # (e.g. all of their interactions ended up in the test split) - confirm this cannot happen.
    user_idx = user_id2idx[i]

    user_played_games = user_ids_with_reviewed_game_ids[i]

    # Build the set once per user: the filter below runs for every ranked item, and testing
    # membership against the original list rescanned that list each time.
    played_lookup = set(user_played_games)

    # `rank` returns the ranked item indices first; translate them to raw game ids lazily
    ranked_game_ids = (item_idx2id[j] for j in vbpr.rank(user_idx)[0])

    result_output[i] = [
        game_id
        for game_id in ranked_game_ids
        if game_id not in played_lookup and game_id in game_ids_below_reviews_threshold
    ]


In [None]:
# Keep only the first 10 ranked games for every user
result_output_truncated = {
    user: games[:10]
    for user, games in tqdm(result_output.items())
}
    

In [None]:
# Persist the full (untruncated) filtered ranking of every user with the highest pickle protocol.
with open('drive/MyDrive/vbpr_output_for_users_with_atleast_10_reviews_POST_PROCESSING_DONE.pkl', 'wb') as outp:

    pickle.dump(result_output, outp, pickle.HIGHEST_PROTOCOL)
    

In [None]:
# Persist the truncated ranking (first 10 games per user) with the highest pickle protocol.
with open('drive/MyDrive/vbpr_output_for_users_with_atleast_10_reviews_POST_PROCESSING_DONE_TRUNCATED.pkl', 'wb') as outp:

    pickle.dump(result_output_truncated, outp, pickle.HIGHEST_PROTOCOL)
    

### VBPR Image Contribution Analysis

In [None]:
from collections import defaultdict

# Game metadata; supplies the `title` for an `app_id`
game_df = pd.read_csv('./drive/MyDrive/games.csv', encoding = 'utf-8')

# Raw id of the user inspected in this analysis and the matching training-set index
UID = 19
UIDX = user_id2idx[UID]

img_features = vbpr.train_set.item_image.features

# Densify the user's row of the training matrix (`.toarray()` is the explicit form of `.A`),
# take the column positions of its non-zero entries and translate them to raw game ids
user_row = vbpr.train_set.matrix[UIDX].toarray()
rated_items = [item_idx2id[i] for i in np.nonzero(user_row)[1]]

print(f"RATED ITEMS BY USER {UID}: {rated_items}")

rated_df = defaultdict(list)

for i in rated_items:

    matching_titles = game_df.loc[game_df['app_id'] == i, 'title']

    rated_df["Game ID"].append(i)
    # `.item()` raises ValueError unless exactly one row of `game_df` matches the id
    rated_df["Name"].append(matching_titles.item())

pd.DataFrame(rated_df)


In [None]:
# Number of top-ranked games written to the contribution table
TOPK = 2264

# Ranked item indices plus the scores, which are looked up by item index below
recommendations, scores = vbpr.rank(UIDX)

print(f"\nTOP {TOPK} RECOMMENDATIONS:")

rec_df = defaultdict(list)

for i in recommendations[:TOPK]:

    game_id = item_idx2id[i]

    # Part of the score built from the item bias and the `gamma` user/item factors
    cf_contrib = vbpr.beta_item[i] + np.dot(vbpr.gamma_item[i], vbpr.gamma_user[UIDX])

    # Part of the score built from the visual bias and the `theta` user/item factors
    vis_contrib = vbpr.visual_bias[i] + np.dot(vbpr.theta_item[i], vbpr.theta_user[UIDX])

    # One output row; the key order fixes the column order of the resulting table.
    # `.item()` raises ValueError unless exactly one row of `game_df` matches the id.
    row = {
        "Game ID": game_id,
        "Score": scores[i],
        "CF Contribution": cf_contrib,
        "Visual Contribution": vis_contrib,
        "Title": game_df.loc[game_df['app_id'] == game_id, 'title'].item(),
    }

    for column, value in row.items():
        rec_df[column].append(value)

rec_df_calculated = pd.DataFrame(rec_df)

rec_df_calculated.to_csv(f'./drive/MyDrive/{UID}_cfc_vc.csv')


### Saving VBPR Model

In [None]:
def save_pickle(obj, path):
    """Write `obj` to `path` as a pickle, using the highest available protocol.

    Args:
        obj: Any picklable object.
        path: Destination file; created or overwritten (opened in binary write mode).

    Returns:
        None
    """
    with open(path, mode = 'wb') as sink:
        pickle.dump(obj, sink, protocol = pickle.HIGHEST_PROTOCOL)

# Pickle the whole fitted VBPR model object so it can be reloaded without retraining.
save_pickle(vbpr, f'./drive/MyDrive/vbpr_full_pickle.pkl')


### Training Other (Image) Models

In [None]:
# Model Training (BPR)

# Fit the BPR control model on the same split and report the metrics in `metrics_list`
cornac.Experiment(
    eval_method = ratio_split,
    models = [bpr],
    metrics = metrics_list,
).run()


In [None]:
# Model Training (VMF)

# Fit VMF on the same split and report the metrics in `metrics_list`
cornac.Experiment(
    eval_method = ratio_split,
    models = [vmf],
    metrics = metrics_list,
).run()


In [None]:
# Model Training (CausalRec)

# Fit CausalRec on the same split and report the metrics in `metrics_list`
cornac.Experiment(
    eval_method = ratio_split,
    models = [causalrec],
    metrics = metrics_list,
).run()
