# Import libraries

In [1]:
import ast
import os

import pandas as pd
from torch.utils.data import DataLoader

from dataset import NCFDataset
from recom_ncf import NCFRecommender
from evaluation import Evaluation
from helpers.index_manager import IndexManager
from helpers.dataloader_custom_functions import collate_fn, worker_init_fn
from helpers.cache import CacheType
from helpers.mem_map_dataloader import MemMapDataLoader
from ncf import ModelType

# Loading data

In [2]:
# Read the train / validation / test interaction splits from CSV.
split_files = (
    '../data/train-leave2.csv',
    '../data/val-leave2.csv',
    '../data/test-leave2.csv',
)
df_train, df_val, df_test = map(pd.read_csv, split_files)

# Load the saved index mappings; later cells use this object to translate
# between row indices and original ids.
index_manager = IndexManager()
index_manager.load('../data/index.pkl')

# Creating datasets

In [3]:
# Build the audio feature loader shared by the datasets and the model.
# The feature directory defaults to the original local path but can be
# overridden with the AUDIO_FEATURES_DIR environment variable, so the notebook
# is not tied to one machine's drive layout.
audio_features_dir = os.environ.get('AUDIO_FEATURES_DIR', 'D:/audio-features/mean')
audio_dataloader = MemMapDataLoader(
    file_dir=audio_features_dir,
    index_manager=index_manager,
    cache_type=CacheType.UNLIMITED,
    embed_dim=128,
)

In [4]:
# Wrap the train and validation frames in NCF datasets that share one audio loader.
train_dataset, val_dataset = (
    NCFDataset(frame, audio_dataloader=audio_dataloader)
    for frame in (df_train, df_val)
)

# Settings common to both loaders; only `shuffle` differs between them.
dataloader_params = dict(
    batch_size=4096,
    num_workers=6,
    persistent_workers=True,
    prefetch_factor=4,
    pin_memory=True,
    pin_memory_device='cuda',
    collate_fn=collate_fn,
    worker_init_fn=worker_init_fn,
)

# Training batches are shuffled; validation batches keep their order.
train_dataloader = DataLoader(train_dataset, shuffle=True, **dataloader_params)
eval_dataloader = DataLoader(val_dataset, shuffle=False, **dataloader_params)

# Training model

In [5]:
# All user / item values known to the index manager.
unique_users = index_manager.get_indexed_values('user_id')
unique_items = index_manager.get_indexed_values('item_id')

# Hyperparameters forwarded to NCFRecommender as keyword arguments.
params = {
    'factors': 64,
    'mlp_user_item_dim': 128,
    'learning_rate': 0.0005,
    'epochs': 50,
    'optimizer': 'adagrad',
    'dropout': 0.5,
    'weight_decay': 0.0005,
    'loss_fn': 'bce',
    'audio_dim': 64,
}

# Late-fusion model variant, trained on the train loader and validated on the
# eval loader.
model = NCFRecommender(
    unique_users,
    unique_items,
    audio_dataloader=audio_dataloader,
    model_type=ModelType.LATE_FUSION,
    **params,
)
model.fit(train_dataloader, eval_dataloader)

Epoch 1/50
Train loss: 0.623665, Validation loss: 0.646860
Epoch 2/50
Train loss: 0.595719, Validation loss: 0.642087
Epoch 3/50
Train loss: 0.575809, Validation loss: 0.634271
Epoch 4/50
Train loss: 0.547077, Validation loss: 0.627499
Epoch 5/50
Train loss: 0.518738, Validation loss: 0.630876
Epoch 6/50
Train loss: 0.503532, Validation loss: 0.633575
Epoch 7/50
Train loss: 0.496240, Validation loss: 0.634908
Epoch 8/50
Train loss: 0.490922, Validation loss: 0.631623
Epoch 9/50
Train loss: 0.486604, Validation loss: 0.630693
Early stopping triggered after 9 epochs
Training completed!


# Evaluating model

In [6]:
# NOTE(review): evaluation on `df_test` is currently disabled; the two lines
# below are commented out, so `metrics` is never defined in this notebook.
# Uncomment them to run the imported `Evaluation` helper.
# evaluator = Evaluation(recommender=model, test_data=df_test)
# metrics = evaluator.evaluate(user_batch_size=128, item_batch_size=1024)

In [7]:
# NOTE(review): display of the evaluation result; commented out, presumably
# because the evaluation cell that defines `metrics` is disabled as well.
# metrics

In [8]:

# Per-user count of non-null values in every column of the training split.
rows_by_user = df_train.groupby('user_id')
user_count = rows_by_user.count()

In [9]:
# Users whose training rows contain exactly 10 non-null `rating_imp` values.
has_ten_ratings = user_count['rating_imp'] == 10
user_count[has_ten_ratings].index

Index([    17,    229,    257,    443,    488,    490,    686,    853,   1524,
         1789,
       ...
       591209, 592231, 595300, 606815, 648957, 650130, 657774, 658095, 664050,
       671908],
      dtype='int64', name='user_id', length=410)

In [10]:
# Training interactions of user 17, restricted to the descriptive columns.
history_columns = ['item_id', 'name', 'publishers', 'genres', 'categories', 'tags']
is_user_17 = df_train['user_id'] == 17
df_train[is_user_17][history_columns]

Unnamed: 0,item_id,name,publishers,genres,categories,tags
45,48937,Fallout 4,['Bethesda Softworks'],['RPG'],"['Single-player', 'Steam Achievements', 'Full ...","['Open World', 'Post-apocalyptic', 'Singleplay..."
46,10791,GUILTY GEAR -STRIVE-,['Arc System Works'],['Action'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","['Fighting', '2D Fighter', 'Great Soundtrack',..."
47,40162,Quake Live,['Bethesda Softworks'],['Action'],"['Single-player', 'Multi-player', 'Steam Achie...","['FPS', 'Arena Shooter', 'Multiplayer', 'Class..."
48,38973,Depth,['Digital Confectioners'],"['Action', 'Indie']","['Single-player', 'Multi-player', 'PvP', 'Onli...","['Shooter', 'Multiplayer', 'Underwater', 'Atmo..."
49,6525,Grand Theft Auto IV: The Complete Edition,['Rockstar Games'],"['Action', 'Adventure']","['Single-player', 'Multi-player', 'Partial Con...","['Open World', 'Action', 'Automobile Sim', 'Cr..."
50,33116,METAL GEAR RISING: REVENGEANCE,['KONAMI'],['Action'],"['Single-player', 'Steam Achievements', 'Full ...","['Great Soundtrack', 'Hack and Slash', 'Action..."
51,14912,Forza Horizon 5,['Xbox Game Studios'],"['Action', 'Adventure', 'Racing', 'Simulation'...","['Single-player', 'Multi-player', 'PvP', 'Onli...","['Racing', 'Open World', 'Driving', 'Multiplay..."
52,0,Counter-Strike,['Valve'],['Action'],"['Multi-player', 'PvP', 'Online PvP', 'Shared/...","['Action', 'FPS', 'Multiplayer', 'Shooter', 'C..."
53,65488,Valheim,['Coffee Stain Publishing'],"['Action', 'Adventure', 'Indie', 'RPG', 'Early...","['Single-player', 'Multi-player', 'Co-op', 'On...","['Open World Survival Craft', 'Survival', 'Onl..."
54,51431,S.T.A.L.K.E.R.: Shadow of Chernobyl,['GSC Game World'],"['Action', 'RPG']","['Single-player', 'Multi-player', 'PvP', 'Onli...","['Atmospheric', 'Post-apocalyptic', 'Open Worl..."


In [11]:
# Item metadata table; later cells look rows up in it by original `item_id`.
metadata_path = '../data/metadata-clean.csv'
df_features = pd.read_csv(metadata_path)

In [12]:
# `df_train` stores item indices; translate user 17's items back to the
# original item ids so they can be matched against `df_features`.
user_17_rows = df_train[df_train['user_id'] == 17]
actual_items = [
    index_manager.get_id('item_id', item_index)
    for item_index in user_17_rows['item_id'].values
]

In [13]:
# Replace the list of item ids with the matching metadata rows.
# NOTE: `actual_items` is rebound from a list of ids to a DataFrame here, so
# the id-building cell must be re-run before this cell is executed again.
metadata_columns = ['item_id', 'name', 'publishers', 'genres', 'categories', 'tags']
in_user_history = df_features['item_id'].isin(actual_items)
actual_items = df_features[in_user_history][metadata_columns]
actual_items

Unnamed: 0,item_id,name,publishers,genres,categories,tags
73619,4500,S.T.A.L.K.E.R.: Shadow of Chernobyl,['GSC Game World'],"['Action', 'RPG']","['Single-player', 'Multi-player', 'PvP', 'Onli...","['Atmospheric', 'Post-apocalyptic', 'Open Worl..."
73634,10,Counter-Strike,['Valve'],['Action'],"['Multi-player', 'PvP', 'Online PvP', 'Shared/...","['Action', 'FPS', 'Multiplayer', 'Shooter', 'C..."
73657,377160,Fallout 4,['Bethesda Softworks'],['RPG'],"['Single-player', 'Steam Achievements', 'Full ...","['Open World', 'Post-apocalyptic', 'Singleplay..."
73899,235460,METAL GEAR RISING: REVENGEANCE,['KONAMI'],['Action'],"['Single-player', 'Steam Achievements', 'Full ...","['Great Soundtrack', 'Hack and Slash', 'Action..."
74142,274940,Depth,['Digital Confectioners'],"['Action', 'Indie']","['Single-player', 'Multi-player', 'PvP', 'Onli...","['Shooter', 'Multiplayer', 'Underwater', 'Atmo..."
74379,282440,Quake Live,['Bethesda Softworks'],['Action'],"['Single-player', 'Multi-player', 'Steam Achie...","['FPS', 'Arena Shooter', 'Multiplayer', 'Class..."
75737,12210,Grand Theft Auto IV: The Complete Edition,['Rockstar Games'],"['Action', 'Adventure']","['Single-player', 'Multi-player', 'Partial Con...","['Open World', 'Action', 'Automobile Sim', 'Cr..."
78176,1551360,Forza Horizon 5,['Xbox Game Studios'],"['Action', 'Adventure', 'Racing', 'Simulation'...","['Single-player', 'Multi-player', 'PvP', 'Onli...","['Racing', 'Open World', 'Driving', 'Multiplay..."
81509,892970,Valheim,['Coffee Stain Publishing'],"['Action', 'Adventure', 'Indie', 'RPG', 'Early...","['Single-player', 'Multi-player', 'Co-op', 'On...","['Open World Survival Craft', 'Survival', 'Onl..."
88091,1384160,GUILTY GEAR -STRIVE-,['Arc System Works'],['Action'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","['Fighting', '2D Fighter', 'Great Soundtrack',..."


In [14]:
# Ask the model for user 17's recommendations, then translate the returned
# item indices back to original item ids.
predictions_by_user = model.batch_predict_for_users([17])
recommended_items = [
    index_manager.get_id('item_id', item_index)
    for item_index in predictions_by_user[17]
]

Processing 1 of 1 users... (0.00%)
Memory usage: 0.681884765625
Increased user batch size from 128 to 141
Increased item batch size from 1024 to 1126


In [15]:
# Replace the list of recommended ids with the matching metadata rows.
# NOTE: `recommended_items` is rebound from a list of ids to a DataFrame here,
# so the prediction cell must be re-run before this cell is executed again.
metadata_columns = ['item_id', 'name', 'publishers', 'genres', 'categories', 'tags']
is_recommended = df_features['item_id'].isin(recommended_items)
recommended_items = df_features[is_recommended][metadata_columns]
recommended_items

Unnamed: 0,item_id,name,publishers,genres,categories,tags
3938,1508620,AV Company | AV会社大冒险,['Big Breast Studio'],"['Action', 'Adventure', 'Casual', 'Indie', 'RP...","['Single-player', 'Steam Cloud', 'Family Shari...",[]
5329,986990,aMAZE Christmas,['Blender Games'],"['Adventure', 'Casual', 'Indie', 'Simulation',...","['Single-player', 'Steam Achievements', 'Steam...","['Strategy', 'Singleplayer', 'Puzzle', 'Fast-P..."
6128,963960,NEONARCADE: adventure puzzle muse,['Nozhin games studio'],"['Adventure', 'Casual', 'Indie']","['Single-player', 'Steam Achievements', 'Famil...","['Adventure', 'Indie', 'Casual']"
14206,749990,Trigonometry,['XiNFiNiTY Games'],"['Action', 'Casual', 'Indie']","['Single-player', 'Steam Achievements', 'Full ...","['Indie', 'Casual', 'Action', 'Singleplayer', ..."
23403,1966720,Lethal Company,['Zeekerss'],"['Action', 'Adventure', 'Indie', 'Early Access']","['Single-player', 'Multi-player', 'Co-op', 'On...","['Online Co-Op', 'Horror', 'First-Person', 'Co..."
29332,523810,Wonder Boy Returns,"['CFK Co., Ltd.']","['Action', 'Casual']","['Single-player', 'Steam Achievements', 'Full ...","['Casual', 'Action', 'Platformer', 'Classic', ..."
29485,519870,Flying Pengy,['Bogemic Games'],"['Casual', 'Indie']","['Single-player', 'Steam Achievements', 'Full ...","['Casual', 'Indie', 'Arcade', 'Funny', 'Old Sc..."
37956,435140,Rush for gold: Alaska,['Rainbow Games'],"['Casual', 'Strategy']","['Single-player', 'Steam Trading Cards', 'Fami...","['Strategy', 'Casual', 'Time Management']"
40686,346210,Garden Rescue,['Rainbow Games'],"['Casual', 'Strategy']","['Single-player', 'Steam Trading Cards', 'Fami...","['Strategy', 'Casual', 'Tower Defense']"
42341,282010,Carmageddon Max Pack,['THQ Nordic'],"['Action', 'Indie', 'Racing']","['Single-player', 'Multi-player', 'Steam Tradi...","['Combat Racing', 'Racing', 'Gore', 'Action', ..."


In [16]:
# Decode the stringified list columns into real Python lists so later cells can
# explode them.
# - `.assign` builds a new frame instead of writing columns into a filtered
#   slice of `df_features`, which avoids pandas' SettingWithCopyWarning.
# - Only string values are parsed; already-parsed lists (cell re-run) and
#   missing values pass through unchanged, so the cell can be executed more
#   than once without raising.
list_columns = ['tags', 'categories', 'genres']
recommended_items = recommended_items.assign(**{
    column: recommended_items[column].apply(
        lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
    )
    for column in list_columns
})

In [17]:
# Distinct tags / categories / genres across the recommended items.
# `explode` turns an empty list (an item with no tags, for example) into NaN;
# `dropna` removes that placeholder so it is not treated as a label.
recommended_tags = recommended_items.explode('tags')['tags'].dropna().unique()
recommended_categories = recommended_items.explode('categories')['categories'].dropna().unique()
recommended_genres = recommended_items.explode('genres')['genres'].dropna().unique()

In [18]:
# Decode the stringified list columns into real Python lists so later cells can
# explode them.
# - `.assign` builds a new frame instead of writing columns into a filtered
#   slice of `df_features`, which avoids pandas' SettingWithCopyWarning.
# - Only string values are parsed; already-parsed lists (cell re-run) and
#   missing values pass through unchanged, so the cell can be executed more
#   than once without raising.
list_columns = ['tags', 'categories', 'genres']
actual_items = actual_items.assign(**{
    column: actual_items[column].apply(
        lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
    )
    for column in list_columns
})

In [19]:
# Distinct tags / categories / genres across the items the user interacted with.
# `explode` turns an empty list into NaN; `dropna` removes that placeholder so
# it does not inflate the denominators of the overlap ratios computed below.
actual_tags = actual_items.explode('tags')['tags'].dropna().unique()
actual_categories = actual_items.explode('categories')['categories'].dropna().unique()
actual_genres = actual_items.explode('genres')['genres'].dropna().unique()

In [20]:
# Share of the user's own tags that also occur among the recommended items.
shared_tags = set(recommended_tags) & set(actual_tags)
tag_coverage = len(shared_tags) / len(actual_tags)
tag_coverage

0.7878787878787878

In [21]:
# Share of the user's own categories that also occur among the recommended items.
shared_categories = set(recommended_categories) & set(actual_categories)
category_coverage = len(shared_categories) / len(actual_categories)
category_coverage

0.9642857142857143

In [22]:
# Share of the user's own genres that also occur among the recommended items.
shared_genres = set(recommended_genres) & set(actual_genres)
genre_coverage = len(shared_genres) / len(actual_genres)
genre_coverage

0.875