In [2]:
import json
import pickle
import operator
from collections import Counter
import pandas as pd
import numpy as np
from recsys import * ## recommender system cookbook
from preprocessing import * ## pre-processing code
from IPython.display import HTML ## Setting display options for Ipython Notebook

%run '/home/dharmesh/Documents/DSE/dhpancha/DSE260_Capstone Project/cookbook-master/my_recsys.py'
%run '/home/dharmesh/Documents/DSE/dhpancha/DSE260_Capstone Project/cookbook-master/generic_preprocessing.py'

In [3]:
aussie_items = json.load(open('./SteamData/australian_users_items_fixed.json','r'))
steam_games = json.load(open('./SteamData/steam_games_fixed.json','r'))

In [4]:
def build_list(user_item_data):
    user_item = []
    for user in user_item_data:
        for item in user['items']:
            user_item.append((user['user_id'],item['item_name']))
    return user_item

In [5]:
def build_df(user_item):
    df = pd.DataFrame(user_item,columns=['user','item'])
    df = df.drop_duplicates(['user','item'])
    df['own'] = 1
    df = df.pivot(index='user',columns='item',values='own')
    df = df.fillna(0)
    return df

In [6]:
user_item = build_list(aussie_items)
user_item_df = build_df(user_item)

In [7]:
user_item_df.head()

item,神明的一天世界(God's One Day World),! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,#SkiJump,#killallzombies,$1 Ride,'n Verlore Verstand,.EXE,001 Game Creator,...,theHunter,theHunter: Primal,vrAMP,Астролорды: Оружие Пришельцев,Безумный Макс: Дорога ярости,丛林守望者（Ranger of the jungle）,侠客风云传(Tale of Wuxia),大海战 Navy Field IV,神楽道中記(KaguraDouchuuki),軒轅劍外傳穹之扉(The Gate of Firmament)
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
--ace--,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
--ionex--,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-2SV-vuLB-Kg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-404PageNotFound-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
top500 = user_item_df.sum().nlargest(500)

top500games = top500.index

user_top500games = user_item_df[top500games].stack().reset_index()
user_top500games = user_top500games.rename(columns={0:'rating'})


In [9]:
user_top500games.head()

Unnamed: 0,user,item,rating
0,--000--,Dota 2 Test,1.0
1,--000--,Counter-Strike: Global Offensive,1.0
2,--000--,Garry's Mod,1.0
3,--000--,Unturned,1.0
4,--000--,Left 4 Dead 2,1.0


In [10]:
games=pd.DataFrame()
games['item']=user_top500games.item.drop_duplicates()

In [11]:
games.head()

Unnamed: 0,item
0,Dota 2 Test
1,Counter-Strike: Global Offensive
2,Garry's Mod
3,Unturned
4,Left 4 Dead 2


In [12]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(user_top500games, test_size=0.2)

In [13]:
interactions_train = create_interaction_matrix(df = train,
                                         user_col = 'user',
                                         item_col = 'item',
                                         rating_col = 'rating',
                                         threshold = '1')
interactions_train.shape

(70912, 500)

In [14]:
interactions_train.head()

item,7 Days to Die,8BitMMO,APB Reloaded,ARK: Survival Evolved,ARK: Survival Of The Fittest,Ace of Spades,AdVenture Capitalist,Afterfall InSanity Extended Edition,Aftermath,Age of Empires II: HD Edition,...,World of Goo,World of Guns: Gun Disassembly,Worms Reloaded,Worms Revolution,XCOM: Enemy Unknown,Yet Another Zombie Defense,You Have to Win the Game,Zombies Monsters Robots,sZone-Online,theHunter
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
--ace--,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
--ionex--,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-2SV-vuLB-Kg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
-404PageNotFound-,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [15]:
interactions_test = create_interaction_matrix(df = test,
                                         user_col = 'user',
                                         item_col = 'item',
                                         rating_col = 'rating',
                                         threshold = '1')
interactions_test.shape

(70912, 500)

In [16]:
interactions_test.head()

item,7 Days to Die,8BitMMO,APB Reloaded,ARK: Survival Evolved,ARK: Survival Of The Fittest,Ace of Spades,AdVenture Capitalist,Afterfall InSanity Extended Edition,Aftermath,Age of Empires II: HD Edition,...,World of Goo,World of Guns: Gun Disassembly,Worms Reloaded,Worms Revolution,XCOM: Enemy Unknown,Yet Another Zombie Defense,You Have to Win the Game,Zombies Monsters Robots,sZone-Online,theHunter
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
--ace--,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
--ionex--,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-2SV-vuLB-Kg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
-404PageNotFound-,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
user_dict = create_user_dict(interactions=interactions_train)
len(user_dict)

70912

In [18]:
user_dict_test = create_user_dict(interactions=interactions_test)
len(user_dict_test)

70912

In [19]:
games_dict = create_item_dict(df = games,
                               id_col = 'item',
                               name_col = 'item')
len(games_dict)

500

In [20]:
mf_model = runMF(interactions = interactions_train,
                 n_components = 30,
                 loss = 'bpr',
                 k = 15,
                 epoch = 30,
                 n_jobs = 4)

## User Recommender

In [21]:
rec_list = sample_recommendation_user(model = mf_model, 
                                      interactions = interactions_train, 
                                      user_id = '--ace--', 
                                      user_dict = user_dict,
                                      item_dict = games_dict, 
                                      threshold = 0,
                                      nrec_items = 20)

Known Likes:
1- War Thunder
2- Unturned
3- Trove
4- Tropico 3 - Steam Special Edition
5- Toribash
6- The Expendabros
7- The Binding of Isaac: Rebirth
8- Super Meat Boy
9- Sniper Elite V2
10- Relic Hunters Zero
11- Portal 2
12- Mitos.is: The Game
13- Garry's Mod
14- Fractured Space
15- Floating Point
16- Dota 2 Test
17- Dead Island: Epidemic
18- Counter-Strike: Global Offensive
19- Clicker Heroes
20- Castle Crashers
21- Brawlhalla
22- AdVenture Capitalist

 Recommended Items:
1- Robocraft
2- Transformice
3- Gun Monkeys
4- RaceRoom Racing Experience 
5- You Have to Win the Game
6- Really Big Sky
7- BattleBlock Theater
8- Emily is Away
9- Super Crate Box
10- The Way of Life Free Edition
11- Portal Stories: Mel
12- Race The Sun
13- GTR Evolution
14- 8BitMMO
15- Fistful of Frags
16- Only If
17- RACE 07
18- SpaceChem
19- Dino D-Day
20- Hell Yeah!


In [22]:
rec_list_test = sample_recommendation_user(model = mf_model, 
                                      interactions = interactions_test, 
                                      user_id = '--ace--', 
                                      user_dict = user_dict_test,
                                      item_dict = games_dict, 
                                      threshold = 0,
                                      nrec_items = 20)

Known Likes:
1- You Have to Win the Game
2- The Binding of Isaac
3- DLC Quest
4- BattleBlock Theater

 Recommended Items:
1- Unturned
2- Garry's Mod
3- Counter-Strike: Global Offensive
4- Dota 2 Test
5- Portal 2
6- Robocraft
7- AdVenture Capitalist
8- Brawlhalla
9- Transformice
10- Relic Hunters Zero
11- Gun Monkeys
12- RaceRoom Racing Experience 
13- The Expendabros
14- Floating Point
15- Really Big Sky
16- Mitos.is: The Game
17- Emily is Away
18- Super Crate Box
19- The Way of Life Free Edition
20- Portal Stories: Mel


In [23]:
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

from scipy.sparse import csr_matrix


# model = LightFM(learning_rate=0.05, loss='bpr')
# model.fit(train, epochs=10)

train_precision = precision_at_k(mf_model, csr_matrix(interactions_train), k=10).mean()
test_precision = precision_at_k(mf_model, csr_matrix(interactions_test), k=10).mean()

train_auc = auc_score(mf_model, csr_matrix(interactions_train)).mean()
test_auc = auc_score(mf_model, csr_matrix(interactions_test)).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.70, test 0.05.
AUC: train 0.93, test 0.74.


In [24]:
mf_model2 = runMF(interactions = interactions_train,
                 n_components = 30,
                 loss = 'warp',
                 k = 15,
                 epoch = 30,
                 n_jobs = 4)

In [26]:
rec_list = sample_recommendation_user(model = mf_model2, 
                                      interactions = interactions_train, 
                                      user_id = '--ace--', 
                                      user_dict = user_dict_test,
                                      item_dict = games_dict, 
                                      threshold = 0,
                                      nrec_items = 20)

Known Likes:
1- War Thunder
2- Unturned
3- Trove
4- Tropico 3 - Steam Special Edition
5- Toribash
6- The Expendabros
7- The Binding of Isaac: Rebirth
8- Super Meat Boy
9- Sniper Elite V2
10- Relic Hunters Zero
11- Portal 2
12- Mitos.is: The Game
13- Garry's Mod
14- Fractured Space
15- Floating Point
16- Dota 2 Test
17- Dead Island: Epidemic
18- Counter-Strike: Global Offensive
19- Clicker Heroes
20- Castle Crashers
21- Brawlhalla
22- AdVenture Capitalist

 Recommended Items:
1- Terraria
2- Don't Starve Together
3- Tropico 4
4- Robocraft
5- Transformice
6- BattleBlock Theater
7- Left 4 Dead 2 Beta
8- Realm of the Mad God
9- Left 4 Dead 2
10- Emily is Away
11- Dirty Bomb
12- Don't Starve
13- Just Cause 2
14- The Binding of Isaac
15- Sakura Clicker
16- LIMBO
17- You Have to Win the Game
18- RaceRoom Racing Experience 
19- FreeStyle2: Street Basketball
20- Portal Stories: Mel


In [27]:
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

from scipy.sparse import csr_matrix

# model = LightFM(learning_rate=0.05, loss='bpr')
# model.fit(train, epochs=10)

train_precision = precision_at_k(mf_model2, csr_matrix(interactions_train), k=10).mean()
test_precision = precision_at_k(mf_model2, csr_matrix(interactions_test), k=10).mean()

train_auc = auc_score(mf_model2, csr_matrix(interactions_train)).mean()
test_auc = auc_score(mf_model2, csr_matrix(interactions_test)).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

Precision: train 0.69, test 0.09.
AUC: train 0.95, test 0.84.


In [28]:
train_auc_array = auc_score(mf_model2, csr_matrix(interactions_train)).mean()
print(train_auc_array)

test_auc_array = auc_score(mf_model2, csr_matrix(interactions_test)).mean()
print(test_auc_array)

0.95227855
0.8447535


## Item-User Recommender

In [29]:
sample_recommendation_item(model = mf_model,
                           interactions = interactions_train,
                           item_id = 'Counter-Strike',
                           user_dict = user_dict,
                           item_dict = games_dict,
                           number_of_user = 15)

['76561198060399077',
 '76561198024841969',
 '76561198001819145',
 '76561198005773932',
 '76561197970293781',
 'dvs',
 '76561198073417621',
 'CydusPC',
 'alphabromega',
 '76561198002258204',
 'nikkihoward8',
 'mrjordanfights',
 '76561197960617353',
 'supergeroi228',
 'tayneonegee']

## Item - Item Recommender

In [30]:
item_item_dist = create_item_emdedding_distance_matrix(model = mf_model,
                                                       interactions = interactions_train)

In [31]:
rec_list = item_item_recommendation(item_emdedding_distance_matrix = item_item_dist,
                                    item_id = 'Counter-Strike',
                                    item_dict = games_dict,
                                    n_items = 20)

Game of interest :Counter-Strike
Game similar to the above game:
1 - Counter-Strike: Condition Zero Deleted Scenes     Distance: 0.9935280680656433
2 - Counter-Strike: Condition Zero     Distance: 0.9924914836883545
3 - Ricochet     Distance: 0.8623061776161194
4 - Deathmatch Classic     Distance: 0.8620873093605042
5 - Day of Defeat     Distance: 0.8568188548088074
6 - Counter-Strike: Source     Distance: 0.5679379105567932
7 - Team Fortress Classic     Distance: 0.5159909129142761
8 - Day of Defeat: Source     Distance: 0.4974355697631836
9 - Half-Life: Opposing Force     Distance: 0.48945802450180054
10 - Half-Life     Distance: 0.48419177532196045
11 - Half-Life: Blue Shift     Distance: 0.47419384121894836
12 - Half-Life: Source     Distance: 0.41100046038627625
13 - Left 4 Dead     Distance: 0.4099256098270416
14 - Grand Theft Auto III     Distance: 0.3559361398220062
15 - Grand Theft Auto: San Andreas     Distance: 0.3070269525051117
16 - Serious Sam HD: The First Encounter     