In [120]:
import pandas as pd
import numpy as np
import pickle
import json
import ast
from surprise import Dataset, Reader, SVD, accuracy, SVDpp, KNNWithMeans
from surprise.model_selection import train_test_split, cross_validate, GridSearchCV
from sklearn.metrics.pairwise import cosine_similarity
from surprise.prediction_algorithms import knns, SVD, KNNWithMeans, KNNBasic, KNNBaseline
from surprise.similarities import cosine, msd, pearson
from scipy import sparse
import operator

In [31]:
game_data = pd.read_csv('../data/steamspy_data.csv')

In [32]:
game_data.head(50)

Unnamed: 0,appid,name,developer,publisher,score_rank,positive,negative,userscore,owners,average_forever,average_2weeks,median_forever,median_2weeks,price,initialprice,discount,languages,genre,ccu,tags
0,10,Counter-Strike,Valve,Valve,,185686,4807,0,"10,000,000 .. 20,000,000",9363,426,262,323,199,999,80,"English, French, German, Italian, Spanish - Sp...",Action,11955,"{'Action': 5372, 'FPS': 4796, 'Multiplayer': 3..."
1,20,Team Fortress Classic,Valve,Valve,,5235,874,0,"2,000,000 .. 5,000,000",852,3,27,3,99,499,80,"English, French, German, Italian, Spanish - Sp...",Action,94,"{'Action': 745, 'FPS': 306, 'Multiplayer': 258..."
2,30,Day of Defeat,Valve,Valve,,4885,541,0,"5,000,000 .. 10,000,000",811,0,16,0,99,499,80,"English, French, German, Italian, Spanish - Spain",Action,119,"{'FPS': 785, 'World War II': 246, 'Multiplayer..."
3,40,Deathmatch Classic,Valve,Valve,,1791,403,0,"5,000,000 .. 10,000,000",271,0,12,0,99,499,80,"English, French, German, Italian, Spanish - Sp...",Action,10,"{'Action': 628, 'FPS': 138, 'Classic': 106, 'M..."
4,50,Half-Life: Opposing Force,Gearbox Software,Valve,,12501,638,0,"5,000,000 .. 10,000,000",1919,3,171,5,99,499,80,"English, French, German, Korean",Action,122,"{'FPS': 879, 'Action': 321, 'Classic': 250, 'S..."
5,60,Ricochet,Valve,Valve,,3583,823,0,"5,000,000 .. 10,000,000",228,0,3,0,99,499,80,"English, French, German, Italian, Spanish - Sp...",Action,8,"{'Action': 585, 'FPS': 128, 'Multiplayer': 103..."
6,70,Half-Life,Valve,Valve,,65013,2344,0,"5,000,000 .. 10,000,000",1241,58,156,60,199,999,80,"English, French, German, Italian, Spanish - Sp...",Action,894,"{'FPS': 2214, 'Sci-fi': 1749, 'Action': 1746, ..."
7,80,Counter-Strike: Condition Zero,Valve,Valve,,18108,1771,0,"5,000,000 .. 10,000,000",1420,1,32,1,199,999,80,"English, French, German, Italian, Spanish - Sp...",Action,488,"{'Action': 1356, 'FPS': 1010, 'Shooter': 745, ..."
8,130,Half-Life: Blue Shift,Gearbox Software,Valve,,8929,868,0,"10,000,000 .. 20,000,000",1920,71,121,71,99,499,80,"English, French, German",Action,55,"{'FPS': 454, 'Action': 281, 'Sci-fi': 211, 'Si..."
9,220,Half-Life 2,Valve,Valve,,128896,3490,0,"10,000,000 .. 20,000,000",989,387,380,387,199,999,80,"English, French, German, Italian, Korean, Span...",Action,1041,"{'FPS': 3844, 'Action': 2731, 'Sci-fi': 2378, ..."


In [33]:
game_data = game_data[['appid', 'name', 'genre', 'tags']]

In [34]:
game_data.head()

Unnamed: 0,appid,name,genre,tags
0,10,Counter-Strike,Action,"{'Action': 5372, 'FPS': 4796, 'Multiplayer': 3..."
1,20,Team Fortress Classic,Action,"{'Action': 745, 'FPS': 306, 'Multiplayer': 258..."
2,30,Day of Defeat,Action,"{'FPS': 785, 'World War II': 246, 'Multiplayer..."
3,40,Deathmatch Classic,Action,"{'Action': 628, 'FPS': 138, 'Classic': 106, 'M..."
4,50,Half-Life: Opposing Force,Action,"{'FPS': 879, 'Action': 321, 'Classic': 250, 'S..."


In [35]:
game_data.set_index('appid', inplace=True)

In [36]:
game_data

Unnamed: 0_level_0,name,genre,tags
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,Counter-Strike,Action,"{'Action': 5372, 'FPS': 4796, 'Multiplayer': 3..."
20,Team Fortress Classic,Action,"{'Action': 745, 'FPS': 306, 'Multiplayer': 258..."
30,Day of Defeat,Action,"{'FPS': 785, 'World War II': 246, 'Multiplayer..."
40,Deathmatch Classic,Action,"{'Action': 628, 'FPS': 138, 'Classic': 106, 'M..."
50,Half-Life: Opposing Force,Action,"{'FPS': 879, 'Action': 321, 'Classic': 250, 'S..."
...,...,...,...
1483870,Draw & Guess,"Casual, Indie","{'Casual': 264, 'Multiplayer': 245, 'Hand-draw..."
1517290,Battlefield 2042,"Action, Adventure, Casual","{'Shooter': 445, 'Action': 407, 'Multiplayer':..."
1520470,封灵档案,Free to Play,"{'Sexual Content': 502, 'Free to Play': 342, '..."
1536610,OpenTTD,"Casual, Free to Play, Indie, Simulation","{'Simulation': 160, 'Free to Play': 141, 'Buil..."


In [37]:
def get_app_tags(tag_dict):
    tags = ast.literal_eval(tag_dict)
    tag_list = []
    for tag in tags:
        tag_list.append(tag)
        
    if len(tag_list) > 10:
        return tag_list[:10]
    else:
        return tag_list

In [38]:
game_data['tags'] = game_data['tags'].apply(get_app_tags)

In [39]:
game_data.tail()

Unnamed: 0_level_0,name,genre,tags
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1483870,Draw & Guess,"Casual, Indie","[Casual, Multiplayer, Hand-drawn, Funny, Famil..."
1517290,Battlefield 2042,"Action, Adventure, Casual","[Shooter, Action, Multiplayer, FPS, First-Pers..."
1520470,封灵档案,Free to Play,"[Sexual Content, Free to Play, Nudity, Mature,..."
1536610,OpenTTD,"Casual, Free to Play, Indie, Simulation","[Simulation, Free to Play, Building, Sandbox, ..."
1627140,Sabre Team,"RPG, Strategy","[Turn-Based Tactics, Military, Isometric, Turn..."


In [40]:
len(game_data)

1000

In [41]:
library_df = pd.read_csv('../data/library_data.csv')
library_df.head()

Unnamed: 0,steamid,library
0,76561198219067393,"[{'appid': 220, 'name': 'Half-Life 2', 'hours'..."
1,76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26..."
2,76561198993539076,hidden
3,76561198247182340,hidden
4,76561198278705159,hidden


In [42]:
len(library_df)

56978

In [43]:
library_df.set_index('steamid', inplace=True)
library_df

Unnamed: 0_level_0,library
steamid,Unnamed: 1_level_1
76561198219067393,"[{'appid': 220, 'name': 'Half-Life 2', 'hours'..."
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26..."
76561198993539076,hidden
76561198247182340,hidden
76561198278705159,hidden
...,...
76561197990543347,hidden
76561199206760437,hidden
76561198324908021,hidden
76561198253735927,hidden


In [44]:
library_df['library'].value_counts()

hidden                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  

In [45]:
hidden_libraries = library_df[library_df['library'] == 'hidden'].index
library_df = library_df.drop(hidden_libraries)

In [194]:
len(hidden_libraries)

49565

In [46]:
len(library_df)

6600

In [47]:
library_df.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [48]:
len(library_df)

6340

In [49]:
library_df

Unnamed: 0_level_0,library
steamid,Unnamed: 1_level_1
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26..."
76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours..."
76561198088650778,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours..."
76561198886682654,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours..."
76561198311899167,"[{'appid': 10, 'name': 'Counter-Strike', 'hour..."
...,...
76561198208253879,"[{'appid': 70, 'name': 'Half-Life', 'hours': 3..."
76561199104131020,"[{'appid': 230410, 'name': 'Warframe', 'hours'..."
76561198012694491,"[{'appid': 240, 'name': 'Counter-Strike: Sourc..."
76561198393589724,"[{'appid': 2600, 'name': 'Vampire: The Masquer..."


In [50]:
library_df['library']

steamid
76561198148157441    [{'appid': 17390, 'name': 'Spore', 'hours': 26...
76561198170079242    [{'appid': 3830, 'name': 'Psychonauts', 'hours...
76561198088650778    [{'appid': 4000, 'name': "Garry's Mod", 'hours...
76561198886682654    [{'appid': 4000, 'name': "Garry's Mod", 'hours...
76561198311899167    [{'appid': 10, 'name': 'Counter-Strike', 'hour...
                                           ...                        
76561198208253879    [{'appid': 70, 'name': 'Half-Life', 'hours': 3...
76561199104131020    [{'appid': 230410, 'name': 'Warframe', 'hours'...
76561198012694491    [{'appid': 240, 'name': 'Counter-Strike: Sourc...
76561198393589724    [{'appid': 2600, 'name': 'Vampire: The Masquer...
76561198008893422    [{'appid': 6880, 'name': 'Just Cause', 'hours'...
Name: library, Length: 6340, dtype: object

In [53]:
def get_app_tags(appid):
    return game_data.loc[appid]['tags']

In [54]:
get_app_tags(10)

['Action',
 'FPS',
 'Multiplayer',
 'Shooter',
 'Classic',
 'Team-Based',
 'First-Person',
 'Competitive',
 'Tactical',
 "1990's"]

In [55]:
def tag_hours(games_list):
    games_list = ast.literal_eval(games_list)
    tag_dict = {}
    for game_dict in games_list:
        if game_dict['appid'] in game_data.index and game_dict['hours'] != 0:            
            tags = get_app_tags(game_dict['appid'])
            for tag in tags:
                if tag in tag_dict.keys():
                    tag_dict[tag] += game_dict['hours']
                else:
                    tag_dict[tag] = game_dict['hours']
    return dict(sorted(tag_dict.items(), key=lambda item: item[1], reverse=True))
        

In [56]:
print(tag_hours(library_df.iloc[0]['library']))

{'Action': 115061, 'Multiplayer': 112293, 'Free to Play': 76816, 'Singleplayer': 68393, 'RPG': 62375, 'Adventure': 61227, 'Open World': 57530, 'Shooter': 51982, 'Co-op': 51835, 'Third Person': 47150, 'Pixel Graphics': 43583, 'Fantasy': 39130, 'Action Roguelike': 36701, 'Parkour': 35250, 'Looter Shooter': 33962, 'Third-Person Shooter': 32866, 'Difficult': 31310, 'Sci-fi': 31309, 'Ninja': 31003, 'PvP': 30370, 'Indie': 29962, 'Story Rich': 28889, 'Dungeon Crawler': 26977, 'Atmospheric': 26177, 'Rogue-like': 26136, 'Competitive': 25216, 'Massively Multiplayer': 24811, 'Team-Based': 24583, 'Replay Value': 23898, 'Dark': 23898, 'Great Soundtrack': 23898, 'Survival': 22631, 'Mature': 22247, 'Strategy': 20729, 'Open World Survival Craft': 20725, 'First-Person': 20064, 'Choices Matter': 19195, 'Nudity': 19195, 'Crafting': 18054, 'Exploration': 17782, 'Building': 16908, 'MOBA': 16477, 'Mythology': 16093, 'Sandbox': 15456, 'Stealth': 14189, 'Character Customization': 13977, 'MMORPG': 13736, 'FPS'

In [57]:
library_df

Unnamed: 0_level_0,library
steamid,Unnamed: 1_level_1
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26..."
76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours..."
76561198088650778,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours..."
76561198886682654,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours..."
76561198311899167,"[{'appid': 10, 'name': 'Counter-Strike', 'hour..."
...,...
76561198208253879,"[{'appid': 70, 'name': 'Half-Life', 'hours': 3..."
76561199104131020,"[{'appid': 230410, 'name': 'Warframe', 'hours'..."
76561198012694491,"[{'appid': 240, 'name': 'Counter-Strike: Sourc..."
76561198393589724,"[{'appid': 2600, 'name': 'Vampire: The Masquer..."


In [58]:
library_df['tag_hours'] = library_df['library'].apply(tag_hours)
library_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  library_df['tag_hours'] = library_df['library'].apply(tag_hours)


Unnamed: 0_level_0,library,tag_hours
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26...","{'Action': 115061, 'Multiplayer': 112293, 'Fre..."
76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours...","{'Multiplayer': 281821, 'Action': 213414, 'Fir..."
76561198088650778,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours...",{}
76561198886682654,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours...","{'Multiplayer': 109690, 'Singleplayer': 95062,..."
76561198311899167,"[{'appid': 10, 'name': 'Counter-Strike', 'hour...","{'Sandbox': 76219, 'Multiplayer': 74026, 'Firs..."
...,...,...
76561198208253879,"[{'appid': 70, 'name': 'Half-Life', 'hours': 3...","{'Multiplayer': 209252, 'Pixel Graphics': 1578..."
76561199104131020,"[{'appid': 230410, 'name': 'Warframe', 'hours'...","{'Multiplayer': 18877, 'Action': 18558, 'Shoot..."
76561198012694491,"[{'appid': 240, 'name': 'Counter-Strike: Sourc...","{'Multiplayer': 375807, 'Free to Play': 310345..."
76561198393589724,"[{'appid': 2600, 'name': 'Vampire: The Masquer...","{'Action': 43418, 'Multiplayer': 35866, 'RPG':..."


In [59]:
library_df['tag_hours'].value_counts()

TypeError: unhashable type: 'dict'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas\_libs\hashtable_class_helper.pxi", line 5231, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'dict'


{}                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      

In [60]:
no_hours = library_df[library_df['tag_hours'] == {}].index
library_df = library_df.drop(no_hours)

In [61]:
len(library_df)

5738

In [62]:
def normalize_hours(tag_dict):
    tag_labels = tag_dict.keys()
    tag_hours = tag_dict.values()
    sum_hours = sum(tag_hours)
    norm_hours = [float(i)/sum_hours for i in tag_hours]
    return dict(zip(tag_labels, norm_hours))

In [63]:
print(normalize_hours(library_df.iloc[0]['tag_hours']))

{'Action': 0.06109671527032911, 'Multiplayer': 0.0596269235262258, 'Free to Play': 0.04078884487537568, 'Singleplayer': 0.03631628134192836, 'RPG': 0.03312075868440895, 'Adventure': 0.032511177426377665, 'Open World': 0.030548092138100953, 'Shooter': 0.027602136720367872, 'Co-op': 0.027524080583668745, 'Third Person': 0.02503637309771354, 'Pixel Graphics': 0.02314231704597347, 'Fantasy': 0.020777800197529814, 'Action Roguelike': 0.019488015462548983, 'Parkour': 0.018717542983974596, 'Looter Shooter': 0.018033622548134617, 'Third-Person Shooter': 0.01745165298471799, 'Difficult': 0.01662542612278708, 'Sci-fi': 0.01662489512865988, 'Ninja': 0.016462410925735162, 'PvP': 0.016126291643214427, 'Indie': 0.015909646039314804, 'Story Rich': 0.01533988934082389, 'Dungeon Crawler': 0.01432462856960802, 'Atmospheric': 0.013899833267844058, 'Rogue-like': 0.013878062508628655, 'Competitive': 0.013389547911600097, 'Massively Multiplayer': 0.013174495290082092, 'Team-Based': 0.013053428629079362, 'Re

In [64]:
library_df['tag_hours'] = library_df['tag_hours'].apply(normalize_hours)
library_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  library_df['tag_hours'] = library_df['tag_hours'].apply(normalize_hours)


Unnamed: 0_level_0,library,tag_hours
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26...","{'Action': 0.06109671527032911, 'Multiplayer':..."
76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours...","{'Multiplayer': 0.06902065567186039, 'Action':..."
76561198886682654,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours...","{'Multiplayer': 0.05836965139977544, 'Singlepl..."
76561198311899167,"[{'appid': 10, 'name': 'Counter-Strike', 'hour...","{'Sandbox': 0.06608431018935979, 'Multiplayer'..."
76561199063236653,"[{'appid': 500, 'name': 'Left 4 Dead', 'hours'...","{'Multiplayer': 0.08737094891821529, 'Action':..."
...,...,...
76561198208253879,"[{'appid': 70, 'name': 'Half-Life', 'hours': 3...","{'Multiplayer': 0.08415388451422459, 'Pixel Gr..."
76561199104131020,"[{'appid': 230410, 'name': 'Warframe', 'hours'...","{'Multiplayer': 0.09915953143877712, 'Action':..."
76561198012694491,"[{'appid': 240, 'name': 'Counter-Strike: Sourc...","{'Multiplayer': 0.09924078778500166, 'Free to ..."
76561198393589724,"[{'appid': 2600, 'name': 'Vampire: The Masquer...","{'Action': 0.06086919949530352, 'Multiplayer':..."


In [65]:
library_df = library_df[~library_df.index.duplicated(keep='first')]

In [66]:
def rate_game_library(steamid):
    try:
        library = ast.literal_eval(library_df.loc[steamid]['library'])
    except ValueError:
        library = ast.literal_eval(library_df.loc[steamid]['library'].iloc[0])
    tag_hours = library_df.loc[steamid]['tag_hours']
    scores = {}
    for game in library:
        score = 0
        appid = game['appid']
        if appid in game_data.index:
            game_tags = game_data.loc[appid]['tags']
            for tag in game_tags:
                if tag in tag_hours:
                    score += tag_hours[tag]
        if appid in scores:
            scores[appid] += score
        else:
            scores[appid] = score
    return scores
        
    

In [67]:
rate_game_library(76561199095677278)
# print(library_df.loc[76561199095677278]['tag_hours'])

{236390: 0.18366166246653792,
 244210: 0.4656622311999295,
 251570: 0.1759023739716222,
 107410: 0.38017866073092044,
 270880: 0.5180837607005228,
 284160: 0.5229023053313854,
 21000: 0,
 213330: 0,
 313690: 0,
 355840: 0.19895764897382845,
 225540: 0.3032260911346232,
 577670: 0,
 585420: 0,
 605740: 0,
 636480: 0.13262274345221167,
 645630: 0.5497141624420236,
 648800: 0.18903030956746847,
 704850: 0.2849658269677685,
 715670: 0,
 873840: 0,
 787860: 0.7945176062207667,
 730: 0.10546180170816132,
 675010: 0.5437199086104275,
 1118200: 0.2224414351692962,
 1167630: 0.3117306164873849,
 1238840: 0.1852864749316049,
 1238860: 0.18923426912856317,
 1238810: 0.27452564693423287,
 424840: 0.08434120081191594,
 1248130: 0}

In [68]:
def game_ratings(df):
    index = df.index.values
#     print(index)
    library_ratings = []
    for steamid in index:
        rated_library = rate_game_library(steamid)
        library_ratings.append(rated_library)
    return library_ratings

In [None]:
library_df['game_ratings'] = library_df.index.to_series().apply(rate_game_library)

In [215]:
library_df

Unnamed: 0_level_0,library,tag_hours,game_ratings
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26...","{'Action': 0.06109671527032911, 'Multiplayer':...","{17390: 0.055111349468474874, 17440: 0, 550: 0..."
76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours...","{'Multiplayer': 0.06902065567186039, 'Action':...","{3830: 0.14023839496074098, 4000: 0.2444579416..."
76561198886682654,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours...","{'Multiplayer': 0.05836965139977544, 'Singlepl...","{4000: 0.21460970716729724, 400: 0.11458948611..."
76561198311899167,"[{'appid': 10, 'name': 'Counter-Strike', 'hour...","{'Sandbox': 0.06608431018935979, 'Multiplayer'...","{10: 0.2035782409655268, 80: 0.234358743150447..."
76561199063236653,"[{'appid': 500, 'name': 'Left 4 Dead', 'hours'...","{'Multiplayer': 0.08737094891821529, 'Action':...","{500: 0.3969835712361972, 3590: 0.114772421222..."
...,...,...,...
76561198054375336,"[{'appid': 220, 'name': 'Half-Life 2', 'hours'...","{'Open World': 0.08426903711625006, 'Adventure...","{220: 0.258722714828182, 320: 0.15850648804130..."
76561199104131020,"[{'appid': 230410, 'name': 'Warframe', 'hours'...","{'Multiplayer': 0.09915953143877712, 'Action':...","{230410: 0.5403950202237747, 238960: 0.1565162..."
76561198012694491,"[{'appid': 240, 'name': 'Counter-Strike: Sourc...","{'Multiplayer': 0.09924078778500166, 'Free to ...","{240: 0.22484247997000123, 300: 0.174308786792..."
76561198393589724,"[{'appid': 2600, 'name': 'Vampire: The Masquer...","{'Action': 0.06086919949530352, 'Multiplayer':...","{2600: 0, 6980: 0, 1700: 0, 22330: 0.368598065..."


In [70]:
with open('modded_library_df.pickle', 'wb') as handle:
    pickle.dump(library_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [71]:
with open('modded_library_df.pickle', 'rb') as handle:
    modded_library_df = pickle.load(handle)
modded_library_df

Unnamed: 0_level_0,library,tag_hours,game_ratings
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
76561198148157441,"[{'appid': 17390, 'name': 'Spore', 'hours': 26...","{'Action': 0.06109671527032911, 'Multiplayer':...","{17390: 0.055111349468474874, 17440: 0, 550: 0..."
76561198170079242,"[{'appid': 3830, 'name': 'Psychonauts', 'hours...","{'Multiplayer': 0.06902065567186039, 'Action':...","{3830: 0.14023839496074098, 4000: 0.2444579416..."
76561198886682654,"[{'appid': 4000, 'name': ""Garry's Mod"", 'hours...","{'Multiplayer': 0.05836965139977544, 'Singlepl...","{4000: 0.21460970716729724, 400: 0.11458948611..."
76561198311899167,"[{'appid': 10, 'name': 'Counter-Strike', 'hour...","{'Sandbox': 0.06608431018935979, 'Multiplayer'...","{10: 0.2035782409655268, 80: 0.234358743150447..."
76561199063236653,"[{'appid': 500, 'name': 'Left 4 Dead', 'hours'...","{'Multiplayer': 0.08737094891821529, 'Action':...","{500: 0.3969835712361972, 3590: 0.114772421222..."
...,...,...,...
76561198054375336,"[{'appid': 220, 'name': 'Half-Life 2', 'hours'...","{'Open World': 0.08426903711625006, 'Adventure...","{220: 0.258722714828182, 320: 0.15850648804130..."
76561199104131020,"[{'appid': 230410, 'name': 'Warframe', 'hours'...","{'Multiplayer': 0.09915953143877712, 'Action':...","{230410: 0.5403950202237747, 238960: 0.1565162..."
76561198012694491,"[{'appid': 240, 'name': 'Counter-Strike: Sourc...","{'Multiplayer': 0.09924078778500166, 'Free to ...","{240: 0.22484247997000123, 300: 0.174308786792..."
76561198393589724,"[{'appid': 2600, 'name': 'Vampire: The Masquer...","{'Action': 0.06086919949530352, 'Multiplayer':...","{2600: 0, 6980: 0, 1700: 0, 22330: 0.368598065..."


In [72]:
def user_rating_df(steamid):
    big_df = pd.DataFrame(columns=['steamid', 'appid', 'rating'])
    rating_dict = library_df.loc[steamid]['game_ratings']
    for app, rating in rating_dict.items():
        big_df = big_df.append({'steamid': str(steamid), 'appid': str(app), 'rating': rating}, ignore_index=True)
    return big_df

In [73]:
user_rating_df(76561198148157441)

Unnamed: 0,steamid,appid,rating
0,76561198148157441,17390,0.055111
1,76561198148157441,17440,0.000000
2,76561198148157441,550,0.215663
3,76561198148157441,47870,0.083723
4,76561198148157441,65600,0.000000
...,...,...,...
120,76561198148157441,582660,0.216987
121,76561198148157441,34270,0.157455
122,76561198148157441,1151640,0.228800
123,76561198148157441,1530140,0.000000


In [77]:
user_rec_df = pd.DataFrame(columns=['steamid', 'appid', 'rating'])

In [78]:
for num, steamid in enumerate(library_df.index):
    total = len(library_df)
    print(f"{num} out of {total}")
    user_df = user_rating_df(steamid)
    user_rec_df = pd.concat([user_rec_df, user_df], ignore_index = True, axis = 0)
user_rec_df

0 out of 5518
1 out of 5518
2 out of 5518
3 out of 5518
4 out of 5518
5 out of 5518
6 out of 5518
7 out of 5518
8 out of 5518
9 out of 5518
10 out of 5518
11 out of 5518
12 out of 5518
13 out of 5518
14 out of 5518
15 out of 5518
16 out of 5518
17 out of 5518
18 out of 5518
19 out of 5518
20 out of 5518
21 out of 5518
22 out of 5518
23 out of 5518
24 out of 5518
25 out of 5518
26 out of 5518
27 out of 5518
28 out of 5518
29 out of 5518
30 out of 5518
31 out of 5518
32 out of 5518
33 out of 5518
34 out of 5518
35 out of 5518
36 out of 5518
37 out of 5518
38 out of 5518
39 out of 5518
40 out of 5518
41 out of 5518
42 out of 5518
43 out of 5518
44 out of 5518
45 out of 5518
46 out of 5518
47 out of 5518
48 out of 5518
49 out of 5518
50 out of 5518
51 out of 5518
52 out of 5518
53 out of 5518
54 out of 5518
55 out of 5518
56 out of 5518
57 out of 5518
58 out of 5518
59 out of 5518
60 out of 5518
61 out of 5518
62 out of 5518
63 out of 5518
64 out of 5518
65 out of 5518
66 out of 5518
67 ou

519 out of 5518
520 out of 5518
521 out of 5518
522 out of 5518
523 out of 5518
524 out of 5518
525 out of 5518
526 out of 5518
527 out of 5518
528 out of 5518
529 out of 5518
530 out of 5518
531 out of 5518
532 out of 5518
533 out of 5518
534 out of 5518
535 out of 5518
536 out of 5518
537 out of 5518
538 out of 5518
539 out of 5518
540 out of 5518
541 out of 5518
542 out of 5518
543 out of 5518
544 out of 5518
545 out of 5518
546 out of 5518
547 out of 5518
548 out of 5518
549 out of 5518
550 out of 5518
551 out of 5518
552 out of 5518
553 out of 5518
554 out of 5518
555 out of 5518
556 out of 5518
557 out of 5518
558 out of 5518
559 out of 5518
560 out of 5518
561 out of 5518
562 out of 5518
563 out of 5518
564 out of 5518
565 out of 5518
566 out of 5518
567 out of 5518
568 out of 5518
569 out of 5518
570 out of 5518
571 out of 5518
572 out of 5518
573 out of 5518
574 out of 5518
575 out of 5518
576 out of 5518
577 out of 5518
578 out of 5518
579 out of 5518
580 out of 5518
581 out 

1032 out of 5518
1033 out of 5518
1034 out of 5518
1035 out of 5518
1036 out of 5518
1037 out of 5518
1038 out of 5518
1039 out of 5518
1040 out of 5518
1041 out of 5518
1042 out of 5518
1043 out of 5518
1044 out of 5518
1045 out of 5518
1046 out of 5518
1047 out of 5518
1048 out of 5518
1049 out of 5518
1050 out of 5518
1051 out of 5518
1052 out of 5518
1053 out of 5518
1054 out of 5518
1055 out of 5518
1056 out of 5518
1057 out of 5518
1058 out of 5518
1059 out of 5518
1060 out of 5518
1061 out of 5518
1062 out of 5518
1063 out of 5518
1064 out of 5518
1065 out of 5518
1066 out of 5518
1067 out of 5518
1068 out of 5518
1069 out of 5518
1070 out of 5518
1071 out of 5518
1072 out of 5518
1073 out of 5518
1074 out of 5518
1075 out of 5518
1076 out of 5518
1077 out of 5518
1078 out of 5518
1079 out of 5518
1080 out of 5518
1081 out of 5518
1082 out of 5518
1083 out of 5518
1084 out of 5518
1085 out of 5518
1086 out of 5518
1087 out of 5518
1088 out of 5518
1089 out of 5518
1090 out of 55

1514 out of 5518
1515 out of 5518
1516 out of 5518
1517 out of 5518
1518 out of 5518
1519 out of 5518
1520 out of 5518
1521 out of 5518
1522 out of 5518
1523 out of 5518
1524 out of 5518
1525 out of 5518
1526 out of 5518
1527 out of 5518
1528 out of 5518
1529 out of 5518
1530 out of 5518
1531 out of 5518
1532 out of 5518
1533 out of 5518
1534 out of 5518
1535 out of 5518
1536 out of 5518
1537 out of 5518
1538 out of 5518
1539 out of 5518
1540 out of 5518
1541 out of 5518
1542 out of 5518
1543 out of 5518
1544 out of 5518
1545 out of 5518
1546 out of 5518
1547 out of 5518
1548 out of 5518
1549 out of 5518
1550 out of 5518
1551 out of 5518
1552 out of 5518
1553 out of 5518
1554 out of 5518
1555 out of 5518
1556 out of 5518
1557 out of 5518
1558 out of 5518
1559 out of 5518
1560 out of 5518
1561 out of 5518
1562 out of 5518
1563 out of 5518
1564 out of 5518
1565 out of 5518
1566 out of 5518
1567 out of 5518
1568 out of 5518
1569 out of 5518
1570 out of 5518
1571 out of 5518
1572 out of 55

1997 out of 5518
1998 out of 5518
1999 out of 5518
2000 out of 5518
2001 out of 5518
2002 out of 5518
2003 out of 5518
2004 out of 5518
2005 out of 5518
2006 out of 5518
2007 out of 5518
2008 out of 5518
2009 out of 5518
2010 out of 5518
2011 out of 5518
2012 out of 5518
2013 out of 5518
2014 out of 5518
2015 out of 5518
2016 out of 5518
2017 out of 5518
2018 out of 5518
2019 out of 5518
2020 out of 5518
2021 out of 5518
2022 out of 5518
2023 out of 5518
2024 out of 5518
2025 out of 5518
2026 out of 5518
2027 out of 5518
2028 out of 5518
2029 out of 5518
2030 out of 5518
2031 out of 5518
2032 out of 5518
2033 out of 5518
2034 out of 5518
2035 out of 5518
2036 out of 5518
2037 out of 5518
2038 out of 5518
2039 out of 5518
2040 out of 5518
2041 out of 5518
2042 out of 5518
2043 out of 5518
2044 out of 5518
2045 out of 5518
2046 out of 5518
2047 out of 5518
2048 out of 5518
2049 out of 5518
2050 out of 5518
2051 out of 5518
2052 out of 5518
2053 out of 5518
2054 out of 5518
2055 out of 55

2481 out of 5518
2482 out of 5518
2483 out of 5518
2484 out of 5518
2485 out of 5518
2486 out of 5518
2487 out of 5518
2488 out of 5518
2489 out of 5518
2490 out of 5518
2491 out of 5518
2492 out of 5518
2493 out of 5518
2494 out of 5518
2495 out of 5518
2496 out of 5518
2497 out of 5518
2498 out of 5518
2499 out of 5518
2500 out of 5518
2501 out of 5518
2502 out of 5518
2503 out of 5518
2504 out of 5518
2505 out of 5518
2506 out of 5518
2507 out of 5518
2508 out of 5518
2509 out of 5518
2510 out of 5518
2511 out of 5518
2512 out of 5518
2513 out of 5518
2514 out of 5518
2515 out of 5518
2516 out of 5518
2517 out of 5518
2518 out of 5518
2519 out of 5518
2520 out of 5518
2521 out of 5518
2522 out of 5518
2523 out of 5518
2524 out of 5518
2525 out of 5518
2526 out of 5518
2527 out of 5518
2528 out of 5518
2529 out of 5518
2530 out of 5518
2531 out of 5518
2532 out of 5518
2533 out of 5518
2534 out of 5518
2535 out of 5518
2536 out of 5518
2537 out of 5518
2538 out of 5518
2539 out of 55

2963 out of 5518
2964 out of 5518
2965 out of 5518
2966 out of 5518
2967 out of 5518
2968 out of 5518
2969 out of 5518
2970 out of 5518
2971 out of 5518
2972 out of 5518
2973 out of 5518
2974 out of 5518
2975 out of 5518
2976 out of 5518
2977 out of 5518
2978 out of 5518
2979 out of 5518
2980 out of 5518
2981 out of 5518
2982 out of 5518
2983 out of 5518
2984 out of 5518
2985 out of 5518
2986 out of 5518
2987 out of 5518
2988 out of 5518
2989 out of 5518
2990 out of 5518
2991 out of 5518
2992 out of 5518
2993 out of 5518
2994 out of 5518
2995 out of 5518
2996 out of 5518
2997 out of 5518
2998 out of 5518
2999 out of 5518
3000 out of 5518
3001 out of 5518
3002 out of 5518
3003 out of 5518
3004 out of 5518
3005 out of 5518
3006 out of 5518
3007 out of 5518
3008 out of 5518
3009 out of 5518
3010 out of 5518
3011 out of 5518
3012 out of 5518
3013 out of 5518
3014 out of 5518
3015 out of 5518
3016 out of 5518
3017 out of 5518
3018 out of 5518
3019 out of 5518
3020 out of 5518
3021 out of 55

3446 out of 5518
3447 out of 5518
3448 out of 5518
3449 out of 5518
3450 out of 5518
3451 out of 5518
3452 out of 5518
3453 out of 5518
3454 out of 5518
3455 out of 5518
3456 out of 5518
3457 out of 5518
3458 out of 5518
3459 out of 5518
3460 out of 5518
3461 out of 5518
3462 out of 5518
3463 out of 5518
3464 out of 5518
3465 out of 5518
3466 out of 5518
3467 out of 5518
3468 out of 5518
3469 out of 5518
3470 out of 5518
3471 out of 5518
3472 out of 5518
3473 out of 5518
3474 out of 5518
3475 out of 5518
3476 out of 5518
3477 out of 5518
3478 out of 5518
3479 out of 5518
3480 out of 5518
3481 out of 5518
3482 out of 5518
3483 out of 5518
3484 out of 5518
3485 out of 5518
3486 out of 5518
3487 out of 5518
3488 out of 5518
3489 out of 5518
3490 out of 5518
3491 out of 5518
3492 out of 5518
3493 out of 5518
3494 out of 5518
3495 out of 5518
3496 out of 5518
3497 out of 5518
3498 out of 5518
3499 out of 5518
3500 out of 5518
3501 out of 5518
3502 out of 5518
3503 out of 5518
3504 out of 55

3928 out of 5518
3929 out of 5518
3930 out of 5518
3931 out of 5518
3932 out of 5518
3933 out of 5518
3934 out of 5518
3935 out of 5518
3936 out of 5518
3937 out of 5518
3938 out of 5518
3939 out of 5518
3940 out of 5518
3941 out of 5518
3942 out of 5518
3943 out of 5518
3944 out of 5518
3945 out of 5518
3946 out of 5518
3947 out of 5518
3948 out of 5518
3949 out of 5518
3950 out of 5518
3951 out of 5518
3952 out of 5518
3953 out of 5518
3954 out of 5518
3955 out of 5518
3956 out of 5518
3957 out of 5518
3958 out of 5518
3959 out of 5518
3960 out of 5518
3961 out of 5518
3962 out of 5518
3963 out of 5518
3964 out of 5518
3965 out of 5518
3966 out of 5518
3967 out of 5518
3968 out of 5518
3969 out of 5518
3970 out of 5518
3971 out of 5518
3972 out of 5518
3973 out of 5518
3974 out of 5518
3975 out of 5518
3976 out of 5518
3977 out of 5518
3978 out of 5518
3979 out of 5518
3980 out of 5518
3981 out of 5518
3982 out of 5518
3983 out of 5518
3984 out of 5518
3985 out of 5518
3986 out of 55

4410 out of 5518
4411 out of 5518
4412 out of 5518
4413 out of 5518
4414 out of 5518
4415 out of 5518
4416 out of 5518
4417 out of 5518
4418 out of 5518
4419 out of 5518
4420 out of 5518
4421 out of 5518
4422 out of 5518
4423 out of 5518
4424 out of 5518
4425 out of 5518
4426 out of 5518
4427 out of 5518
4428 out of 5518
4429 out of 5518
4430 out of 5518
4431 out of 5518
4432 out of 5518
4433 out of 5518
4434 out of 5518
4435 out of 5518
4436 out of 5518
4437 out of 5518
4438 out of 5518
4439 out of 5518
4440 out of 5518
4441 out of 5518
4442 out of 5518
4443 out of 5518
4444 out of 5518
4445 out of 5518
4446 out of 5518
4447 out of 5518
4448 out of 5518
4449 out of 5518
4450 out of 5518
4451 out of 5518
4452 out of 5518
4453 out of 5518
4454 out of 5518
4455 out of 5518
4456 out of 5518
4457 out of 5518
4458 out of 5518
4459 out of 5518
4460 out of 5518
4461 out of 5518
4462 out of 5518
4463 out of 5518
4464 out of 5518
4465 out of 5518
4466 out of 5518
4467 out of 5518
4468 out of 55

4892 out of 5518
4893 out of 5518
4894 out of 5518
4895 out of 5518
4896 out of 5518
4897 out of 5518
4898 out of 5518
4899 out of 5518
4900 out of 5518
4901 out of 5518
4902 out of 5518
4903 out of 5518
4904 out of 5518
4905 out of 5518
4906 out of 5518
4907 out of 5518
4908 out of 5518
4909 out of 5518
4910 out of 5518
4911 out of 5518
4912 out of 5518
4913 out of 5518
4914 out of 5518
4915 out of 5518
4916 out of 5518
4917 out of 5518
4918 out of 5518
4919 out of 5518
4920 out of 5518
4921 out of 5518
4922 out of 5518
4923 out of 5518
4924 out of 5518
4925 out of 5518
4926 out of 5518
4927 out of 5518
4928 out of 5518
4929 out of 5518
4930 out of 5518
4931 out of 5518
4932 out of 5518
4933 out of 5518
4934 out of 5518
4935 out of 5518
4936 out of 5518
4937 out of 5518
4938 out of 5518
4939 out of 5518
4940 out of 5518
4941 out of 5518
4942 out of 5518
4943 out of 5518
4944 out of 5518
4945 out of 5518
4946 out of 5518
4947 out of 5518
4948 out of 5518
4949 out of 5518
4950 out of 55

5374 out of 5518
5375 out of 5518
5376 out of 5518
5377 out of 5518
5378 out of 5518
5379 out of 5518
5380 out of 5518
5381 out of 5518
5382 out of 5518
5383 out of 5518
5384 out of 5518
5385 out of 5518
5386 out of 5518
5387 out of 5518
5388 out of 5518
5389 out of 5518
5390 out of 5518
5391 out of 5518
5392 out of 5518
5393 out of 5518
5394 out of 5518
5395 out of 5518
5396 out of 5518
5397 out of 5518
5398 out of 5518
5399 out of 5518
5400 out of 5518
5401 out of 5518
5402 out of 5518
5403 out of 5518
5404 out of 5518
5405 out of 5518
5406 out of 5518
5407 out of 5518
5408 out of 5518
5409 out of 5518
5410 out of 5518
5411 out of 5518
5412 out of 5518
5413 out of 5518
5414 out of 5518
5415 out of 5518
5416 out of 5518
5417 out of 5518
5418 out of 5518
5419 out of 5518
5420 out of 5518
5421 out of 5518
5422 out of 5518
5423 out of 5518
5424 out of 5518
5425 out of 5518
5426 out of 5518
5427 out of 5518
5428 out of 5518
5429 out of 5518
5430 out of 5518
5431 out of 5518
5432 out of 55

Unnamed: 0,steamid,appid,rating
0,76561198148157441,17390,0.055111
1,76561198148157441,17440,0.0
2,76561198148157441,550,0.215663
3,76561198148157441,47870,0.083723
4,76561198148157441,65600,0.0
...,...,...,...
901104,76561198008893422,1546540,0.0
901105,76561198008893422,582660,0.228932
901106,76561198008893422,1551360,0.0
901107,76561198008893422,46500,0.0


In [79]:
user_rec_df

Unnamed: 0,steamid,appid,rating
0,76561198148157441,17390,0.055111
1,76561198148157441,17440,0.0
2,76561198148157441,550,0.215663
3,76561198148157441,47870,0.083723
4,76561198148157441,65600,0.0
...,...,...,...
901104,76561198008893422,1546540,0.0
901105,76561198008893422,582660,0.228932
901106,76561198008893422,1551360,0.0
901107,76561198008893422,46500,0.0


In [80]:
# with open('big_rating_df.pickle', 'wb') as handle:
#     pickle.dump(user_rec_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [81]:
with open('big_rating_df.pickle', 'rb') as handle:
    big_rating_df = pickle.load(handle)
big_rating_df

Unnamed: 0,steamid,appid,rating
0,76561198148157441,17390,0.055111
1,76561198148157441,17440,0.0
2,76561198148157441,550,0.215663
3,76561198148157441,47870,0.083723
4,76561198148157441,65600,0.0
...,...,...,...
901104,76561198008893422,1546540,0.0
901105,76561198008893422,582660,0.228932
901106,76561198008893422,1551360,0.0
901107,76561198008893422,46500,0.0


In [135]:
big_rating_df.describe()

Unnamed: 0,steamid,appid,rating
count,901109,901109,901109.0
unique,5518,24320,411960.0
top,76561197960671791,730,0.0
freq,6465,4074,484397.0


In [82]:
# rating_matrix = big_rating_df.pivot_table(index='steamid', columns='appid', values='rating')
# # replace NaN values with 0
# rating_matrix = rating_matrix.fillna(0)
# # display the top few rows
# rating_matrix.head(50)

appid,10,100,10000,1000010,1000030,1000360,1000410,1000760,1001040,1001140,...,9990,99900,999020,99910,99920,999220,999660,999730,999820,999860
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561197960328475,0.294394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960428438,0.232919,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960447880,0.181719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.269084,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960495151,0.162066,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960520366,0.360615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960537588,0.192361,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960546596,0.257182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960553379,0.168603,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960671791,0.403836,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.302108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76561197960685118,0.3158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [83]:
rating_matrix['20']

steamid
76561197960328475    0.304646
76561197960428438    0.219435
76561197960447880    0.147018
76561197960495151    0.153904
76561197960520366    0.290524
                       ...   
76561199219951385    0.000000
76561199221236273    0.000000
76561199221692308    0.000000
76561199222020759    0.000000
76561199222092780    0.000000
Name: 20, Length: 5518, dtype: float64

In [84]:
rating_matrix.loc['76561197960328475']

appid
10         0.294394
100        0.000000
10000      0.000000
1000010    0.000000
1000030    0.000000
             ...   
999220     0.000000
999660     0.000000
999730     0.000000
999820     0.000000
999860     0.000000
Name: 76561197960328475, Length: 24320, dtype: float64

In [170]:
def find_similar_users(steamid, matrix, k=3):
    
    user = np.array(matrix.loc[steamid]).reshape(1, -1)
    
    
    other_users = matrix[matrix.index != steamid]
    similarities = cosine_similarity(user,other_users)[0].tolist()
    

    indices = other_users.index.tolist()
    

    index_similarity = dict(zip(indices, similarities))
    

    index_similarity_sorted = sorted(index_similarity.items(), key=operator.itemgetter(1))
    index_similarity_sorted.reverse()
    

    top_users_similarities = index_similarity_sorted[:k]
    users = [u[0] for u in top_users_similarities]
    
    return users
    

similar_user_indices = find_similar_users('76561198886682654', rating_matrix)
print(similar_user_indices)

['76561199028084778', '76561198132363145', '76561198314901071']


In [171]:
def recommend_item(steamid, matrix, items=5):
    
    similar_user_indices = find_similar_users(steamid, rating_matrix)
    # load vectors for similar users
    similar_users = matrix[matrix.index.isin(similar_user_indices)]
    # calc avg ratings across the 3 similar users
    similar_users = similar_users.mean(axis=0)
    # convert to dataframe so its easy to sort and filter
    similar_users_df = pd.DataFrame(similar_users, columns=['mean'])
    
    
    # load vector for the current user
    user_df = matrix[matrix.index == steamid]
    # transpose it so its easier to filter
    user_df_transposed = user_df.transpose()
    # rename the column as 'rating'
    user_df_transposed.columns = ['rating']
    # remove any rows without a 0 value
    user_df_transposed = user_df_transposed[user_df_transposed['rating']==0]
    # generate a list
    games_unseen = user_df_transposed.index.tolist()
    print(games_unseen)
    
    # filter avg ratings of similar users
    similar_users_df_filtered = similar_users_df[similar_users_df.index.isin(games_unseen)]
    # order the dataframe
    similar_users_df_ordered = similar_users_df_filtered.sort_values(by=['mean'], ascending=False)
    # grab the top n
    top_n_games = similar_users_df_ordered.head(items)
    top_n_games_indices = top_n_games.index.tolist()
    top_n_games_indices = [int(x) for x in top_n_games_indices]
    # lookup in the other dataframe to find names
    game_information = game_data[game_data.index.isin(top_n_games_indices)]
    
    return game_information #items

recommend_item('76561198886682654', rating_matrix, items=10)

['10', '100', '10000', '1000010', '1000030', '1000360', '1000410', '1000760', '1001040', '1001140', '1001220', '1001490', '1001800', '1001980', '1002', '1002000', '1002300', '1002430', '1002560', '1002830', '1003090', '1003360', '1003400', '1003480', '1003520', '1003590', '1003730', '1003890', '100400', '100410', '1004240', '1004270', '1004330', '1004390', '1004490', '1004510', '1004610', '1004740', '1004750', '1004770', '1004860', '1005240', '1005300', '1005410', '1005460', '1005520', '1005580', '1005930', '1005950', '1006220', '1006250', '1006400', '1006710', '1006930', '1007040', '1007350', '1007400', '1007630', '1007840', '10080', '1008020', '1008210', '1008510', '1008520', '1008710', '1008800', '1008920', '10090', '1009290', '1009450', '1009460', '1009560', '100970', '100980', '10100', '1010100', '1010260', '1010270', '1010450', '1010600', '1010670', '1010750', '1010860', '1010870', '10110', '1011070', '1011190', '1011290', '1011390', '1011420', '1011610', '1011670', '1011700', '1

Unnamed: 0_level_0,name,genre,tags
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
200210,Realm of the Mad God Exalt,"Action, Free to Play, Indie, Massively Multipl...","[Free to Play, Massively Multiplayer, Pixel Gr..."
204360,Castle Crashers,"Action, Adventure, Casual, Indie, RPG","[Co-op, Action, Multiplayer, Adventure, Local ..."
244850,Space Engineers,"Action, Indie, Simulation, Strategy","[Space, Sandbox, Building, Multiplayer, Open W..."
268910,Cuphead,"Action, Indie","[Difficult, Cartoon, Platformer, Great Soundtr..."
271590,Grand Theft Auto V,"Action, Adventure","[Open World, Action, Multiplayer, Automobile S..."
387290,Ori and the Blind Forest: Definitive Edition,Action,"[Atmospheric, Great Soundtrack, Metroidvania, ..."
427730,Who's Your Daddy?!,"Action, Casual, Indie","[Funny, Multiplayer, First-Person, Comedy, Sim..."
444640,Bloons TD Battles,"Action, Free to Play, Strategy","[Free to Play, Tower Defense, Multiplayer, Str..."
677620,Splitgate: Arena Warfare,"Action, Free to Play, Indie","[Free to Play, FPS, Multiplayer, Arena Shooter..."
848450,Subnautica: Below Zero,"Adventure, Indie","[Open World Survival Craft, Survival, Underwat..."


In [172]:
library_df.loc[76561198886682654]['library']

'[{\'appid\': 4000, \'name\': "Garry\'s Mod", \'hours\': 2138}, {\'appid\': 400, \'name\': \'Portal\', \'hours\': 199}, {\'appid\': 17390, \'name\': \'Spore\', \'hours\': 49}, {\'appid\': 6020, \'name\': \'STAR WARS™ Jedi Knight: Jedi Academy™\', \'hours\': 120}, {\'appid\': 620, \'name\': \'Portal 2\', \'hours\': 982}, {\'appid\': 105600, \'name\': \'Terraria\', \'hours\': 10879}, {\'appid\': 102600, \'name\': \'Orcs Must Die!\', \'hours\': 352}, {\'appid\': 201790, \'name\': \'Orcs Must Die! 2\', \'hours\': 1182}, {\'appid\': 49520, \'name\': \'Borderlands 2\', \'hours\': 104}, {\'appid\': 203160, \'name\': \'Tomb Raider\', \'hours\': 0}, {\'appid\': 230410, \'name\': \'Warframe\', \'hours\': 419}, {\'appid\': 236390, \'name\': \'War Thunder\', \'hours\': 271}, {\'appid\': 238960, \'name\': \'Path of Exile\', \'hours\': 373}, {\'appid\': 223750, \'name\': \'DCS World Steam Edition\', \'hours\': 0}, {\'appid\': 218620, \'name\': \'PAYDAY 2\', \'hours\': 1819}, {\'appid\': 246620, \'na

In [88]:
rating_matrix_sparse = sparse.csr_matrix(rating_matrix)

In [89]:
print(rating_matrix_sparse)

  (0, 0)	0.29439372497583877
  (0, 458)	0.2713766740300981
  (0, 729)	0.24760199502968386
  (0, 2317)	0.29527474803258313
  (0, 3746)	0.3046458649730775
  (0, 3831)	0.18547735744857105
  (0, 4230)	0.29491664365594367
  (0, 4316)	0.19844159878503384
  (0, 4327)	0.28617544525749
  (0, 4419)	0.3679578558608311
  (0, 4461)	0.2866431382024024
  (0, 4543)	0.18377226287449952
  (0, 4601)	0.22669819135717245
  (0, 4662)	0.1693445395554328
  (0, 4683)	0.2664883335634406
  (0, 5593)	0.1134621358553086
  (0, 5640)	0.25265342399558194
  (0, 6407)	0.2884448778130609
  (0, 6408)	0.26264669335910534
  (0, 6584)	0.24538865111141792
  (0, 6798)	0.3039581319895071
  (0, 7053)	0.3285016567720558
  (0, 7559)	0.326160603341157
  (0, 7816)	0.2764375949192324
  (0, 7993)	0.1365568479911639
  :	:
  (5514, 4977)	0.36647050169192286
  (5514, 5018)	0.3944975724584374
  (5514, 8665)	0.34922760041194645
  (5514, 12726)	0.21296160070619397
  (5514, 15569)	0.19195233191113725
  (5514, 20411)	0.2208327203177873
  (55

In [90]:
# instantiate a reader and read in our rating data
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(big_rating_df[['steamid','appid','rating']], reader)

In [101]:
# train on 75% of known rates
trainset, testset = train_test_split(data, test_size=.25)

In [102]:
svd = SVD()
svd.fit(trainset)
predictions = svd.test(testset)

In [103]:
# check the accuracy using Root Mean Square Error
accuracy.rmse(predictions)

RMSE: 0.0623


0.062337143496900546

In [105]:
# Run 5-fold cross-validation and then print results
cross_validate(svd, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0615  0.0623  0.0614  0.0614  0.0614  0.0616  0.0004  
MAE (testset)     0.0369  0.0372  0.0371  0.0369  0.0369  0.0370  0.0001  
Fit time          35.02   35.07   36.08   34.49   33.93   34.92   0.71    
Test time         1.60    1.57    1.59    1.55    1.54    1.57    0.02    


{'test_rmse': array([0.06153856, 0.06232288, 0.06136012, 0.06137005, 0.06142886]),
 'test_mae': array([0.03689055, 0.03720996, 0.03707589, 0.03685519, 0.03685905]),
 'fit_time': (35.02059197425842,
  35.066723346710205,
  36.07715964317322,
  34.493210315704346,
  33.932953119277954),
 'test_time': (1.5989315509796143,
  1.5729968547821045,
  1.5940096378326416,
  1.5532119274139404,
  1.5429942607879639)}

In [199]:
sim_cos = {'name':'cosine', 'user_based':True}

In [200]:
# train KNNWithMeans on 75% of known rates
knnwm = KNNWithMeans(sim_options=sim_cos)
knnwm.fit(trainsetwm)
predictionswm = knnwm.test(testset)

Computing the cosine similarity matrix...


  sim = construction_func[name](*args)


Done computing similarity matrix.


In [201]:
# check the accuracy using Root Mean Square Error
accuracy.rmse(predictionswm)

RMSE: 0.0531


0.053062959326523114

In [202]:
# Run 5-fold cross-validation and then print results
cross_validate(knnwm, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0582  0.0586  0.0579  0.0578  0.0584  0.0582  0.0003  
MAE (testset)     0.0334  0.0336  0.0333  0.0335  0.0335  0.0334  0.0001  
Fit time          41.59   41.43   41.46   42.18   42.00   41.73   0.30    
Test time         54.67   53.43   52.82   57.75   53.56   54.45   1.76    


{'test_rmse': array([0.05821491, 0.0586386 , 0.05792748, 0.05780225, 0.05837665]),
 'test_mae': array([0.03344514, 0.03355665, 0.03327553, 0.03347897, 0.03346577]),
 'fit_time': (41.59331297874451,
  41.4290189743042,
  41.45554327964783,
  42.183358907699585,
  41.995267391204834),
 'test_time': (54.666250467300415,
  53.428630352020264,
  52.82038140296936,
  57.75379467010498,
  53.55896329879761)}

In [107]:
sim_cos = {'name':'cosine', 'user_based':True}

In [108]:
basic = knns.KNNBasic(sim_options=sim_cos)
basic.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


  sim = construction_func[name](*args)


<surprise.prediction_algorithms.knns.KNNBasic at 0x1a531b72100>

In [110]:
pred_basic = basic.test(testset)

In [111]:
accuracy.rmse(pred_basic)

RMSE: 0.0558


0.05578259960941746

In [112]:
# Run 5-fold cross-validation and then print results
cross_validate(basic, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0553  0.0554  0.0548  0.0554  0.0554  0.0553  0.0003  
MAE (testset)     0.0261  0.0260  0.0260  0.0263  0.0261  0.0261  0.0001  
Fit time          41.97   40.65   41.22   40.90   41.20   41.19   0.45    
Test time         52.33   52.60   52.48   51.29   53.09   52.36   0.59    


{'test_rmse': array([0.05527731, 0.05544159, 0.05478145, 0.05544405, 0.05541005]),
 'test_mae': array([0.0260514 , 0.02600047, 0.02603646, 0.02628626, 0.02613244]),
 'fit_time': (41.971508502960205,
  40.64889574050903,
  41.218589305877686,
  40.89688587188721,
  41.19930958747864),
 'test_time': (52.33384418487549,
  52.60404872894287,
  52.478718280792236,
  51.28813338279724,
  53.08840870857239)}

In [113]:
sim_pearson = {'name':'pearson', 'user_based':True}
basic_pearson = knns.KNNBasic(sim_options=sim_pearson)
basic_pearson.fit(trainset)
pred_pearson = basic_pearson.test(testset)

Computing the pearson similarity matrix...


  sim = construction_func[name](*args)


Done computing similarity matrix.


In [114]:
accuracy.rmse(pred_pearson)

RMSE: 0.0526


0.05256830583597622

In [115]:
# Run 5-fold cross-validation and then print results
cross_validate(basic_pearson, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0520  0.0520  0.0522  0.0520  0.0519  0.0520  0.0001  
MAE (testset)     0.0243  0.0242  0.0243  0.0243  0.0242  0.0243  0.0000  
Fit time          52.69   51.72   50.34   53.04   52.70   52.10   0.98    
Test time         54.89   50.04   52.63   54.02   52.92   52.90   1.64    


{'test_rmse': array([0.05202397, 0.05202659, 0.0521685 , 0.05201977, 0.05186694]),
 'test_mae': array([0.0243025 , 0.02420739, 0.02432959, 0.02425311, 0.0242157 ]),
 'fit_time': (52.69379758834839,
  51.716752767562866,
  50.3403594493866,
  53.04188942909241,
  52.69936466217041),
 'test_time': (54.88574504852295,
  50.03525495529175,
  52.628581285476685,
  54.020856857299805,
  52.922216176986694)}

In [119]:
## Perform a gridsearch with SVD
# ⏰ This cell may take several minutes to run
params = {'n_factors': [20, 50, 100],
         'reg_all': [0.02, 0.05, 0.1]}
g_s_svd = GridSearchCV(SVD,param_grid=params,n_jobs=-1)
g_s_svd.fit(data)

In [121]:
print(g_s_svd.best_score)
print(g_s_svd.best_params)

{'rmse': 0.055787059192594635, 'mae': 0.03228830010658591}
{'rmse': {'n_factors': 20, 'reg_all': 0.05}, 'mae': {'n_factors': 20, 'reg_all': 0.02}}


In [139]:
big_rating_df

Unnamed: 0,steamid,appid,rating
0,76561198148157441,17390,0.055111
1,76561198148157441,17440,0.0
2,76561198148157441,550,0.215663
3,76561198148157441,47870,0.083723
4,76561198148157441,65600,0.0
...,...,...,...
901104,76561198008893422,1546540,0.0
901105,76561198008893422,582660,0.228932
901106,76561198008893422,1551360,0.0
901107,76561198008893422,46500,0.0


In [151]:
game_data

Unnamed: 0_level_0,name,genre,tags
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,Counter-Strike,Action,"[Action, FPS, Multiplayer, Shooter, Classic, T..."
20,Team Fortress Classic,Action,"[Action, FPS, Multiplayer, Classic, Hero Shoot..."
30,Day of Defeat,Action,"[FPS, World War II, Multiplayer, Shooter, Acti..."
40,Deathmatch Classic,Action,"[Action, FPS, Classic, Multiplayer, Shooter, F..."
50,Half-Life: Opposing Force,Action,"[FPS, Action, Classic, Sci-fi, Singleplayer, S..."
...,...,...,...
1483870,Draw & Guess,"Casual, Indie","[Casual, Multiplayer, Hand-drawn, Funny, Famil..."
1517290,Battlefield 2042,"Action, Adventure, Casual","[Shooter, Action, Multiplayer, FPS, First-Pers..."
1520470,封灵档案,Free to Play,"[Sexual Content, Free to Play, Nudity, Mature,..."
1536610,OpenTTD,"Casual, Free to Play, Indie, Simulation","[Simulation, Free to Play, Building, Sandbox, ..."


In [140]:
dataset = data.build_full_trainset()
print('Number of users: ', dataset.n_users, '\n')
print('Number of items: ', dataset.n_items)

Number of users:  5518 

Number of items:  24320


In [178]:
svd = SVD(n_factors= 20, reg_all=0.05)
svd.fit(dataset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1a56ed436d0>

In [179]:
svd.predict('76561198148157441', 65600)

Prediction(uid='76561198148157441', iid=65600, r_ui=None, est=0.06033106062489715, details={'was_impossible': False})

In [180]:
list_of_games = []
for appid in big_rating_df['appid'].unique():
    list_of_games.append( (appid,svd.predict('76561198886682654',appid)[3]))

In [181]:
# order the predictions from highest to lowest rated
ranked_games = sorted(list_of_games, key=lambda x:x[1], reverse=True)

In [182]:
ranked_games

[('552520', 0.34276862630510274),
 ('530700', 0.33136107566918305),
 ('298110', 0.3271472686465171),
 ('359550', 0.3205875360272703),
 ('286940', 0.3199188616167072),
 ('355840', 0.31838852228031117),
 ('550650', 0.31586432412382237),
 ('482730', 0.3157647622042885),
 ('19900', 0.3150206191947515),
 ('921060', 0.3142229680116953),
 ('291480', 0.31421504861689004),
 ('349700', 0.31252156419077926),
 ('202970', 0.3123412642519447),
 ('939960', 0.30917422198136557),
 ('686810', 0.3080878716948282),
 ('107410', 0.3080616049707428),
 ('730', 0.3077674766560911),
 ('355180', 0.3072729134361751),
 ('1263850', 0.3065419295038304),
 ('436520', 0.30565340929002305),
 ('273110', 0.30545477244494457),
 ('924970', 0.3054245984075369),
 ('506540', 0.30401442192406697),
 ('962130', 0.3025970489417544),
 ('80', 0.30088986579996563),
 ('203290', 0.3006618397417286),
 ('723780', 0.3003263241493418),
 ('581320', 0.2987418355677616),
 ('1240440', 0.29835896284049745),
 ('9200', 0.2982772199676703),
 ('391

In [184]:
# return the top n recommendations using the 
def recommended_games(user_ratings,game_title_df,n):
        for idx, rec in enumerate(user_ratings):
            title = game_title_df.loc[int(rec[0])]['name']
            print('Recommendation # ', idx+1, ': ', title, '\n')
            n-= 1
            if n == 0:
                break
            
recommended_games(ranked_games,game_data,5)

Recommendation #  1 :  Far Cry 5 

Recommendation #  2 :  Argo 

Recommendation #  3 :  Far Cry 4 

Recommendation #  4 :  Tom Clancy's Rainbow Six Siege 

Recommendation #  5 :  S.K.I.L.L. - Special Force 2 (Shooter) 



In [193]:
## Perform a gridsearch with KNNBasic
params = {'k': [3, 5, 10, 20],
              'sim_options': {'name': ['msd', 'cosine', 'pearson'],
                              'min_support': [1, 5],
                              'user_based': [True]}
              }

g_s_knnb = GridSearchCV(knns.KNNBasic,param_grid=params, measures=['rmse', 'mae'], cv=3)
g_s_knnb.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...


  sim = construction_func[name](*args)


Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...


  sim = construction_func[name](*args)


Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity

In [198]:
results_df = pd.DataFrame.from_dict(g_s_knnb.cv_results)
results_df.sort_values('rank_test_rmse', axis=0, ascending=True)

Unnamed: 0,split0_test_rmse,split1_test_rmse,split2_test_rmse,mean_test_rmse,std_test_rmse,rank_test_rmse,split0_test_mae,split1_test_mae,split2_test_mae,mean_test_mae,std_test_mae,rank_test_mae,mean_fit_time,std_fit_time,mean_test_time,std_test_time,params,param_k,param_sim_options
13,0.039793,0.04038,0.039373,0.039849,0.000413,1,0.016563,0.016694,0.016542,0.0166,6.8e-05,1,13.310802,0.069528,61.489938,1.068945,"{'k': 10, 'sim_options': {'name': 'msd', 'min_...",10,"{'name': 'msd', 'min_support': 5, 'user_based'..."
19,0.040447,0.041077,0.040067,0.040531,0.000417,2,0.016902,0.017052,0.01689,0.016948,7.4e-05,2,13.190323,0.136918,67.19539,1.323651,"{'k': 20, 'sim_options': {'name': 'msd', 'min_...",20,"{'name': 'msd', 'min_support': 5, 'user_based'..."
7,0.040522,0.041176,0.040104,0.040601,0.000441,3,0.016999,0.017198,0.016996,0.017064,9.4e-05,3,13.402602,0.131231,60.526376,0.14525,"{'k': 5, 'sim_options': {'name': 'msd', 'min_s...",5,"{'name': 'msd', 'min_support': 5, 'user_based'..."
1,0.042409,0.043101,0.041988,0.042499,0.000459,4,0.017998,0.018204,0.017983,0.018062,0.000101,4,13.419875,0.065221,60.25229,0.468171,"{'k': 3, 'sim_options': {'name': 'msd', 'min_s...",3,"{'name': 'msd', 'min_support': 5, 'user_based'..."
23,0.045817,0.046255,0.045475,0.045849,0.000319,5,0.021148,0.021187,0.021119,0.021151,2.8e-05,6,32.485032,0.120129,66.56407,0.392886,"{'k': 20, 'sim_options': {'name': 'pearson', '...",20,"{'name': 'pearson', 'min_support': 5, 'user_ba..."
21,0.046421,0.046796,0.046067,0.046428,0.000298,6,0.021572,0.02161,0.021557,0.02158,2.2e-05,7,26.322695,0.155449,67.738197,2.253554,"{'k': 20, 'sim_options': {'name': 'cosine', 'm...",20,"{'name': 'cosine', 'min_support': 5, 'user_bas..."
17,0.046781,0.047219,0.046429,0.04681,0.000323,7,0.021732,0.021805,0.021694,0.021743,4.6e-05,8,32.339813,0.045634,60.767941,0.329672,"{'k': 10, 'sim_options': {'name': 'pearson', '...",10,"{'name': 'pearson', 'min_support': 5, 'user_ba..."
15,0.047355,0.047692,0.046985,0.047344,0.000289,8,0.022167,0.022207,0.022115,0.022163,3.8e-05,9,26.343665,0.090478,61.426993,0.732737,"{'k': 10, 'sim_options': {'name': 'cosine', 'm...",10,"{'name': 'cosine', 'min_support': 5, 'user_bas..."
18,0.047384,0.048,0.047276,0.047553,0.000319,9,0.020848,0.020909,0.020843,0.020867,3e-05,5,13.371477,0.10386,66.582317,0.816759,"{'k': 20, 'sim_options': {'name': 'msd', 'min_...",20,"{'name': 'msd', 'min_support': 1, 'user_based'..."
11,0.049639,0.050045,0.049354,0.049679,0.000284,10,0.023295,0.023402,0.023268,0.023322,5.8e-05,11,32.3697,0.083547,59.1433,0.452423,"{'k': 5, 'sim_options': {'name': 'pearson', 'm...",5,"{'name': 'pearson', 'min_support': 5, 'user_ba..."


In [206]:
sim_cos = {'name':'msd', 'min_support': 5, 'user_based':True}
basic = knns.KNNBasic(k=10, sim_options=sim_cos)
basic.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x1a5310631c0>

In [207]:
pred_basic = basic.test(testset)
accuracy.rmse(pred_basic)

RMSE: 0.0379


0.03794712520573854

In [208]:
# Run 5-fold cross-validation and then print results
cross_validate(basic, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0370  0.0373  0.0367  0.0371  0.0367  0.0370  0.0002  
MAE (testset)     0.0153  0.0153  0.0152  0.0154  0.0153  0.0153  0.0001  
Fit time          19.02   18.90   19.38   19.54   19.60   19.29   0.28    
Test time         44.37   45.47   45.71   47.80   47.05   46.08   1.21    


{'test_rmse': array([0.03702476, 0.03734744, 0.03674911, 0.03705437, 0.03672082]),
 'test_mae': array([0.01532378, 0.01530787, 0.01519903, 0.01540851, 0.01527278]),
 'fit_time': (19.017417430877686,
  18.901975393295288,
  19.376511335372925,
  19.542426586151123,
  19.60491919517517),
 'test_time': (44.37453293800354,
  45.46958541870117,
  45.706664085388184,
  47.795565128326416,
  47.05323076248169)}

In [211]:
def recs_for_user(steamid, num_games):
    list_of_games = []
    for appid in big_rating_df['appid'].unique():
        list_of_games.append( (appid,basic.predict(steamid,appid)[3]))
    # order the predictions from highest to lowest rated
    ranked_games = sorted(list_of_games, key=lambda x:x[1], reverse=True)
    recommended_games(ranked_games,game_data,num_games)

In [218]:
recs_for_user('76561197963796380', 5)

Recommendation #  1 :  Counter-Strike: Source 

Recommendation #  2 :  Counter-Strike: Global Offensive 

Recommendation #  3 :  Counter-Strike: Condition Zero 

Recommendation #  4 :  Counter-Strike 

Recommendation #  5 :  Tom Clancy's Rainbow Six Siege 

