In [1]:
import graphlab
import cPickle, operator
import os
import re
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup

# Loading Data

In [2]:
# Load the three input tables: item side data, user side data, and the
# (item_id, rating, user_id) observations. The paths are .csv files; the log
# output of this cell shows the column types being inferred via read_csv.
items = graphlab.load_sframe("item_data.csv")
users = graphlab.load_sframe("user_data.csv")
users_items = graphlab.load_sframe("user_item.csv")

2016-04-27 04:24:37,356 [INFO] graphlab.cython.cy_server, 176: GraphLab Create v1.8.5 started. Logging: /tmp/graphlab_server_1461731076.log


This non-commercial license of GraphLab Create is assigned to johnnysand7@gmail.com and will expire on March 17, 2017. For commercial licensing options, visit https://dato.com/buy/.
------------------------------------------------------


Inferred types from first line of file as 
column_type_hints=[int,int,int,int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
------------------------------------------------------


Inferred types from first line of file as 
column_type_hints=[int,int,int,int,int,int,int,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
------------------------------------------------------


Inferred types from first line of file as 
column_type_hints=[int,int,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [3]:
users.head(5)

user_id,CommunityBanned,VACBanned,personastate,timecreated,num_friends,num_games,num_played
76561197963057664,0,0,0,12,34,23,14
76561198257721264,0,0,3,0,41,2,2
76561198189880304,0,0,1,1,10,4,3
76561197997398304,0,0,1,8,131,98,36
76561198145038640,0,0,1,2,85,8,6


In [4]:
items.head(5)

item_id,price_int,game_age,review_nums,genre
7830,5,7,7,Strategy
97330,10,4,7,Strategy
108710,30,4,7,Action
209000,20,3,7,Action
72850,20,5,7,RPG


In [7]:
users_items[:10000].save("sample_users_items_ratings.csv")

In [5]:
users_items.head(5)

item_id,rating,user_id
10,0,76561197963057664
20,0,76561197963057664
30,0,76561197963057664
50,0,76561197963057664
70,0,76561197963057664


# Model

### Training, Testing

In [None]:
train, test = graphlab.recommender.util.random_split_by_user(users_items,
                                                             user_id="user_id", item_id="item_id",
                                                             max_num_users=100, item_test_proportion=0.3)

In [None]:
# Fit a factorization recommender on the rating observations, with the user
# and item tables supplied as side data. The seed is fixed for repeatability.
# NOTE(review): this trains on the full `users_items` SFrame rather than the
# `train` split created in the previous cell, so the held-out `test` rows are
# seen during training -- confirm this is intended before evaluating on `test`.
model = graphlab.factorization_recommender.create(users_items,
                                            user_id='user_id',
                                            item_id='item_id',
                                            target='rating',
                                            user_data=users,
                                            item_data=items,
                                            random_seed=343,
                                            solver="sgd")

# New User

In [None]:
class CollectNewUser(object):
    """
    Gather everything needed to score a user the model has never seen.

    On construction the user is prompted for a Steam profile URL (or a
    17-digit id). The ``get_*`` methods then query the Steam Web API and
    cache their results on the instance; ``game_user_frames`` finally turns
    the cached data into the two SFrames passed to ``model.recommend``.

    Parameters
    ----------
    popular_games: collection of app ids; games outside it are dropped from
        the new user's observations.

    Attributes
    ----------
    uid: 17-digit id string, or a human-readable error message when the
        input could not be resolved (see ``determine_user_input``).
    key: Steam Web API key read from the ``ACCESS_STEAM`` environment variable.
    user, bans, friends: filled in by ``get_user_info``, ``get_bans`` and
        ``get_friends``; ``None`` until those calls succeed.
    s_code: set by ``get_bans``; True when that request returned a 2xx status.
    """

    def __init__(self, popular_games):
        self.popular_games = popular_games
        # Read the key first so a missing environment variable fails before
        # the user has been asked to type anything.
        self.key = os.environ["ACCESS_STEAM"]
        self.uid = self.determine_user_input()
        self.bans = None
        self.user = None
        self.friends = None
        self.s_code = None

    def determine_user_input(self):
        """
        Prompt for a profile URL or 17-digit id and resolve it to an id.

        Returns
        -------
        The 17-digit id as a string on success. On failure a descriptive
        message string is returned instead (no exception is raised), so
        callers should inspect ``uid`` before issuing API calls.
        """
        # Pasted text often carries stray surrounding whitespace.
        user_input = raw_input("Paste your Steam community profile url here: ").strip()
        if len(user_input) == 17:
            if user_input.isdigit():
                return user_input
            return "Maybe you tried your 17-digt Steam ID, which was not recognized"

        if "steamcommunity.com" not in user_input:
            return "Must be a Steam Community URL!"

        try:
            response = requests.get(user_input)
            # The pattern matches exactly 17 digits, so the first hit needs
            # no further length check; no hit at all raises IndexError.
            return re.findall(r"[0-9]{17}", response.text)[0]
        except (IndexError, requests.RequestException):
            # RequestException also covers malformed URLs (e.g. missing
            # scheme), not only connection failures.
            return "Could not find your profile."

    def get_user_info(self):
        """
        Fetch the player summary and cache the fields the model uses.

        Stores ``personastate``, ``timecreated`` and ``steamid`` in
        ``self.user``. When ``communityvisibilitystate`` is 1 (presumably a
        private profile -- confirm against the Steam API docs) nothing is
        stored and ``self.user`` stays ``None``. Always returns ``None``.
        """
        url = ("http://api.steampowered.com/ISteamUser/GetPlayerSummaries"
               "/v0002/?key=" + self.key + "&steamids=" + self.uid)
        user = requests.get(url).json()["response"]["players"][0]
        if user["communityvisibilitystate"] == 1:
            return None
        self.user = {k: user[k] for k in ("personastate", "timecreated", "steamid")}

    def get_bans(self):
        """
        Fetch the ban record and cache it in ``self.bans``.

        Every field of the response is kept except the ones listed in
        ``unwanted``. ``self.s_code`` records whether the HTTP status was 2xx.
        """
        url = ("http://api.steampowered.com/ISteamUser/GetPlayerBans"
               "/v1/?key=" + self.key + "&steamids=" + self.uid)
        response = requests.get(url)
        self.s_code = str(response.status_code).startswith("2")
        ban = response.json()["players"][0]
        unwanted = set(["SteamId", "DaysSinceLastBan", "EconomyBan",
                        "NumberOfGameBans", "NumberOfVACBans"])
        self.bans = {k: ban[k] for k in ban if k not in unwanted}

    def get_friends(self):
        """
        Store the size of the friend list in ``self.friends``.

        On a non-2xx response ``self.friends`` is left unchanged.
        Always returns ``None``.
        """
        url = ("http://api.steampowered.com/ISteamUser/GetFriendList/v0001/?key="
               + self.key + "&steamid=" + self.uid + "&relationship=all")
        response = requests.get(url)
        if not str(response.status_code).startswith("2"):
            return None
        self.friends = len(response.json()["friendslist"]["friends"])

    def get_game_info(self):
        """
        Fetch the owned-games list.

        Returns
        -------
        A list of dicts, one per game, with the icon/logo/stats fields
        removed and ``playtime_2weeks`` defaulted to 0 when absent.
        ``None`` when the request fails or the response has no ``games``.
        """
        # Built from adjacent pieces: a backslash continuation *inside* a
        # string literal would embed the next line's indentation in the URL.
        url = ("http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?key="
               + self.key + "&steamid=" + self.uid
               + "&include_appinfo=1&include_played_free_games=1&format=json")
        response = requests.get(url)
        if not str(response.status_code).startswith("2"):
            return None
        try:
            games = response.json()["response"]["games"]
        except KeyError:
            return None

        unwanted = set(["has_community_visible_stats", "img_icon_url", "img_logo_url"])
        trimmed = []
        for game in games:
            kept = {k: v for k, v in game.items() if k not in unwanted}
            kept.setdefault(u"playtime_2weeks", 0)
            trimmed.append(kept)
        return trimmed

    def game_user_frames(self):
        """
        Build the model inputs for this user.

        Requires ``get_user_info`` and ``get_bans`` to have been called
        (``get_friends`` is optional; ``num_friends`` is ``None`` without it).

        Returns
        -------
        (user_sf, game_sf): two SFrames. ``user_sf`` has one row of user
        side data; ``game_sf`` has columns ``item_id``, ``rating``,
        ``user_id`` restricted to games in ``popular_games``.

        Raises
        ------
        ValueError when the owned-games list, user summary or ban record
        is unavailable.
        """
        games = self.get_game_info()
        if not games:
            raise ValueError("No owned-games data available for user %s" % self.uid)
        if self.user is None or self.bans is None:
            raise ValueError("User summary or ban data missing; call "
                             "get_user_info() and get_bans() first")

        game_df = pd.DataFrame(games)
        # Counts are taken over the whole library, before filtering.
        owned = len(game_df)
        played = int((game_df["playtime_forever"] != 0).sum())

        # .copy() so the column assignments below act on an independent
        # frame rather than on a slice of the unfiltered one.
        game_df = game_df[game_df["appid"].isin(self.popular_games)].copy()
        # Bucket playtime_forever in steps of 60 into ratings 0..5.
        game_df["rating"] = pd.cut(game_df["playtime_forever"],
                                   bins=[-1, 60, 120, 180, 240, 300, 10e10],
                                   labels=[0, 1, 2, 3, 4, 5]).astype(int)
        game_df["user_id"] = self.uid
        game_df["item_id"] = game_df["appid"].astype(int)
        game_sf = graphlab.SFrame(game_df[["item_id", "rating", "user_id"]])

        # Later updates win on key clashes, matching bans -> user -> counts.
        user_dict = dict(self.bans)
        user_dict.update(self.user)
        user_dict.update(num_friends=self.friends, num_played=played, num_games=owned)
        # Elapsed 365-day years since `timecreated` (treated as a Unix
        # timestamp), rounded to 2 decimals and then truncated to an int.
        user_dict["timecreated"] = int(round(
            (time.time() - user_dict["timecreated"]) / (3600 * 24 * 365), 2))
        user_df = pd.DataFrame([user_dict]).rename(columns={"steamid": "user_id"})
        return graphlab.SFrame(user_df), game_sf

In [None]:
item_id_set = set(items["item_id"])

In [None]:
new_user = CollectNewUser(item_id_set)

In [None]:
new_user.uid

In [None]:
new_user.get_user_info()
new_user.get_bans()
new_user.get_friends()

In [None]:
new_user_side, new_user_items = new_user.game_user_frames()

In [None]:
model.recommend([int(new_user_items["user_id"][0])], new_observation_data=new_user_items, new_user_data=new_user_side)

In [None]:
similar_5 = model.get_similar_users([76561197990097040], k=5)["similar"]

In [None]:
# Give each of the five nearest neighbours its own name, in rank order.
number1, number2, number3, number4, number5 = [similar_5[rank] for rank in range(5)]

In [None]:
# NOTE(review): "first_model" is written by `model.save("first_model")` in a
# later cell, so on a fresh top-to-bottom run this load only works if a
# previously saved copy already exists on disk.
model2 = graphlab.load_model("first_model")

In [None]:
similar_5

In [None]:
model2.get_similar_users([76561197990097040])

In [None]:
76561198079183328

In [None]:
76561197999882487 in users["user_id"]

In [None]:
model.save("first_model")

In [None]:
users_items_df = users_items.to_dataframe()

In [None]:
users_items_df[users_items_df["rating"]!=0].groupby("user_id").count().sort_values(by="rating", ascending=False)

In [None]:
top_similar = model.get_similar_users([76561197997398304])["similar"][0]

In [None]:
model.get_similar_users([76561197997398304])

In [None]:
# Load the pickled frame and keep only the two columns that
# favorite_games_by_user reads.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that were produced by a trusted run of this project.
with open("../banned_df.p", "rb") as f:
    games = cPickle.load(f)
games_users = games[["steamid", "game_info"]] 

In [None]:
def _top_games_for_user(uid, df, appid_col, playtime_col, n=10):
    """Return the ``n`` most-played games of ``uid`` as a two-column DataFrame."""
    playtime_by_app = {}
    for games in df[df["steamid"] == uid]["game_info"]:
        for game in games:
            playtime_by_app[str(game["appid"])] = game["playtime_forever"]
    return (pd.DataFrame(list(playtime_by_app.items()), columns=[appid_col, playtime_col])
            .sort_values(by=playtime_col, ascending=False)
            .head(n)
            .reset_index(drop=True))


def favorite_games_by_user(uid, df, model):
    """
    Compare a user's most-played games with those of their nearest neighbour.

    Parameters
    ----------
    uid: 17-digit integer
    df: df with columns "steamid" and "game_info"; each "game_info" entry is
        an iterable of dicts carrying "appid" and "playtime_forever"
    model: factorization_recommender model (only get_similar_users is used)

    Outputs
    -------
    DataFrame with up to 10 rows and four columns placed side by side:
    "appid_user_1"/"playtime_user_1" for ``uid`` and
    "appid_rec"/"playtime_rec" for the first user returned by
    ``model.get_similar_users([uid])``. Each pair is sorted by playtime,
    highest first; app ids are strings. If one user has fewer games than
    the other, that user's columns are padded with NaN.
    """
    top_games_user1 = _top_games_for_user(uid, df, "appid_user_1", "playtime_user_1")
    rec_uid = model.get_similar_users([uid])["similar"][0]
    top_games_rec = _top_games_for_user(rec_uid, df, "appid_rec", "playtime_rec")
    return pd.concat((top_games_user1, top_games_rec), axis=1)

In [None]:
test_out = favorite_games_by_user(76561197997398304, games_users, model)

In [None]:
item_genres = items[["item_id", "genre"]]

In [None]:
item_genres = item_genres.to_dataframe()#.rename(columns={"item_id":"appid_user_1"})

In [None]:
item_genres[item_genres["item_id"].isin(test_out["appid_user_1"].values.astype(int))]

In [None]:
item_genres[item_genres["item_id"].isin(test_out["appid_rec"].values.astype(int))]

In [None]:
test_out_2 = favorite_games_by_user(76561198069263328, games_users, model)

In [None]:
test_out_2

In [None]:
# Genres of the original user's top games. The explicit .copy() makes this an
# independent frame, so the "title" column added in a later cell is not
# assigned on a slice of item_genres (avoids SettingWithCopyWarning).
original_user = item_genres[item_genres["item_id"].isin(test_out_2["appid_user_1"].values.astype(int))].copy()

In [None]:
original_user

In [None]:
original_user["title"] = ["Fallout: New Vegas", "Garry's Mod", "Payday 2", "Dayz", "Team Fortress 2",\
                         "Blacklight: Retribution", "Planetside 2", "Warframe", "Unturned", "CS:GO"]

In [None]:
original_user

In [None]:
# Genres of the recommended (most similar) user's top games. The explicit
# .copy() makes this an independent frame, so the "title" column added in the
# next cell is not assigned on a slice of item_genres.
rec_user = item_genres[item_genres["item_id"].isin(test_out_2["appid_rec"].values.astype(int))].copy()

In [None]:
rec_user["title"] = ["Skyrim", "Arma 2", "Terreria", "Dayz", "Civilization V", "Kerbal Space Program",\
                    "Team Fortress 2", "ARK: Survival Evolved", "Starbound"]

In [None]:
rec_user

In [None]:
# NOTE(review): this rebinds `users`, which until now held the SFrame loaded
# from user_data.csv and passed to the model as user_data. Re-running the
# model cell after this one would pick up the pickled object instead.
# NOTE(review): unpickling can execute arbitrary code -- trusted files only.
with open("../user_df.p", "rb") as f:
    users = cPickle.load(f)

In [None]:
users[users["steamid"]==76561198026695696]

In [None]:
users[users["steamid"]==76561198069263328]

In [None]:
model.get_similar_users([76561198069263328])

In [None]:
# Five highest-playtime rows for this steamid.
# NOTE(review): `user_playtime` is first assigned by the pd.read_csv cell
# further down, so this cell fails with a NameError on a fresh
# top-to-bottom run.
user_playtime[user_playtime["steamid"]==76561198026695696].sort_values(by="playtime_forever", ascending=False).head()

In [None]:
user_playtime[user_playtime["steamid"]==76561198069263328].sort_values(by="playtime_forever", ascending=False).head()

In [None]:
user_playtime = pd.read_csv("../final_sf.csv")

In [None]:
(user_playtime[user_playtime["appid"]==72850]["playtime_forever"]!=0).sum()

In [None]:
322209881 / (60. * 24) / 52000.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Per-steamid total of playtime_forever, divided by 60 * 24 and truncated to
# whole numbers. The column is selected before aggregating so only it is summed.
global_playtime = (
    user_playtime.groupby("steamid")["playtime_forever"].sum().values / (60. * 24)
).astype(int)

In [None]:
plt.figure(figsize=(9,7))
plt.hist(global_playtime, bins=50)
plt.show()

In [None]:
global_playtime

In [None]:
global_playtime[10::-1]

In [None]:
plt.hist(user_playtime[user_playtime["appid"]==72850]["playtime_forever"].values, bins=20)

In [None]:
273414 / (60. * 24)

In [None]:
# Pickle files are binary: open with "rb", as the earlier pickle loads in this
# notebook do (text mode can corrupt the stream on some platforms).
# NOTE(review): unpickling can execute arbitrary code -- trusted files only.
with open("../user_df.p", "rb") as f:
    users_2 = cPickle.load(f)

In [None]:
users_2["personaname"][76561197997398304]

In [None]:
# Look up one persona name by id and drop its non-ASCII characters. The
# original two-line form called .encode() on a list literal, which raises
# AttributeError; the lookup mirrors the cell above.
users_2["personaname"][76561198004737472].encode('ascii', 'ignore')

In [None]:
users_2[["personaname", "avatarfull"]].to_pickle("user_names")

In [None]:
# "user_names" was written with DataFrame.to_pickle, i.e. it is binary data:
# open with "rb", as the earlier pickle loads in this notebook do.
with open("user_names", "rb") as f:
    users_3 = cPickle.load(f)

In [None]:
users_3[76561198004737472]

In [None]:
users_2["avatarfull"][76561198069263328]

In [None]:
# Pickle files are binary: open with "rb", as the earlier pickle loads in this
# notebook do. Note that this rebinds `users_2` to a different dataset.
# NOTE(review): unpickling can execute arbitrary code -- trusted files only.
with open("../cleaned_dataframe.p", "rb") as f:
    users_2 = cPickle.load(f)

In [None]:
users_2.head()