In [1]:
from src.recommenders import ImprovedRecommender
import pandas as pd

In [2]:
# Root folder holding every data artefact used in this notebook.
data_path = "./data/"

# Compressed pickle with the game catalogue, and the parquet file with user reviews.
items_path = f"{data_path}all_games.pkl.xz"
reviews_path = f"{data_path}reviews.parquet"

# Folder for qualitative evaluation artefacts.
qual_eval_folder = './evaluation'

The following code converts all games (including popular ones and those that are not present in Australian users' inventories) to the expected pickle format:

In [None]:
def parse_json(filename_python_json: str, read_max: int = -1) -> pd.DataFrame:
    """Parse a file of line-delimited Python literals into a DataFrame.

    Each non-blank line is parsed as one Python literal (a nested Python
    data structure, not strict JSON) and becomes one row of the result.

    Args:
        filename_python_json (str): Path to the file to parse.
        read_max (int, optional): Maximum number of records to read. Any
            negative value disables the limit. Defaults to -1.

    Returns:
        DataFrame: One row per parsed record.

    Raises:
        ValueError, SyntaxError: If a line is not a valid Python literal.
    """
    # Local import keeps this cell self-contained.
    import ast

    unlimited = read_max < 0
    records = []
    with open(filename_python_json, "r", encoding="utf-8") as f:
        for line in f:
            # Check the limit *before* appending so that exactly `read_max`
            # records are kept (the old post-append `>` check kept one extra).
            if not unlimited and len(records) >= read_max:
                print(f"Break reading after {read_max} records")
                break

            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            # ast.literal_eval only accepts literals, so a data file cannot
            # execute code. It also does not depend on the builtin name
            # `eval`, which notebooks easily rebind by accident.
            records.append(ast.literal_eval(stripped))

    print(f"Reading {len(records)} rows.")

    # A list of dicts maps directly onto rows of a DataFrame.
    return pd.DataFrame(records)
    
# Load the raw Steam game dump into a DataFrame.
games = parse_json("./data/raw/steam_games.json")

# Remove the columns that are not kept in the pickled catalogue.
games = games.drop(columns=["url", "reviews_url", "discount_price", "title", "release_date"])

# Write to the same path the recommender loads (`items_path`) so this cell
# actually produces the file that is consumed later. The previous hard-coded
# "./data/all_games.pkl.gz" did not match `items_path`. pandas infers the
# compression from the file extension.
games.to_pickle(items_path)
games

First, run the [scrape script](./src/data/scrape_users.py), which scrapes our own user inventories (or those of any specified user) from the Steam API.
Then we can generate recommendations for our own inventories! :)

In [3]:
# Arguments shared by both runs; the runs differ only in `weighting_scheme`.
shared_kwargs = dict(
    train_path=f"{data_path}train_all_users.parquet",
    test_path=None,
    val_path=None,
    reviews_path=reviews_path,
    sparse=True,
    tfidf='smooth',
    normalize=True,
    dim_red=None,
)

# Run 1: no explicit weighting scheme is passed.
rec = ImprovedRecommender(items_path, **shared_kwargs)
rec.generate_recommendations()
# Named `evaluation` rather than `eval` so the builtin `eval` is not shadowed
# for the rest of the notebook session.
evaluation = rec.qualitative_evaluation()
print(evaluation['recommended_items'][0])
print(evaluation['recommended_items'][1])

# Run 2: weight items by playtime and by rating-based sentiment.
rec = ImprovedRecommender(
    items_path,
    **shared_kwargs,
    weighting_scheme={'playtime': True, 'sentiment': 'rating', 'reviews': False},
)
rec.generate_recommendations()
evaluation = rec.qualitative_evaluation()
print(evaluation['recommended_items'][0])
print(evaluation['recommended_items'][1])

2it [00:00, 168.26it/s]
2it [00:00, 52.78it/s]


["Pajama Sam 2: Thunder and Lightning Aren't So Frightening", 'Portal 2', 'Pajama Sam 3: You Are What You Eat From Your Head To Your Feet', 'Pajama Sam 4: Life Is Rough When You Lose Your Stuff!', 'White Noise 2', 'Trine 2: Complete Story', 'Trine Enchanted Edition', 'Keep Talking and Nobody Explodes', 'Gauntlet - Lilith the Necromancer Pack', 'Half Dead']
['Starbound', 'Portal 2', 'White Noise 2', 'Half Dead', 'Arma 3 Apex', 'BattleBlock Theater®', 'Unturned', 'Natural Selection 2', 'We Were Here', 'Keep Talking and Nobody Explodes']


2it [00:00, 83.33it/s]
2it [00:00, 47.54it/s]

['Portal 2', 'White Noise 2', 'Killing Floor 2', 'Starbound', 'Keep Talking and Nobody Explodes', 'Killing Floor', 'Arma 3 Apex', 'Half Dead', 'Awesomenauts - the 2D moba', 'Left 4 Dead']
['Starbound', 'Crea', 'Unturned', 'White Noise 2', 'Arma 3 Apex', 'Half Dead', 'Fallout Shelter', 'Natural Selection 2', 'Scrap Mechanic', 'PULSAR: Lost Colony']





These results indicate that weighting the items used to construct the user vector by playtime and other information can significantly improve the qualitative results.
For example, Sam isn't really interested in any of the games recommended by the method without playtime weighting. Those recommendations are driven by the many free and cheap games that are present in the inventory.
When playtime weighting is used, the recommendations improve a lot. There is still some bias towards zombie games, but this is likely due to the item representations and the tf-idf scheme that is used.