Data source (Steam dataset): https://cseweb.ucsd.edu/~jmcauley/datasets.html#steam_data

In [1]:
import os
from pathlib import Path
import ast

import pandas as pd
from tqdm import tqdm
import json
from datetime import datetime

In [2]:
# Input files are resolved against the directory the notebook is run from.
cur_path = Path.cwd()
file_game = cur_path.joinpath('steam_new.json')
file_meta = cur_path.joinpath('steam_games.json')

In [3]:
def convert_unixtime(date_str):
    """Convert a ``YYYY-MM-DD`` date string to an integer Unix timestamp.

    The date is interpreted as midnight UTC. Calling ``timestamp()`` on a
    naive ``datetime`` would use the local time zone of the machine running
    the notebook, so the same input would give different results on
    different hosts.

    Args:
        date_str: Date formatted as ``%Y-%m-%d`` (e.g. ``"2012-09-07"``).

    Returns:
        Seconds since the Unix epoch, as an ``int``.

    Raises:
        ValueError: If ``date_str`` does not match ``%Y-%m-%d``.
    """
    # Appending an explicit "+0000" offset and parsing it with %z yields a
    # timezone-aware datetime without requiring any additional import.
    date_obj = datetime.strptime(f"{date_str} +0000", "%Y-%m-%d %z")
    return int(date_obj.timestamp())

# Column-wise accumulator: one entry per review line in the input file.
data = {
    'user': [],
    'item': [],
    'rating': [],
    'timestamp': [],
}

# Every non-blank line is parsed with ast.literal_eval, i.e. as a Python
# literal (presumably the dump uses Python dict syntax rather than strict
# JSON -- TODO confirm against the raw file).
# The encoding is pinned so decoding does not depend on the platform locale
# (assumes the dump is UTF-8/ASCII -- TODO confirm).
with open(file_game, 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            # A blank or whitespace-only line would make literal_eval raise.
            continue
        game = ast.literal_eval(line)

        data['user'].append(game['username'])
        data['item'].append(game['product_id'])
        # Every review is recorded with the same constant rating.
        data['rating'].append(5)
        data['timestamp'].append(convert_unixtime(game['date']))

# Written without header or index: columns are user, item, rating, timestamp.
df_game = pd.DataFrame(data)
df_game.to_csv('bhv.csv', header=None, index=False)

In [5]:
# Column-wise accumulator: one entry per game that has a usable category.
meta_data = {
    'item': [],
    'category': [],
}

# The encoding is pinned so decoding does not depend on the platform locale
# (assumes the dump is UTF-8/ASCII -- TODO confirm).
with open(file_meta, 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            # A blank or whitespace-only line would make literal_eval raise.
            continue
        meta = ast.literal_eval(line)

        # Prefer 'genres'; fall back to 'tags' when genres is absent or empty.
        category = meta.get('genres') or meta.get('tags')
        if not category:
            continue  # no genres and no tags: the game is left out

        if 'id' not in meta:
            # Some records have a category but no 'id' key. They cannot be
            # joined on item id, so they are reported and skipped. This
            # replaces a bare `except:` that hid every other kind of error.
            print(meta)
            continue

        meta_data['item'].append(meta['id'])
        meta_data['category'].append(category)

# Keep the first record per item id, then write without header or index.
df_meta = pd.DataFrame(meta_data)
df_meta = df_meta.drop_duplicates(subset=['item']).reset_index(drop=True)
df_meta.to_csv('meta.csv', header=None, index=False)

{'publisher': 'Warner Bros. Interactive Entertainment, Feral Interactive (Mac)', 'genres': ['Action', 'Adventure'], 'app_name': 'Batman: Arkham City - Game of the Year Edition', 'sentiment': 'Overwhelmingly Positive', 'title': 'Batman: Arkham City - Game of the Year Edition', 'url': 'http://store.steampowered.com/app/200260', 'release_date': '2012-09-07', 'tags': ['Action', 'Open World', 'Batman', 'Adventure', 'Stealth', 'Third Person', 'Superhero', 'Singleplayer', "Beat 'em up", 'Comic Book', 'Detective', 'Fighting', 'Atmospheric', 'Story Rich', 'Action-Adventure', 'Cinematic', 'Controller', '3D Vision', 'Metroidvania', 'Sandbox'], 'metascore': 91, 'price': 19.99, 'early_access': False, 'specs': ['Single-player', 'Steam Achievements', 'Steam Trading Cards', 'Partial Controller Support', 'Steam Cloud'], 'developer': 'Rocksteady Studios,Feral Interactive (Mac)'}


In [7]:
# Drop interactions whose item has no metadata row, then attach each item's
# category and write the joined table without header or index.
df_game = df_game.loc[df_game['item'].isin(df_meta['item'])]
df_merge = df_game.merge(df_meta, how='left', on='item')
df_merge.to_csv('data.csv', header=None, index=False)