In [1]:
import pandas as pd
import seaborn as sns
import json
from sqlalchemy import *

In [73]:
dict_owned_games = {}
with open('data/user_data.txt', 'r') as f:
    for raw_string in f.readlines(): 
        user_id, lst_inventory = list(json.loads(raw_string).items())[0]
        if lst_inventory:
            for i in lst_inventory:
                app_id = i.get('appid')
                playtime_forever = i.get('playtime_forever', 0)
                if playtime_forever > 0:
                    dict_owned_games.update({
                        user_id : {
                            'app_id' : app_id,
                            'playtime_forever' : playtime_forever
                        }
                    })

In [74]:
df_owned_games = pd.DataFrame.from_dict(dict_owned_games, 'index')

In [75]:
df_owned_games

Unnamed: 0,app_id,playtime_forever
76561198074188133,570,655
76561198058088990,630,9
76561198042649112,17520,99
76561198094755500,259080,101
76561198214403939,570,92258
...,...,...
76561197994119296,630,452
76561198104816794,250820,1752
76561198164546058,1840,386
76561198080128853,570,179731


In [68]:
df_owned_games.to_sql(
        'steam_owned_games', 
        engine, 
        if_exists='replace', 
        index=False, 
        dtype={
            'user_id': BigInteger(),
            'app_id': Integer(),
            'playtime_forever': Integer()
        }, 
        chunksize = 10000
    )

In [71]:
df_owned_games.reset_index()

Unnamed: 0,level_0,level_1,user_id,app_id,playtime_forever
0,76561198074188133,4000,76561198074188133,4000,3415
1,76561198074188133,34030,76561198074188133,34030,16526
2,76561198074188133,42680,76561198074188133,42680,4631
3,76561198074188133,42690,76561198074188133,42690,11055
4,76561198074188133,207610,76561198074188133,207610,126
...,...,...,...,...,...
235595,76561198162029433,204300,76561198162029433,204300,4
235596,76561198162029433,554620,76561198162029433,554620,390
235597,76561198162029433,433850,76561198162029433,433850,121
235598,76561198162029433,611500,76561198162029433,611500,20


## Clean App Details

In [36]:
def parse_steam_app_details(app_data):
    developers = ', '.join(app_data.get('developers', []))
    if not developers:
        developers = None
    publishers = ', '.join(app_data.get('publishers', []))
    if not publishers:
        publishers = None
    name = app_data.get('name')
    required_age = app_data.get('required_age')
    short_description = app_data.get('short_description')
    if not short_description:
        short_description = None
    app_type = app_data.get('type')
    header_image = app_data.get('header_image')
    fullgame = app_data.get('fullgame',{}).get('appid')
    lst_categories = app_data.get('categories',[])
    if lst_categories:
        categories = ', '.join([i.get('description') for i in lst_categories])
    else:
        categories = None
    lst_genres = app_data.get('genres',[])
    if lst_genres:
        genres = ', '.join([i.get('description') for i in lst_genres])
    else:
        genres = None
    supported_languages = app_data.get('supported_languages')
    if supported_languages:
        supported_languages = supported_languages.replace('<strong>*</strong>', '').replace('<br>languages with full audio support','')
    if app_data.get('is_free') == True:
        initial_price = 0
        currency = 'USD'
    else:
        if app_data.get('price_overview',{}):
            initial_price = app_data.get('price_overview',{}).get('initial', 0) / 100
            currency = app_data.get('price_overview',{}).get('currency')
        else:
            initial_price = None
            currency = None
    if app_data.get('release_date',{}).get('coming_soon') == False:
        release_date = app_data.get('release_date',{}).get('date')
        if release_date:
            try:
                release_date = datetime.strptime(release_date, '%b %d, %Y').date()
            except Exception as e:
                try:
                    release_date = datetime.strptime(release_date, '%d %b, %Y').date()
                except:
                    try:
                        release_date = datetime.strptime(release_date, '%b %Y').date()
                    except:
                        release_date = None
        else:
            release_date = None
    else:
        release_date = None

    dic_steam_app = {
        'name' : name,
        'type' : app_type,
        'release_date' : release_date,
        'currency' : currency,
        'initial_price' : initial_price,
        'short_description' : short_description,
        'header_image' : header_image,
        'fullgame' : fullgame,
        'developers' : developers,
        'publishers' : publishers,
        'required_age' : required_age,
        'supported_languages' : supported_languages,
        'categories' : categories,
        'genres' : genres,
    }

    return dic_steam_app

In [37]:
dic_app_details = {}
with open('data/app_detail.txt', 'r') as f:
    for i in f.readlines():
        try:
            for app_id, dic_response in json.loads(i).items():
                if dic_response.get('success'):
                    dic_app_details[app_id] = parse_steam_app_details(dic_response.get('data',{}))
        except:
            pass

In [38]:
df_steam_app = pd.DataFrame.from_dict(dic_app_details, 'index')
df_steam_app.index.name = 'app_id'
df_steam_app.reset_index(inplace=True)

In [69]:
df_steam_app.head()

Unnamed: 0,app_id,name,type,release_date,currency,initial_price,short_description,header_image,fullgame,developers,publishers,required_age,supported_languages,categories,genres
0,1494410,The Tower Of TigerQiuQiu Match 3,dlc,,USD,0.99,"AboutStandard elimination games, which arrange...",https://steamcdn-a.akamaihd.net/steam/apps/149...,1103410.0,TigerQiuQiu,TigerQiuQiu,0,English,"Single-player, Downloadable Content","Action, Casual, Indie"
1,1494460,Desktop+,game,,USD,0.0,Desktop+ is a utility application for SteamVR ...,https://steamcdn-a.akamaihd.net/steam/apps/149...,,elvissteinjr,elvissteinjr,0,English,Steam Cloud,Utilities
2,1494500,FWsim - Fireworks Display Simulator,game,,,,Light the sky and express your creativity with...,https://steamcdn-a.akamaihd.net/steam/apps/149...,,Lukas Trötzmüller,Lukas Trötzmüller,0,"English, French, Italian, German, Spanish - Spain","Single-player, Includes level editor","Simulation, Early Access"
3,1494510,Demon Killer VR,game,,,,Use swords to slash through demons in this exc...,https://steamcdn-a.akamaihd.net/steam/apps/149...,,ZoneStudios,ZoneStudios,0,"English, French, Italian, German, Spanish - Sp...",Single-player,Action
4,1494550,Cars vs Zombies,game,,USD,0.99,You are in a post-apocalyptic world fighting h...,https://steamcdn-a.akamaihd.net/steam/apps/149...,,hulixxx,hulixxx,0,English,Single-player,"Casual, Indie, Racing, Sports"


## Load into MySQL

In [42]:
user = 'root'
password = 'jenny3248'
host = '127.0.0.1'
db_name = 'steam'
engine = create_engine(f'mysql+pymysql://{user}:{password}@{host}/{db_name}?charset=utf8mb4')

In [43]:
df_steam_app.to_sql(
    'steam_app_details', 
    engine, 
    if_exists='replace', 
    index=False, 
    chunksize = 10000, 
    dtype={'app_id':Integer(), 'required_age':Integer()})