In [2]:
import sys, os
from pymongo import MongoClient

from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

from IPython.display import HTML, display

# Background on rendering plotly figures inside a notebook:
# https://stackoverflow.com/questions/41323423/plotly-inside-jupyter-notebook-python
# NOTE(review): connected=True presumably makes plotly.js load from a CDN rather than
# being embedded in the notebook file -- confirm against the plotly offline docs.
init_notebook_mode(connected=True) # initialise the notebook for offline plotting so iplot() renders inline

import pandas as pd
from datetime import datetime, timezone, timedelta

In [3]:
# Make the parent directory importable so ../config.py can be loaded.
# Guarded so that re-running this cell does not pile up duplicate ".." entries
# on sys.path.
if ".." not in sys.path:
    sys.path.append("..")
import config # ../config.py

# Override the MongoDB host from config.py with the machine currently hosting
# the database. Swap the active line to point at the other machine.
# run on Asus laptop Mongo database
#config.mongodb_ip = "192.168.1.124"
# run on Dell
config.mongodb_ip = "192.168.1.171"

# Shared handles used by every query cell below: the client connection and
# the 'steam' database.
client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
db = client['steam']

## Collection Last Updated Time Deltas

In [4]:
def latest_timestamp(collection_name, date_field='date'):
    """Return the newest ``date_field`` value of a collection as an aware UTC datetime.

    Parameters
    ----------
    collection_name : str
        Name of the collection inside ``db`` to inspect.
    date_field : str
        Name of the field holding the timestamp to rank documents by.

    Returns
    -------
    datetime or None
        The most recent timestamp tagged with UTC, or ``None`` when the
        collection is empty or no document carries ``date_field``.
    """
    # Let MongoDB do the sorting and hand back only the single newest document.
    newest = db[collection_name].find_one(
        {}, {date_field: 1, "_id": False}, sort=[(date_field, -1)]
    )
    if not newest or newest.get(date_field) is None:
        return None
    # The stored value is tagged as UTC, exactly as this cell always did.
    return newest[date_field].replace(tzinfo=timezone.utc)


def format_age(age):
    """Render a ``timedelta`` as ``'H hours, M minutes, S seconds'``.

    Hours are not capped at 24: an age of two days is reported as 48 hours.
    Negative ages (e.g. clock skew between machines) are clamped to zero.
    """
    # https://stackoverflow.com/questions/6574329/how-can-i-produce-a-human-readable-difference-when-subtracting-two-unix-timestam
    # total_seconds() covers the whole interval; the .seconds attribute only
    # holds the remainder after whole days and would under-report stale data.
    total = max(int(age.total_seconds()), 0)
    return '%d hours, %d minutes, %d seconds' % (total // 3600, total % 3600 // 60, total % 60)


# Newest timestamp per collection ('apps' is the only one that uses 'updated_date').
apps_last_date = latest_timestamp('apps', 'updated_date')
pricehistory_last_date = latest_timestamp('pricehistory')
steamusers_last_date = latest_timestamp('steamusers')
topgames_last_date = latest_timestamp('topgames')
opencritic_last_date = latest_timestamp('opencritic')
twitchhistorical_last_date = latest_timestamp('twitchhistorical')

now_utc = datetime.now(timezone.utc)
for collection_name, last_date in [
    ('apps', apps_last_date),
    ('pricehistory', pricehistory_last_date),
    ('steamusers', steamusers_last_date),
    ('topgames', topgames_last_date),
    ('opencritic', opencritic_last_date),
    ('twitchhistorical', twitchhistorical_last_date),
]:
    if last_date is None:
        print('%s collection has no dated documents' % collection_name)
    else:
        print('%s collection last updated: %s ago' % (collection_name, format_age(now_utc - last_date)))

apps collection last updated: 2 hours, 59 minutes, 27 seconds ago
pricehistory collection last updated: 0 hours, 46 minutes, 0 seconds ago
steamusers collection last updated: 15 hours, 50 minutes, 28 seconds ago
topgames collection last updated: 0 hours, 0 minutes, 53 seconds ago
opencritic collection last updated: 0 hours, 36 minutes, 7 seconds ago
twitchhistorical collection last updated: 4 hours, 50 minutes, 50 seconds ago


## Number of Steam users online

In [5]:
# Start of the 90-day window. The collection timestamps are treated as UTC
# elsewhere in this notebook and PyMongo interprets naive datetimes as UTC,
# so the cutoff is derived from the current UTC time (kept naive) rather than
# local wall-clock time, which would shift the window by the local UTC offset.
pastday = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=90)

# Fetch only the two fields needed for the chart, for samples inside the window.
all_users_df = pd.DataFrame(list(db['steamusers'].find({"date": {"$gte": pastday}}, {"date":1, "numberonlineusers":1, '_id':False})))
all_users_df

Unnamed: 0,date,numberonlineusers
0,2019-10-20 20:42:18,14493773
1,2019-10-20 20:42:26,14491641
2,2019-10-20 20:47:04,14404750
3,2019-10-20 20:47:12,14401588
4,2019-10-20 20:51:49,14321481
...,...,...
56338,2020-01-18 11:29:49,14980445
56339,2020-01-18 11:34:35,15057731
56340,2020-01-18 11:39:21,15135123
56341,2020-01-18 11:44:07,15200357


In [6]:
# Line chart of the online-user samples over time.
# Reference: https://plot.ly/python/line-charts/

# A single trace: sample timestamp on x, concurrent user count on y.
users_trace = Scatter(x=all_users_df['date'], y=all_users_df['numberonlineusers'])
data = [users_trace]

# Chart title plus axis captions.
layout = Layout(
    title='Number of Steam users online over the past 90 days',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Number of Users'},
)

fig = Figure(data=data, layout=layout)
iplot(fig)

## Top 50 Steam Games By Number of Recommendations

In [17]:
# Select every app that reports a recommendation total and lift the nested
# count into a flat 'steam_recommendations' column.
recommendations_pipeline = [
    {'$match': {"recommendations.total": {"$exists": True}}},
    {"$project": {"_id": False, "appid": 1, "name": 1, "steam_recommendations": "$recommendations.total"}},
]
recommended_apps = list(db['apps'].aggregate(recommendations_pipeline))

# Rank in pandas and keep the 50 most recommended titles.
recommendations_df = (
    pd.DataFrame(recommended_apps)
    .sort_values(by='steam_recommendations', ascending=False)
    .head(50)
)

recommendations_df

Unnamed: 0,name,appid,steam_recommendations
25,Counter-Strike: Global Offensive,730,2524012
6124,PLAYERUNKNOWN'S BATTLEGROUNDS,578080,1070162
2194,Grand Theft Auto V,271590,595970
3634,Tom Clancy's Rainbow Six® Siege,359550,440364
112,Garry's Mod,4000,390130
912,Terraria,105600,294134
1840,Rust,252490,289611
2518,The Witcher 3: Wild Hunt,292030,265735
1854,Rocket League,252950,265550
1242,PAYDAY 2,218620,243320


## Count of apps by type

In [18]:
# Tally the apps per 'type' value on the server side.
type_counts = list(db['apps'].aggregate([
    {"$group": {"_id": "$type", "count": {"$sum": 1}}},
]))

# Largest groups first, rendered as an HTML table.
type_counts_html = (
    pd.DataFrame(type_counts)
    .sort_values(by='count', ascending=False)
    .to_html()
)
display(HTML(type_counts_html))

# Overall size of the collection, for comparison with the per-type counts.
total_apps = db['apps'].count_documents({})
print("Total apps: " + str(total_apps))

Unnamed: 0,_id,count
3,game,41285
10,dlc,24829
7,,9529
11,episode,8296
8,demo,3566
4,movie,1739
6,video,1597
1,series,342
2,advertising,245
9,music,74


Total apps: 91581
