In [1]:
import sys, os
from pymongo import MongoClient

from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

from IPython.display import HTML, display

# https://stackoverflow.com/questions/41323423/plotly-inside-jupyter-notebook-python
# Set up plotly's offline notebook mode before any iplot() call is made.
# NOTE(review): connected=True presumably makes the notebook load plotly.js
# from a CDN instead of embedding it, so rendering would need network
# access -- confirm against the plotly.offline documentation.
init_notebook_mode(connected=True) # initiate notebook for offline plot

import pandas as pd
from datetime import datetime, timezone, timedelta

In [2]:
# Make the parent directory importable so that ../config.py can be loaded.
# Guarded so that re-running this cell does not keep appending duplicates.
if ".." not in sys.path:
    sys.path.append("..")
import config # ../config.py

# MongoDB host to query. Hosts that have been used with this notebook:
#   raspberrypi3: "192.168.1.224"
#   Asus laptop:  "192.168.1.124"
#   Dell:         "192.168.1.171"  (default)
# Set the STEAM_MONGODB_IP environment variable to target another host
# without editing this cell.
config.mongodb_ip = os.environ.get("STEAM_MONGODB_IP", "192.168.1.171")

# Connect and select the 'steam' database; `db` is used by the cells below.
client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
db = client['steam']

## Collection Last Updated Time Deltas

In [3]:
def latest_timestamp(collection_name, date_field="date"):
    """Return the newest `date_field` value of a collection as a UTC-aware datetime.

    Only the single most recent document is fetched (descending sort on
    `date_field`). Returns None when the collection is empty or the newest
    document has no such field.
    """
    newest = db[collection_name].find_one(
        {}, {date_field: 1, "_id": False}, sort=[(date_field, -1)]
    )
    if not newest or newest.get(date_field) is None:
        return None
    # Go through pd.Timestamp so the value is normalised to a plain datetime,
    # then tag it as UTC (the stored value is treated as UTC throughout).
    return pd.Timestamp(newest[date_field]).to_pydatetime().replace(tzinfo=timezone.utc)


def format_age(age):
    """Format a timedelta as 'H hours, M minutes, S seconds'.

    Hours are not capped at 24, so an age of two days reads as '48 hours'.
    """
    # https://stackoverflow.com/questions/6574329/how-can-i-produce-a-human-readable-difference-when-subtracting-two-unix-timestam
    # total_seconds() covers the whole delta; timedelta.seconds is only the
    # sub-day remainder and would silently drop whole days.
    # Clamp at zero so a timestamp slightly in the future does not produce
    # negative/wrapped values.
    t = max(0, int(age.total_seconds()))
    return '%d hours, %d minutes, %d seconds' % (t//3600, t%3600//60, t%60)


# The 'apps' collection stores its timestamp under 'updated_date';
# every other collection uses 'date'.
apps_last_date = latest_timestamp('apps', 'updated_date')
pricehistory_last_date = latest_timestamp('pricehistory')
steamusers_last_date = latest_timestamp('steamusers')
topgames_last_date = latest_timestamp('topgames')
opencritic_last_date = latest_timestamp('opencritic')
twitchhistorical_last_date = latest_timestamp('twitchhistorical')

# Take "now" once so every collection is measured against the same instant.
now_utc = datetime.now(timezone.utc)
for collection_name, last_date in [
    ('apps', apps_last_date),
    ('pricehistory', pricehistory_last_date),
    ('steamusers', steamusers_last_date),
    ('topgames', topgames_last_date),
    ('opencritic', opencritic_last_date),
    ('twitchhistorical', twitchhistorical_last_date),
]:
    if last_date is None:
        print('%s collection has no dated documents' % collection_name)
    else:
        print('%s collection last updated: %s ago' % (collection_name, format_age(now_utc - last_date)))

apps collection last updated: 1 hours, 10 minutes, 16 seconds ago
pricehistory collection last updated: 1 hours, 10 minutes, 16 seconds ago
steamusers collection last updated: 23 hours, 16 minutes, 4 seconds ago
topgames collection last updated: 0 hours, 5 minutes, 28 seconds ago
opencritic collection last updated: 0 hours, 56 minutes, 40 seconds ago
twitchhistorical collection last updated: 0 hours, 6 minutes, 36 seconds ago


## Number of Steam users online

In [4]:
# Start of the reporting window: 90 days before now (the chart title below
# also says "90 days"; keep the two in sync when changing this).
# An aware UTC timestamp is used because the stored dates are treated as UTC
# elsewhere in this notebook; datetime.today() is naive *local* time and
# would shift the cutoff by the machine's UTC offset.
pastday = datetime.now(timezone.utc) - timedelta(days=90)

# Fetch only the two fields needed for plotting. The column names are pinned
# so that an empty result still yields a frame with the expected columns.
all_users_df = pd.DataFrame(
    list(db['steamusers'].find(
        {"date": {"$gte": pastday}},
        {"date":1, "numberonlineusers":1, '_id':False}
    )),
    columns=["date", "numberonlineusers"]
)

# Preview the first rows instead of rendering the whole frame.
all_users_df.head(10)

Unnamed: 0,date,numberonlineusers
0,2019-04-15 14:40:49,14656764
1,2019-04-15 14:40:56,14658018
2,2019-04-15 14:45:35,14692902
3,2019-04-15 14:45:42,14693654
4,2019-04-15 14:50:20,14724772
5,2019-04-15 14:50:28,14725678
6,2019-04-15 14:55:06,14756673
7,2019-04-15 14:55:13,14757443
8,2019-04-15 14:59:52,14782811
9,2019-04-15 14:59:59,14783395


In [5]:
# https://plot.ly/python/line-charts/

# Single line trace: sample timestamp on x, number of online users on y.
users_trace = Scatter(x=all_users_df['date'], y=all_users_df['numberonlineusers'])
data = [users_trace]

# Chart title plus a label for each axis.
layout = Layout(
    title='Number of Steam users online over the past 90 days',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Number of Users'}
)

# Assemble the figure and render it inline in the notebook.
fig = Figure(data=data, layout=layout)
iplot(fig)

## Top 100 Steam Games Over Time

## Count of apps by type

In [6]:
# Group the apps collection by its "type" field and count documents per type.
type_count_pipeline = [
    {"$group" : {"_id":"$type", "count":{"$sum":1}}}
]
apps_by_type = pd.DataFrame(list(db['apps'].aggregate(type_count_pipeline)))

# Show the most common types first, rendered as an HTML table.
apps_by_type_sorted = apps_by_type.sort_values(by='count', ascending=False)
display(HTML(apps_by_type_sorted.to_html()))

# Overall number of documents in the apps collection.
total_apps = db['apps'].count_documents({})
print("Total apps: " + str(total_apps))

Unnamed: 0,_id,count
3,game,36066
9,dlc,21614
8,,9429
10,episode,8285
2,demo,3085
4,movie,1736
7,video,1592
5,series,340
1,advertising,247
6,mod,64


Total apps: 82473
