In [10]:
import sys, os
from pymongo import MongoClient

from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

from IPython.display import HTML, display

# https://stackoverflow.com/questions/41323423/plotly-inside-jupyter-notebook-python
# Must run before any iplot() call so figures render inline in the notebook.
# NOTE(review): per the plotly offline docs, connected=True loads plotly.js
# from a CDN instead of embedding it, so viewing needs internet access - confirm.
init_notebook_mode(connected=True) # initiate notebook for offline plot

import pandas as pd
from datetime import datetime, timezone, timedelta

In [11]:
# Make the parent directory importable so that ../config.py can be loaded.
sys.path.append("..")
import config # ../config.py

# Optional manual overrides of the MongoDB host (uncomment one to use it).
# run on raspberrypi3 Mongo database
#config.mongodb_ip = "192.168.1.224"
# run on Asus laptop Mongo database
#config.mongodb_ip = "192.168.1.124"

# Connect with the host/port taken from config and select the 'steam'
# database; `db` is the handle the query cells below read from.
client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
db = client['steam']

## Collection Last Updated Time Deltas

In [12]:
def latest_timestamp(collection_name, date_field):
    """Return the newest ``date_field`` value stored in ``db[collection_name]``.

    Only the single newest document is fetched (descending sort on
    ``date_field``), so no DataFrame round-trip is needed.

    Returns:
        A timezone-aware datetime labelled as UTC (the same labelling the
        notebook has always applied to these stored values), or None when the
        collection holds no document carrying ``date_field``.
    """
    newest = db[collection_name].find_one(
        {}, {date_field: 1, "_id": False}, sort=[(date_field, -1)]
    )
    if not newest or newest.get(date_field) is None:
        return None
    return newest[date_field].replace(tzinfo=timezone.utc)


def format_elapsed(delta):
    """Render a timedelta as '<H> hours, <M> minutes, <S> seconds'.

    The total duration is used, so whole days are folded into the hour count.
    ``timedelta.seconds`` alone only holds the sub-day remainder, which made a
    collection last written 2 days and 5 minutes ago look 5 minutes old.
    Negative durations (stored timestamp ahead of this machine's clock) are
    clamped to zero.
    """
    # https://stackoverflow.com/questions/6574329/how-can-i-produce-a-human-readable-difference-when-subtracting-two-unix-timestam
    total = max(0, int(delta.total_seconds()))
    hours, remainder = divmod(total, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '%d hours, %d minutes, %d seconds' % (hours, minutes, seconds)


# The per-collection names are kept at top level for any later cell that uses them.
apps_last_date = latest_timestamp('apps', 'updated_date')
pricehistory_last_date = latest_timestamp('pricehistory', 'date')
steamusers_last_date = latest_timestamp('steamusers', 'date')
topgames_last_date = latest_timestamp('topgames', 'date')

# One shared "now" so all four lines are measured against the same instant.
now_utc = datetime.now(timezone.utc)
for collection_name, last_date in (
    ('apps', apps_last_date),
    ('pricehistory', pricehistory_last_date),
    ('steamusers', steamusers_last_date),
    ('topgames', topgames_last_date),
):
    if last_date is None:
        print('%s collection has no dated documents' % collection_name)
    else:
        print('%s collection last updated: %s ago'
              % (collection_name, format_elapsed(now_utc - last_date)))

apps collection last updated: 0 hours, 27 minutes, 38 seconds ago
pricehistory collection last updated: 0 hours, 27 minutes, 41 seconds ago
steamusers collection last updated: 2 hours, 25 minutes, 17 seconds ago
topgames collection last updated: 0 hours, 12 minutes, 24 seconds ago


## Number of Steam users online

In [13]:
# Start of the 90-day window, as a naive datetime expressed in UTC.
# PyMongo interprets naive datetimes as UTC, so the local wall-clock value
# from datetime.today() shifted the window by the machine's UTC offset.
# The value stays naive so it remains comparable with the naive datetimes
# PyMongo returns by default.
pastday = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=90)

steamusers_cursor = db['steamusers'].find(
    {"date": {"$gte": pastday}},
    {"date": 1, "numberonlineusers": 1, '_id': False},
)

# Explicit columns keep the frame usable (with both columns present) when the
# query matches nothing. The stable sort puts the samples in chronological
# order for the line chart, since find() gives no ordering guarantee.
all_users_df = (
    pd.DataFrame(list(steamusers_cursor), columns=['date', 'numberonlineusers'])
    .sort_values('date', kind='mergesort')
    .reset_index(drop=True)
)
all_users_df

Unnamed: 0,date,numberonlineusers
0,2019-02-09 21:37:36,14785513
1,2019-02-09 21:38:14,14776281
2,2019-02-09 21:42:22,14713679
3,2019-02-09 21:42:59,14704161
4,2019-02-09 21:47:07,14639783
5,2019-02-09 21:47:45,14630023
6,2019-02-09 21:51:53,14567623
7,2019-02-09 21:52:31,14558177
8,2019-02-09 21:56:39,14490233
9,2019-02-09 21:57:16,14479831


In [14]:
# https://plot.ly/python/line-charts/

# A single trace: sample timestamp on x, concurrent online users on y.
online_users_trace = Scatter(
    x=all_users_df['date'],
    y=all_users_df['numberonlineusers'],
)
data = [online_users_trace]

# Chart title and axis captions.
layout = Layout(
    title='Number of Steam users online over the past 90 days',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Number of Users'},
)

# Assemble the figure and render it inline in the notebook.
fig = Figure(data=data, layout=layout)
iplot(fig)

## Top 100 Steam Games Over Time

## Count of apps by type

In [15]:
# Server-side tally: one result row per distinct `type` value in `apps`.
count_by_type_pipeline = [
    {"$group" : {"_id":"$type", "count":{"$sum":1}}}
]
type_counts = pd.DataFrame(list(db['apps'].aggregate(count_by_type_pipeline)))

# Largest categories first, rendered as an HTML table.
type_counts_html = type_counts.sort_values(by='count', ascending=False).to_html()
display(HTML(type_counts_html))

# Overall document count, for comparison with the per-type rows above.
total_apps = db['apps'].count_documents({})
print("Total apps: " + str(total_apps))

Unnamed: 0,_id,count
5,game,34330
9,dlc,20592
8,,9344
10,episode,8279
4,demo,2908
1,movie,1736
7,video,1591
2,series,339
3,advertising,247
6,mod,60


Total apps: 79441
