In [91]:
import sys, os
from pymongo import MongoClient

from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

from IPython.display import HTML, display

# https://stackoverflow.com/questions/41323423/plotly-inside-jupyter-notebook-python
# Initialise plotly's offline notebook mode, which the iplot(...) call further
# down relies on.
# NOTE(review): connected=True presumably makes the notebook load plotly.js from
# a CDN instead of embedding it, so viewing would need network access — confirm.
init_notebook_mode(connected=True) # initiate notebook for offline plot

import pandas as pd
from datetime import datetime, timezone

In [92]:
# The project-level settings live one directory up (../config.py), so that
# directory has to be on the import path before `config` can be imported.
sys.path.append("..")
import config

# To run against the raspberrypi3 Mongo database, override the host here:
# config.mongodb_ip = "192.168.1.224"

# Host and port come from config; the rest of the notebook queries through `db`.
client = MongoClient(
    host=config.mongodb_ip,
    port=config.mongodb_port,
)
db = client.get_database('steam')

## Collection Last Updated Time Deltas

In [93]:
def latest_timestamp(collection_name, field):
    """Return the newest `field` value of a collection as a UTC-aware datetime.

    Asks MongoDB for the single document with the highest `field` value
    instead of materialising a DataFrame just to read one scalar.

    Parameters:
        collection_name: name of the collection inside `db`.
        field: name of the datetime field to look at.

    Returns:
        The newest value with tzinfo set to UTC, or None when the collection
        holds no document with a value for `field`.
    """
    newest = db[collection_name].find_one(
        {}, {field: 1, "_id": False}, sort=[(field, -1)]
    )
    if not newest or newest.get(field) is None:
        return None
    # The stored value is labelled as UTC (not converted), so it can be
    # subtracted from an aware "now" below.
    return newest[field].replace(tzinfo=timezone.utc)


def format_age(age_seconds):
    """Render a whole number of seconds as '<h> hours, <m> minutes, <s> seconds'.

    Hours are not wrapped at 24, so an age of more than one day shows up as
    e.g. '43 hours, ...' rather than silently losing the whole days.
    """
    hours, remainder = divmod(age_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return '%d hours, %d minutes, %d seconds' % (hours, minutes, seconds)


apps_last_date = latest_timestamp('apps', 'updated_date')
pricehistory_last_date = latest_timestamp('pricehistory', 'date')
steamusers_last_date = latest_timestamp('steamusers', 'date')
topgames_last_date = latest_timestamp('topgames', 'date')

# https://stackoverflow.com/questions/6574329/how-can-i-produce-a-human-readable-difference-when-subtracting-two-unix-timestam
# Sample the clock once so all four ages are measured against the same instant.
now_utc = datetime.now(timezone.utc)
for collection_name, last_date in [
    ('apps', apps_last_date),
    ('pricehistory', pricehistory_last_date),
    ('steamusers', steamusers_last_date),
    ('topgames', topgames_last_date),
]:
    if last_date is None:
        print('%s collection has no dated documents' % collection_name)
        continue
    # total_seconds() includes whole days; timedelta.seconds alone would drop
    # them and under-report anything older than 24 hours. Clamp at zero so a
    # timestamp slightly in the future does not produce a negative age.
    age_seconds = max(0, int((now_utc - last_date).total_seconds()))
    print('%s collection last updated: %s ago' % (collection_name, format_age(age_seconds)))

apps collection last updated: 2 hours, 16 minutes, 36 seconds ago
pricehistory collection last updated: 2 hours, 16 minutes, 36 seconds ago
steamusers collection last updated: 19 hours, 35 minutes, 37 seconds ago
topgames collection last updated: 0 hours, 0 minutes, 29 seconds ago


## Number of Steam users online

In [94]:
# Fetch every (date, numberonlineusers) sample from the steamusers collection,
# then order the frame newest-first in a single expression (no in-place sort).
user_samples = list(db['steamusers'].find({}, {"date":1, "numberonlineusers":1, '_id':False}))
all_users_df = pd.DataFrame(user_samples).sort_values(by='date', ascending=False)
all_users_df

Unnamed: 0,date,numberonlineusers
32872,2019-02-11 08:08:32,11187333
33221,2019-02-11 08:03:46,11164222
33263,2019-02-11 07:59:01,11142270
33262,2019-02-11 07:54:15,11119250
33414,2019-02-11 07:49:29,11093533
32916,2019-02-11 07:44:44,11069641
33446,2019-02-11 07:39:58,11045268
33305,2019-02-11 07:35:12,11020298
33018,2019-02-11 07:30:27,10997792
33070,2019-02-11 07:25:41,10975904


In [95]:
# Line chart of the online-user count against sample date.
# Reference: https://plot.ly/python/line-charts/
users_trace = Scatter(
    x=all_users_df['date'],
    y=all_users_df['numberonlineusers'],
)
data = [users_trace]

layout = Layout(
    title='Number of Steam users online',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Number of Users'},
)

fig = Figure(data=data, layout=layout)
iplot(fig)

## Top 100 Steam Games Over Time

## Count of apps by type

In [96]:
# Group the apps collection on its `type` field and count the documents per group.
type_counts = pd.DataFrame(list(
    db['apps'].aggregate([{"$group" : {"_id":"$type", "count":{"$sum":1}}}])
))

# Largest groups first, rendered as an HTML table.
type_counts_sorted = type_counts.sort_values(by='count', ascending=False)
display(HTML(type_counts_sorted.to_html()))

# Overall number of documents in the apps collection.
total_apps = db['apps'].count_documents({})
print("Total apps: " + str(total_apps))

Unnamed: 0,_id,count
5,game,31857
9,dlc,18862
8,,9109
10,episode,8268
4,demo,2721
1,movie,1736
7,video,1585
2,series,337
3,advertising,246
6,mod,59


Total apps: 74796
