In [1]:
import sys, os
from pymongo import MongoClient

from plotly.offline import init_notebook_mode, iplot
from plotly.graph_objs import *

from IPython.display import HTML, display

# Prepare the notebook for plotly's offline mode so the iplot() calls further down can draw inline.
# Background: https://stackoverflow.com/questions/41323423/plotly-inside-jupyter-notebook-python
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it
# in the notebook — confirm against the plotly.offline documentation.
init_notebook_mode(connected=True) # initiate notebook for offline plot

import pandas as pd
from datetime import datetime, timezone, timedelta

In [2]:
# Make the parent directory importable so `config` (../config.py) resolves.
# Guarded so that re-running this cell does not append a duplicate entry each time.
if ".." not in sys.path:
    sys.path.append("..")
import config # ../config.py

# Machine-specific override of the host from config.py (original note: "run on dellxps").
# Set the MONGODB_IP environment variable to target another host without editing this cell;
# when it is unset the previous hard-coded address is used.
config.mongodb_ip = os.environ.get("MONGODB_IP", "192.168.1.114")

# Shared handles used by every query cell below.
client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
db = client['steam']

## Collection Last Updated Time Deltas

In [3]:
def last_update_time(collection_name, date_field="date"):
    """Return the newest ``date_field`` value found in ``db[collection_name]``.

    The stored value is tagged as UTC (the same interpretation the notebook has
    always applied) and returned as a timezone-aware ``datetime``.

    Returns ``None`` when the collection is empty or its newest document does
    not carry ``date_field``, instead of raising.
    """
    # Descending sort + find_one fetches only the single newest document.
    newest = db[collection_name].find_one(
        {}, {date_field: 1, "_id": False}, sort=[(date_field, -1)]
    )
    stamp = (newest or {}).get(date_field)
    if stamp is None:
        return None
    return stamp.replace(tzinfo=timezone.utc)


def format_age(age):
    """Format a ``timedelta`` as '<hours> hours, <minutes> minutes, <seconds> seconds'.

    Uses ``total_seconds()`` so that whole days are folded into the hour count;
    ``timedelta.seconds`` alone silently drops them (2 days 3 hours would read
    as 3 hours).
    """
    # https://stackoverflow.com/questions/6574329/how-can-i-produce-a-human-readable-difference-when-subtracting-two-unix-timestam
    total = int(age.total_seconds())
    return '%d hours, %d minutes, %d seconds' % (total // 3600, total % 3600 // 60, total % 60)


# The apps collection tracks freshness in "updated_date"; the others use "date".
apps_last_date = last_update_time('apps', 'updated_date')
pricehistory_last_date = last_update_time('pricehistory')
steamusers_last_date = last_update_time('steamusers')
topgames_last_date = last_update_time('topgames')
opencritic_last_date = last_update_time('opencritic')
twitchhistorical_last_date = last_update_time('twitchhistorical')

# Measure every collection against the same instant so the ages are comparable.
now_utc = datetime.now(timezone.utc)
for collection_name, last_date in [
    ('apps', apps_last_date),
    ('pricehistory', pricehistory_last_date),
    ('steamusers', steamusers_last_date),
    ('topgames', topgames_last_date),
    ('opencritic', opencritic_last_date),
    ('twitchhistorical', twitchhistorical_last_date),
]:
    if last_date is None:
        print('%s collection has no dated documents' % collection_name)
    else:
        print('%s collection last updated: %s ago' % (collection_name, format_age(now_utc - last_date)))

apps collection last updated: 0 hours, 44 minutes, 22 seconds ago
pricehistory collection last updated: 0 hours, 10 minutes, 14 seconds ago
steamusers collection last updated: 4 hours, 50 minutes, 1 seconds ago
topgames collection last updated: 0 hours, 3 minutes, 55 seconds ago
opencritic collection last updated: 0 hours, 35 minutes, 40 seconds ago
twitchhistorical collection last updated: 14 hours, 5 minutes, 34 seconds ago


## Number of Steam users online

In [4]:
# Start of the 90-day window, expressed in UTC.
# The "Collection Last Updated" cell treats stored dates as UTC, and PyMongo interprets a
# naive datetime as UTC, so the previous naive local-time cutoff (datetime.today()) shifted
# the window by the machine's UTC offset. An aware UTC value is encoded unambiguously.
pastday = datetime.now(timezone.utc) - timedelta(days=90)

# Fetch only the two columns the chart needs for samples inside the window.
all_users_df = pd.DataFrame(list(
    db['steamusers'].find(
        {"date": {"$gte": pastday}},
        {"date": 1, "numberonlineusers": 1, '_id': False},
    )
))
all_users_df

Unnamed: 0,date,numberonlineusers
0,2022-04-22 17:17:38,25475922
1,2022-04-22 17:18:01,25468992
2,2022-04-22 17:22:24,25395491
3,2022-04-22 17:22:46,25389310
4,2022-04-22 17:27:09,25314867
...,...,...
59953,2022-07-21 18:06:20,23287630
59954,2022-07-21 18:11:06,23245523
59955,2022-07-21 18:15:52,23200867
59956,2022-07-21 18:20:38,23144804


In [5]:
# Line chart of the online-user samples loaded into all_users_df.
# Reference: https://plot.ly/python/line-charts/

# One trace: sample time on x, online-user count on y.
users_trace = Scatter(
    x=all_users_df['date'],
    y=all_users_df['numberonlineusers'],
)
data = [users_trace]

# Chart title and axis captions.
layout = Layout(
    title='Number of Steam users online over the past 90 days',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Number of Users'},
)

fig = Figure(data=data, layout=layout)
iplot(fig)

## Top 50 Steam Games By Number of Recommendations

In [6]:
# Keep only apps that carry a recommendation total and lift it to a flat column.
recommendations_pipeline = [
    {'$match': {"recommendations.total": {"$exists": True}}},
    {"$project": {"_id": False, "appid": 1, "name": 1, "steam_recommendations": "$recommendations.total"}},
]
recommended_apps = pd.DataFrame(list(db['apps'].aggregate(recommendations_pipeline)))

# Rank in pandas by recommendation count (highest first) and keep the first 50 rows.
recommendations_df = recommended_apps.sort_values(
    by='steam_recommendations', ascending=False
).head(50)

recommendations_df

Unnamed: 0,name,appid,steam_recommendations
25,Counter-Strike: Global Offensive,730,3404344
6753,PUBG: BATTLEGROUNDS,578080,1613428
2343,Grand Theft Auto V,271590,1231109
3904,Tom Clancy's Rainbow Six Siege,359550,890285
993,Terraria,105600,766125
121,Garry's Mod,4000,709358
1971,Rust,252490,642608
2691,The Witcher 3: Wild Hunt,292030,585645
9306,Among Us,945360,548077
5067,Wallpaper Engine,431960,478160


## Count of apps by type

In [7]:
# Count documents per distinct "type" value in the apps collection.
type_counts_pipeline = [
    {"$group": {"_id": "$type", "count": {"$sum": 1}}},
]
type_counts = pd.DataFrame(list(db['apps'].aggregate(type_counts_pipeline)))

# Show the largest groups first, rendered as an HTML table.
type_counts_sorted = type_counts.sort_values(by='count', ascending=False)
display(HTML(type_counts_sorted.to_html()))

# Overall document count, independent of the grouping above.
total_apps = db['apps'].count_documents({})
print("Total apps: {}".format(total_apps))

Unnamed: 0,_id,count
0,game,79272
11,dlc,39917
8,,11040
1,demo,11008
3,episode,8347
10,music,4168
6,movie,1739
2,video,1603
4,series,344
5,advertising,242


Total apps: 157793
