# Steam Profile Opera - Creating Stories/Reports from Steam Usage

The first step is to import the required libraries and make sure there are no dependency issues before we start.

In [None]:
from IPython.utils import io

# Import Block
import datetime as dt
import time
import json
from typing import List, Dict, Optional
import os
import math
import sys
sys.path.append("../steam_scrapper/")
from dataclasses import asdict

import requests
import pandas as pd
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.offline as offline
from dotenv import load_dotenv

from repos.repo import Repo
from repos.mongo_repo import SteamMongo

# One-off setup; anything printed or displayed inside this block is captured
# instead of being shown in the notebook output.
with io.capture_output() as captured:

    # Database handle used by the cells below.
    # NOTE(review): load_dotenv is imported above but is not called in this cell —
    # confirm MONGO_DB_URL is exported by whatever launches the notebook.
    mongo_db_url = os.getenv("MONGO_DB_URL")
    repo = SteamMongo(mongo_url=mongo_db_url)

    # Time anchors: "now", its year/month, and the point two weeks back.
    current_time = dt.datetime.now()
    two_weeks_ago = current_time - dt.timedelta(weeks=2)
    current_year, current_month = current_time.year, current_time.month

    offline.init_notebook_mode()


In [None]:
# Run-time configuration, read from environment variables.
PLAYER_ID = os.getenv("PLAYER_ID")
# True only when the variable holds exactly the string "True".
RUN_FRIENDS_STATS = os.getenv("RUN_FRIENDS_STATS") == "True"

In [None]:
# Look up the record for the configured player and keep it as a plain dict.
player_records = repo.get_player_info_by_id_list([PLAYER_ID])
player_data = asdict(player_records[0])

In [None]:
# Announce which profile (by persona name) the report covers and today's date as dd/mm/YYYY.
print(f"Generating data for the profile {player_data['persona_name']} on {dt.datetime.now().strftime('%d/%m/%Y')}")

In [None]:
# Resolve the player's friends and build a DataFrame describing them.
# An empty or missing lookup result is treated like an empty friend list, so the
# cell reaches the "not available" branch below instead of raising IndexError.
friend_list_records = repo.get_friend_list_by_id(player_id=PLAYER_ID)
# The last record returned by the repository is the one used.
friend_list_ids_list = friend_list_records[-1].friend_list if friend_list_records else []
friend_list_ids = [item.steamid for item in (friend_list_ids_list or [])]
if friend_list_ids:
    friends_data = repo.get_player_info_by_id_list(friend_list_ids)
    friends_data = [asdict(item) for item in friends_data]
    # The frame holds the friends' records plus the player's own record.
    friends_df = pd.DataFrame.from_dict(friends_data + [player_data])
    # pd.notna copes with both NaN and None; math.isnan raises TypeError on None.
    # Missing logoff timestamps are reported as year 0.
    friends_df["last_year_online"] = friends_df["last_logoff"].apply(
        lambda x: dt.datetime.fromtimestamp(x).year if pd.notna(x) else 0
    )
    friends_df["loccountrycode"] = friends_df["loc_country"].fillna("Not Informed")
    FRIEND_LIST_AVAILABLE = True
else:
    friends_df = pd.DataFrame()
    FRIEND_LIST_AVAILABLE = False
    print("Friend List is not available for this steam id.")

In [None]:
# Count Friends by Country
if FRIEND_LIST_AVAILABLE:
    # Chart options collected in one place and passed through unchanged.
    country_hist_options = dict(
        x="loccountrycode",
        color="loccountrycode",
        labels={"loccountrycode": "Country Code"},
        title="Friends per Country",
        color_discrete_sequence=px.colors.qualitative.Pastel,
        width=640,
        height=480,
    )
    fig = px.histogram(friends_df, **country_hist_options)
    fig.show()

## Gameplay Information

If total gameplay information is available for the user, it is shown here.

In [None]:

def fetch_gameplay_info(player_ids: List[str], current_year: Optional[int] = None, current_month: Optional[int] = None):
    """
    Fetches the games and played time recorded for one or more player ids.

    Args:
        player_ids: Player ids to look up. A single string is also accepted and
            is split on commas, the same convention build_df_for_users uses.
        current_year: Forwarded to the repository as ``created_year``.
        current_month: Forwarded to the repository as ``created_month``.

    Returns:
        A flat list of dicts, one per gameplay entry, each carrying the owning
        player's id under the "player_id" key.
    """
    # Normalise a bare (or comma-separated) id string into the list the
    # repository call is given everywhere else in this notebook.
    if isinstance(player_ids, str):
        player_ids = player_ids.split(",")

    all_gameplay = repo.get_gameplay_info_by_id_list(player_ids, created_year=current_year, created_month=current_month)

    final_gameplay = []
    for gameplay_list in all_gameplay:
        # Flatten each player's entries, tagging every one with that player's id.
        final_gameplay.extend(
            {**asdict(gameplay_item), "player_id": gameplay_list.steamid}
            for gameplay_item in gameplay_list.gameplay_list
        )
    return final_gameplay

In [None]:
# fetch_gameplay_info(PLAYER_ID)

In [None]:
def get_game_details(appids:List[str]):
    """
    Looks up the given app ids through the repository and returns every
    record found as a plain dict.
    """
    game_records = repo.get_game_info_by_game_id_list(game_id_list=appids)
    details = []
    for record in game_records:
        details.append(asdict(record))
    return details

In [None]:
# get_game_details(["4000"])

In [None]:
def build_df_for_users(player_ids: str, who_is: Optional[str] = None):
    """
    Builds one DataFrame row per gameplay entry with the game's details attached.

    Gameplay is fetched for the module-level ``current_year`` / ``current_month``.

    Args:
        player_ids: Comma-separated player ids.
        who_is: Optional tag written to a "who_is" column on every returned row.
            The column is not created when this is falsy.

    Returns:
        The gameplay rows left-joined with game details on "appid", plus the
        derived columns "metacritic_score", "release_year", "pc_developer",
        "genres_list" and "playtime_forever". An empty, column-less DataFrame
        is returned when no gameplay was found.
    """
    all_gameplay_list = fetch_gameplay_info(player_ids.split(","), current_year, current_month)

    if not all_gameplay_list:
        # NOTE(review): this frame has no columns, so callers that select
        # columns from it will fail — confirm callers expect that.
        return pd.DataFrame()

    game_id_list = list({entry["appid"] for entry in all_gameplay_list})

    game_details_list = [
        {
            "appid": game_data["appid"],
            "name": game_data["name"],
            "genres": game_data.get("genres"),
            "metacritic": game_data.get("metacritic_score"),
            "short_description": game_data.get("description"),
            "categories": game_data.get("categories"),
            "release_date": game_data.get("release_date"),
            "developers": game_data.get("developers"),
            "publishers": game_data.get("publishers"),
            "is_free": game_data.get("is_free"),
            "type": game_data.get("type"),
            "age": game_data.get("age"),
        }
        for game_data in get_game_details(game_id_list)
    ]

    # Fixing the column set keeps the merge key present even when no game
    # details were found at all.
    detail_columns = [
        "appid", "name", "genres", "metacritic", "short_description",
        "categories", "release_date", "developers", "publishers",
        "is_free", "type", "age",
    ]
    gameplay_df = pd.DataFrame.from_dict(all_gameplay_list)
    game_info_df = pd.DataFrame(game_details_list, columns=detail_columns)
    result = pd.merge(gameplay_df, game_info_df, how="left", left_on="appid", right_on="appid")

    # Tag the merged rows rather than the detail rows, so gameplay entries
    # whose game has no details still carry the tag after the left join.
    if who_is:
        result["who_is"] = who_is

    result["metacritic_score"] = result["metacritic"]
    # Rows without a release date get NaN instead of raising on ``.year``.
    result["release_year"] = result["release_date"].apply(lambda x: x.year if pd.notna(x) else np.nan)
    # First listed developer, or None when the list is missing or empty.
    result["pc_developer"] = result["developers"].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)
    result["genres_list"] = result["genres"]
    result["playtime_forever"] = result["playtime"]

    return result

        

In [None]:
# Gameplay rows for the configured player, tagged who_is="me".
gameplay_df = build_df_for_users(PLAYER_ID, who_is="me") 
# gameplay_df = build_df_for_user(76561198021990176)

In [None]:
# One row per (game, genre): a game listed under several genres is repeated
# once for each of them.
genre_source_columns = ["name", "genres_list", "playtime_forever"]
exploded_genre_list = gameplay_df[genre_source_columns].explode("genres_list")
# Playtime floor-divided by 60 (charted later as hours).
exploded_genre_list["playtime_forever"] = exploded_genre_list["playtime_forever"].apply(lambda minutes: minutes // 60)
exploded_genre_list = exploded_genre_list.reset_index()

# Ten genres with the highest number of games.
genre_counts = exploded_genre_list.groupby(['genres_list']).agg({'genres_list': 'count'})
genre_counts = genre_counts.rename(columns={'genres_list': 'count_genres_list'})
top_10_count_by_genre = genre_counts.sort_values('count_genres_list', ascending=False).head(10).copy()

In [None]:
# Move the genre out of the index so it can be used as a chart column.
top_10_count_by_genre = top_10_count_by_genre.reset_index()
genre_count_chart = dict(
    x='count_genres_list',
    y='genres_list',
    color="genres_list",
    labels={
        "count_genres_list": "Count of Games",
        "genres_list": "Genre",
    },
    title="Count of Games by Genre",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=640,
    height=480,
)
fig = px.bar(top_10_count_by_genre, **genre_count_chart)
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Ten genres with the highest summed playtime.
genre_playtime = exploded_genre_list.groupby(['genres_list']).agg({'playtime_forever': 'sum'})
genre_playtime = genre_playtime.rename(columns={'playtime_forever': 'sum_playtime_forever'})
top_10_playtime_by_genre = genre_playtime.sort_values('sum_playtime_forever', ascending=False).head(10).copy()
top_10_playtime_by_genre = top_10_playtime_by_genre.reset_index()

genre_playtime_chart = dict(
    x='sum_playtime_forever',
    y='genres_list',
    color="genres_list",
    labels={
        "sum_playtime_forever": "Playtime in hours",
        "genres_list": "Genre",
    },
    title="Total Playtime by Genre",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=640,
    height=480,
)
fig = px.bar(top_10_playtime_by_genre, **genre_playtime_chart)
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Count By Release Year
# Only rows with a positive release year are charted.
has_release_year = gameplay_df["release_year"] > 0
release_year_chart = dict(
    x="release_year",
    color="release_year",
    labels={"release_year": "Year"},
    title="Games by Year Released",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=640,
    height=480,
)
fig = px.histogram(gameplay_df[has_release_year], **release_year_chart)
fig.update_layout(showlegend=False, bargap=0.2)
fig.show()

In [None]:
# My top 10 Games by Play Time

# Sort by playtime, keep the first ten rows, then convert to whole hours.
ranked_by_playtime = gameplay_df.sort_values("playtime_forever", ascending=False)
my_top_10 = ranked_by_playtime.head(10).copy()
my_top_10["playtime_forever"] = my_top_10["playtime_forever"].apply(lambda minutes: minutes // 60)

top_games_chart = dict(
    x='playtime_forever',
    y='name',
    color="name",
    labels={
        "playtime_forever": "Playtime in Hours",
        "name": "Game Name",
    },
    title="Top 10 Most Played Games",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=960,
    height=480,
)
fig = px.bar(my_top_10, **top_games_chart)
fig.update_layout(showlegend=False)
fig.show()

In [None]:

# Keep games with more than 30 of playtime_forever (labelled minutes below)
# and a known Metacritic score.
played_enough = gameplay_df["playtime_forever"] > 30
has_metacritic = gameplay_df["metacritic_score"].notnull()
scatter_rows = gameplay_df[played_enough & has_metacritic]

scatter_chart = dict(
    x="release_year",
    y="playtime_forever",
    color="metacritic_score",
    hover_data=['metacritic_score', 'release_year', 'playtime_forever', 'name'],
    labels={
        "name": "Game",
        "release_year": "Release Year",
        "playtime_forever": "Play Time in Minutes",
        "metacritic_score": "Metacritic",
    },
    title="Playtime by Game Release Year and Metacritic Score",
    color_continuous_scale="bluered",
    width=640,
    height=480,
)
fig = px.scatter(scatter_rows, **scatter_chart)
fig.show()


In [None]:
# Count by Metacritic
metacritic_chart = dict(
    x="metacritic_score",
    color="metacritic_score",
    labels={"metacritic_score": "Metacritic Score"},
    title="Game Count by Metacritic Score",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=640,
    height=480,
)
fig = px.histogram(gameplay_df, **metacritic_chart)
fig.update_layout(showlegend=False, bargap=0.2)
fig.show()

In [None]:
# Top 10 count by developer
developer_counts = gameplay_df.groupby(['pc_developer']).agg({'pc_developer': 'count'})
developer_counts = developer_counts.rename(columns={'pc_developer': 'count_pc_developer'})
count_by_developer_df = developer_counts.sort_values('count_pc_developer', ascending=False).head(10)

In [None]:
# Bar chart of the per-developer game counts computed in the previous cell.
developer_count_chart = dict(
    x='count_pc_developer',
    y='pc_developer',
    color="pc_developer",
    labels={
        "count_pc_developer": "Count of Games",
        "pc_developer": "Developer",
    },
    title="Count of Games per Developer",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=640,
    height=480,
)
fig = px.bar(count_by_developer_df.reset_index(), **developer_count_chart)
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Top 10 playtime by developer
developer_playtime = gameplay_df.groupby(['pc_developer']).agg({"playtime_forever": "sum"})
developer_playtime = developer_playtime.rename(columns={'playtime_forever': 'sum_playtime_forever'})
count_by_developer_df = developer_playtime.sort_values('sum_playtime_forever', ascending=False).head(10)
# Summed playtime floor-divided by 60 (charted as hours).
count_by_developer_df["sum_playtime_forever"] = count_by_developer_df["sum_playtime_forever"].apply(lambda minutes: minutes // 60)

In [None]:
# Bar chart of the per-developer playtime totals computed in the previous cell.
developer_playtime_chart = dict(
    x='sum_playtime_forever',
    y='pc_developer',
    color="pc_developer",
    labels={
        "sum_playtime_forever": "Playtime in Hours",
        "pc_developer": "Developer",
    },
    title="Playtime by Game Developer",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=640,
    height=480,
)
fig = px.bar(count_by_developer_df.reset_index(), **developer_playtime_chart)
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Top 10 best rated on Metacritic among games with playtime_forever below 30
barely_played = gameplay_df["playtime_forever"] < 30
unplayed_games_df = gameplay_df[barely_played].sort_values('metacritic_score', ascending=False).head(10)

In [None]:
# Bar chart of the best-rated, barely played games selected in the previous cell.
unplayed_chart = dict(
    x='metacritic_score',
    y='name',
    color="playtime_forever",
    labels={
        "name": "Game",
        "metacritic_score": "Metacritic Score",
    },
    title="Never Played Games by Metacritic",
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=960,
    height=480,
)
fig = px.bar(unplayed_games_df.reset_index(), **unplayed_chart)
fig.update_layout(showlegend=False)
fig.show()

## Friends Gameplay Information

If friend list information is available for the user, it is shown here.



In [None]:
# NOTE(review): this unconditionally overrides the RUN_FRIENDS_STATS value read from
# the environment in the constants cell, so the friends section is always enabled —
# confirm this is intended and not a leftover debugging switch.
RUN_FRIENDS_STATS=True

In [None]:
# Output produced while loading the friends' gameplay is captured, not displayed.
with io.capture_output() as captured:
    if RUN_FRIENDS_STATS and FRIEND_LIST_AVAILABLE:
        own_gameplay_rows = gameplay_df.copy()
        # Friends are fetched in one call, passed as a comma-separated id string.
        friend_gameplay_df = build_df_for_users(",".join(friend_list_ids), who_is="friend")
        # The player's own rows come first, followed by the friends' rows.
        all_gameplay_df = pd.concat([own_gameplay_rows, friend_gameplay_df])

In [None]:
if RUN_FRIENDS_STATS and FRIEND_LIST_AVAILABLE:
    # Attach profile columns to every gameplay row by matching player_id to steamid.
    profile_columns = ["steamid", "persona_name", "real_name", "loccountrycode", "last_year_online", "avatar"]
    final_gameplay_players_df = pd.merge(
        all_gameplay_df,
        friends_df[profile_columns],
        how="left",
        left_on="player_id",
        right_on="steamid",
    )
    # Names of the player's five most played games.
    my_top_5 = gameplay_df.sort_values("playtime_forever", ascending=False).head(5)["name"]
    # Keep rows with playtime_forever above 60, then floor-divide by 60 (charted as hours).
    over_sixty = final_gameplay_players_df["playtime_forever"] > 60
    played_all_gameplay_df = final_gameplay_players_df[over_sixty].copy()
    played_all_gameplay_df["playtime_forever"] = played_all_gameplay_df["playtime_forever"].apply(lambda minutes: minutes // 60)
    


In [None]:
if RUN_FRIENDS_STATS and FRIEND_LIST_AVAILABLE:
    # Restrict the comparison to the player's five most played games.
    in_my_top_5 = played_all_gameplay_df["name"].isin(my_top_5)
    top_5_strip_chart = dict(
        x="playtime_forever",
        y="name",
        color='who_is',
        hover_data=['playtime_forever', 'persona_name'],
        labels={
            "persona_name": "Steam Name",
            "who_is": "Friend",
            "name": "Game",
            "playtime_forever": "Playtime in Hours",
        },
        title="My Top 5 Games by Playtime",
        color_discrete_sequence=px.colors.qualitative.Pastel,
        width=640,
        height=480,
    )
    fig = px.strip(played_all_gameplay_df[in_my_top_5], **top_5_strip_chart)
    fig.show()

In [None]:
if RUN_FRIENDS_STATS and FRIEND_LIST_AVAILABLE:
    # Merge DFs
    # Left-joins profile columns from friends_df onto every gameplay row (player_id -> steamid).
    # NOTE(review): an earlier cell already assigns final_gameplay_players_df with this
    # same merge expression — this cell looks redundant; confirm before removing.
    final_gameplay_players_df = pd.merge(all_gameplay_df,friends_df[["steamid","persona_name","real_name","loccountrycode","last_year_online","avatar" ]], how="left",left_on="player_id",right_on="steamid")


In [None]:
if RUN_FRIENDS_STATS and FRIEND_LIST_AVAILABLE:
    # Total playtime per player, ten highest totals first.
    # The former axis="columns" keyword is dropped: groupby .agg forwards extra
    # keywords to the aggregation itself, and "sum" takes no such argument.
    top_10_gameplay = final_gameplay_players_df.groupby(['steamid','persona_name'], as_index=False)\
        .agg({"playtime_forever":"sum"})\
        .sort_values(ascending=False, by="playtime_forever")\
        .head(10)
    # Summed playtime floor-divided by 60 (charted as hours).
    top_10_gameplay["playtime_forever"] = top_10_gameplay["playtime_forever"].apply(lambda x: x//60)


In [None]:
if RUN_FRIENDS_STATS and FRIEND_LIST_AVAILABLE:
    # Bar chart of the per-player playtime totals computed in the previous cell.
    most_played_chart = dict(
        x='playtime_forever',
        y='persona_name',
        color="persona_name",
        labels={
            "playtime_forever": "Playtime in Hours",
            "persona_name": "Steam Name",
        },
        title="Who played the most?",
        color_discrete_sequence=px.colors.qualitative.Pastel,
        width=640,
        height=480,
    )
    fig = px.bar(top_10_gameplay.reset_index(), **most_played_chart)
    fig.update_layout(showlegend=False)
    fig.show()