In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it.
for folder, _, files_here in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, entry) for entry in files_here)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (./) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Steam Monthly Player Analysis

Analyzing player count trends across Steam games from 2012 to 2025. This dataset contains monthly average and peak player counts for thousands of games on the Steam platform.

**Dataset:** Steam Monthly Average Players (Kaggle)

**Goals:**
- Understand the distribution of player counts across games
- Identify trends and patterns over time
- Analyze top performing games
- Explore seasonality and growth patterns

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below;
# consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Global plot appearance: matplotlib style sheet first, then the seaborn colour palette.
# NOTE(review): both calls presumably touch the default colour cycle, so the order is
# kept as-is — confirm before reordering.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Read the raw CSV for a first look at its size and leading rows.
# Adjust the path if the file listing printed by the first cell shows a different location.
csv_path = '/kaggle/input/steam-monthly-average-players/steamcharts.csv'
df = pd.read_csv(csv_path)
print(f"Shape: {df.shape}")
df.head()

## Load and Prepare Data

In [None]:
# Re-read the CSV so the preparation below always starts from the file contents.
df = pd.read_csv('/kaggle/input/steam-monthly-average-players/steamcharts.csv')

# Force both change columns to numeric; values that cannot be parsed become NaN.
# 'gain_percent' is used numerically further down (correlation matrix, clipped histogram),
# so it gets the same treatment as 'gain' instead of relying on how the CSV parser typed it.
for change_col in ('gain', 'gain_percent'):
    df[change_col] = pd.to_numeric(df[change_col], errors='coerce')

# 'month' is parsed with the format '%b-%y' (abbreviated month name, two-digit year);
# rows that do not match become NaT instead of raising.
df['date'] = pd.to_datetime(df['month'], format='%b-%y', errors='coerce')

# Calendar parts derived from the parsed date, used for grouping in later cells.
df['year'] = df['date'].dt.year
df['month_num'] = df['date'].dt.month
df['month_name'] = df['date'].dt.month_name()

# The +1 in the denominator keeps the division defined when avg_players is 0.
df['peak_to_avg_ratio'] = df['peak_players'] / (df['avg_players'] + 1)

print(f"Loaded {len(df):,} records for {df['name'].nunique():,} games")

## Top Games Time Series

In [None]:
# Average-player history for a hand-picked list of titles, one line per game.
games_to_plot = [
    'Counter-Strike 2',
    'PUBG: BATTLEGROUNDS',
    'Dota 2',
    'Team Fortress 2',
    'Grand Theft Auto V Legacy',
    'Rust',
]

fig, ax = plt.subplots(figsize=(14, 8))
for title in games_to_plot:
    history = df.loc[df['name'] == title].sort_values('date')
    if history.empty:
        # Title not present in the data: nothing to draw for it.
        continue
    ax.plot(history['date'], history['avg_players'], label=title, linewidth=2)

ax.set_xlabel('Date')
ax.set_ylabel('Average Players')
ax.set_title('Player Trends for Top Games')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Correlation Heatmap

In [None]:
# Pairwise correlations between the player metrics, drawn as an annotated heatmap.
numeric_cols = [
    'avg_players',
    'gain',
    'gain_percent',
    'peak_players',
    'peak_to_avg_ratio',
]
corr_matrix = df.loc[:, numeric_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    ax=ax,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,  # centre the diverging colormap on zero correlation
    square=True,
    linewidths=1,
)
ax.set_title('Correlation Matrix of Player Metrics')
fig.tight_layout()
plt.show()

## Seasonality Heatmap

In [None]:
# Mean of avg_players for every (year, month) pair, laid out as a year x month grid.
year_month_means = df.groupby(['year', 'month_num'])['avg_players'].mean()
monthly_pivot = year_month_means.reset_index()
heatmap_data = year_month_means.unstack('month_num')

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    heatmap_data,
    ax=ax,
    cmap='YlOrRd',
    annot=False,
    fmt='.0f',
    cbar_kws={'label': 'Avg Players'},
)
ax.set_xlabel('Month')
ax.set_ylabel('Year')
ax.set_title('Average Players by Month and Year - Seasonality Heatmap')
fig.tight_layout()
plt.show()

## Distribution by Year

In [None]:
# Per-year distribution of avg_players, restricted to rows with year >= 2018.
is_recent = df['year'].ge(2018)
recent_years = df.loc[is_recent].copy()

fig, ax = plt.subplots(figsize=(14, 6))
sns.violinplot(data=recent_years, x='year', y='avg_players', inner='box', ax=ax)
ax.set_yscale('log')
ax.set_ylabel('Average Players (log scale)')
ax.set_title('Distribution of Player Counts by Year (2018-2025)')
fig.tight_layout()
plt.show()

## Top Games Multi-Metric Comparison

In [None]:
# Per-game summary metrics, keeping the ten games with the highest mean avg_players.
top_10 = (
    df.groupby('name')
    .agg(**{
        'Avg Players': ('avg_players', 'mean'),
        'Max Peak': ('peak_players', 'max'),
        'Avg Monthly Change': ('gain', 'mean'),
        'Months Tracked': ('month', 'count'),
    })
    .nlargest(10, 'Avg Players')
)

# Min-max scale every metric column so they can share one axis.
column_floor = top_10.min()
column_span = top_10.max() - column_floor
top_10_norm = (top_10 - column_floor) / column_span

fig, ax = plt.subplots(figsize=(12, 8))
top_10_norm.plot.barh(ax=ax, width=0.8)
ax.set_xlabel('Normalized Value (0-1)')
ax.set_ylabel('Game')
ax.set_title('Top 10 Games - Multi-Metric Comparison (Normalized)')
# Anchor the legend outside the axes, to the right of the plot area.
ax.legend(title='Metric', bbox_to_anchor=(1.05, 1), loc='upper left')
fig.tight_layout()
plt.show()

## Growth Rate Distributions

In [None]:
# Side-by-side histograms of the absolute and the percentage monthly change.
# Each column is clipped to a symmetric range before binning.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (column, clip limit, bar colour, x-axis label, panel title)
panel_specs = [
    ('gain', 1000, 'blue',
     'Monthly Gain/Loss (clipped to ±1000)',
     'Distribution of Absolute Player Changes'),
    ('gain_percent', 50, 'green',
     'Monthly Gain/Loss % (clipped to ±50%)',
     'Distribution of Percentage Changes'),
]

for ax, (column, limit, colour, x_label, heading) in zip(axes, panel_specs):
    ax.hist(df[column].clip(-limit, limit), bins=100, edgecolor='black', alpha=0.7, color=colour)
    # Dashed reference line at zero change.
    ax.axvline(x=0, color='red', linestyle='--', linewidth=2, label='No change')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.set_title(heading)
    ax.legend()

plt.tight_layout()
plt.show()

## Individual Game Lifecycles

In [None]:
# One stacked panel per selected game showing its avg_players history as a filled line.
interesting_games = [
    'PUBG: BATTLEGROUNDS',
    'Rust',
    'Warframe',
    'Counter-Strike 2',
    'ARK: Survival Evolved',
]

fig, axes = plt.subplots(len(interesting_games), 1, figsize=(14, 12))

for ax, game in zip(axes, interesting_games):
    game_data = df[df['name'] == game].sort_values('date')

    if game_data.empty:
        # No rows for this title: show a placeholder message in the middle of the panel.
        ax.text(0.5, 0.5, f'{game} - No data available',
                ha='center', va='center', transform=ax.transAxes)
        ax.set_ylabel('Avg Players')
        continue

    ax.plot(game_data['date'], game_data['avg_players'], linewidth=2, color='blue')
    ax.fill_between(game_data['date'], 0, game_data['avg_players'], alpha=0.3)
    ax.set_ylabel('Avg Players')
    ax.set_title(f'{game} - Player Count Over Time')
    ax.grid(True, alpha=0.3)

# Only the bottom panel carries the x-axis label.
axes[-1].set_xlabel('Date')
plt.tight_layout()
plt.show()

## Animated Visualizations

Interactive animated charts showing how games evolved over time.

### Animated Line Chart - Top Games Over Time

In [None]:
# The 15 games with the highest mean avg_players across all of their rows.
mean_players_by_game = df.groupby('name')['avg_players'].mean()
top_15_games = mean_players_by_game.nlargest(15).index.tolist()
top_games_data = df[df['name'].isin(top_15_games)].sort_values('date')

# Each animation frame is cumulative: the frame for a given year holds every row up to
# and including that year, tagged with the year in a 'frame' column.
years = sorted(top_games_data['year'].unique())
frames = [
    top_games_data[top_games_data['year'] <= year].assign(frame=year)
    for year in years
]
animated_df = pd.concat(frames)

# Fixed y-range (10% headroom above the overall maximum) shared by all frames.
y_ceiling = top_games_data['avg_players'].max() * 1.1

fig = px.line(
    animated_df,
    x='date',
    y='avg_players',
    color='name',
    animation_frame='frame',
    range_y=[0, y_ceiling],
    title='Top 15 Games - Player Count Over Time (Animated)',
    labels={'avg_players': 'Average Players', 'date': 'Date'},
)

fig.update_layout(height=600, hovermode='x unified')
fig.show()

### Racing Bar Chart - Top Games Rankings Over Time

In [None]:
# Prepare the per-month top-15 table used by the racing bar charts.
# One row per (date, name); rows sharing the same date and name are summed.
monthly_data = df.groupby(['date', 'name'])['avg_players'].sum().reset_index()


def get_top_n_per_month(group, n=15):
    """Return the ``n`` rows of ``group`` with the largest ``avg_players``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single month; must contain an ``avg_players`` column.
    n : int, default 15
        Maximum number of rows to keep.

    Returns
    -------
    pandas.DataFrame
        At most ``n`` rows of ``group``, ordered from highest to lowest ``avg_players``.
    """
    return group.nlargest(n, 'avg_players')


# Iterate over the groups explicitly instead of calling groupby('date').apply(...).
# Newer pandas versions deprecate, and later stop, handing the grouping column to the
# applied function; combined with reset_index(drop=True) that would leave the result
# without the 'date' column that the ranking and filtering below depend on.
monthly_top_frames = [
    get_top_n_per_month(month_rows)
    for _, month_rows in monthly_data.groupby('date')
]
if monthly_top_frames:
    top_monthly = pd.concat(monthly_top_frames, ignore_index=True)
else:
    # No dated rows at all: keep an empty frame with the expected columns.
    top_monthly = monthly_data.iloc[0:0].copy()

# Rank within each month, 1 = most players; method='first' breaks ties by row order.
top_monthly['rank'] = top_monthly.groupby('date')['avg_players'].rank(ascending=False, method='first')

# filter to 2016 onwards for smoother animation
top_monthly = top_monthly[top_monthly['date'] >= '2016-01-01']

print(f"Prepared {top_monthly['date'].nunique()} months of data")

In [None]:
# Plotly Express version of the racing bar chart: one animation frame per date.
# The x-range is fixed (10% headroom above the overall maximum) so the axis does not rescale.
x_ceiling = top_monthly['avg_players'].max() * 1.1

fig = px.bar(
    top_monthly,
    x='avg_players',
    y='name',
    color='name',
    orientation='h',
    animation_frame='date',
    range_x=[0, x_ceiling],
    title='Racing Bar Chart - Top 15 Games by Player Count',
    labels={'avg_players': 'Average Players', 'name': 'Game'},
)

fig.update_layout(
    height=700,
    showlegend=False,
    yaxis=dict(categoryorder='total ascending'),
)

fig.show()

### Enhanced Racing Bar Chart (Better Sorting)

In [None]:
# Racing bar chart assembled from explicit graph_objects frames, one frame per month,
# with the bars of every frame sorted by that month's avg_players.
top_monthly_sorted = top_monthly.sort_values(['date', 'avg_players'], ascending=[True, False])

# One fixed colour code per game for the whole animation. Deriving the codes inside each
# frame (category codes of only that frame's names) would give the same game a different
# colour whenever the set of games in the monthly top list changes.
game_color_codes = {
    game: code
    for code, game in enumerate(sorted(top_monthly_sorted['name'].unique()))
}
# Upper bound of the colour range; at least 1 so cmin and cmax never coincide.
max_color_code = max(len(game_color_codes) - 1, 1)


def _month_label(date):
    """Return the first seven characters of ``str(date)`` (e.g. '2016-01') as the frame label."""
    return str(date)[:7]


def _ranked_bar(month_rows):
    """Build the horizontal bar trace for one month's rows.

    Parameters
    ----------
    month_rows : pandas.DataFrame
        Rows of a single month with ``name`` and ``avg_players`` columns.

    Returns
    -------
    plotly.graph_objects.Bar
        Bars sorted ascending by ``avg_players``, coloured with the game's fixed colour
        code and labelled with the thousands-separated player count.
    """
    ordered = month_rows.sort_values('avg_players')
    return go.Bar(
        x=ordered['avg_players'],
        y=ordered['name'],
        orientation='h',
        marker=dict(
            color=ordered['name'].map(game_color_codes),
            # Pin the colour range so a given code maps to the same colour in every frame.
            cmin=0,
            cmax=max_color_code,
        ),
        text=ordered['avg_players'].apply(lambda x: f'{x:,.0f}'),
        textposition='outside',
    )


fig = go.Figure()

dates = sorted(top_monthly_sorted['date'].unique())

# create frames
frames = []
for date in dates:
    month_rows = top_monthly_sorted[top_monthly_sorted['date'] == date]
    label = _month_label(date)
    frames.append(go.Frame(
        data=[_ranked_bar(month_rows)],
        name=label,
        layout=go.Layout(title_text=f"Top Games - {label}")
    ))

# initial data: the first month is what the figure shows before the animation starts
initial_data = top_monthly_sorted[top_monthly_sorted['date'] == dates[0]].sort_values('avg_players')
fig.add_trace(_ranked_bar(initial_data))

fig.frames = frames

fig.update_layout(
    title='Top 15 Games Racing Bar Chart',
    # Fixed x-range with 15% headroom above the overall maximum.
    xaxis=dict(range=[0, top_monthly_sorted['avg_players'].max() * 1.15], title='Average Players'),
    yaxis=dict(title='Game'),
    height=700,
    updatemenus=[dict(
        type='buttons',
        showactive=False,
        buttons=[dict(label='Play',
                     method='animate',
                     args=[None, dict(frame=dict(duration=200, redraw=True),
                                     fromcurrent=True)]),
                dict(label='Pause',
                     method='animate',
                     args=[[None], dict(frame=dict(duration=0, redraw=False),
                                       mode='immediate',
                                       transition=dict(duration=0))])]
    )],
    # One slider step per frame; each step jumps straight to the frame with the same name.
    sliders=[dict(
        steps=[dict(method='animate',
                   args=[[f.name],
                         dict(mode='immediate',
                             frame=dict(duration=200, redraw=True),
                             transition=dict(duration=0))],
                   label=f.name) for f in frames],
        active=0,
        y=0,
        len=0.9,
        x=0.1
    )]
)

fig.show()

### Platform Growth Animation

In [None]:
# Per-date platform totals: summed avg_players and the number of distinct app ids.
platform_growth = (
    df.groupby('date')
    .agg(
        total_players=('avg_players', 'sum'),
        num_games=('steam_appid', 'nunique'),
    )
    .reset_index()
)
platform_growth['year'] = platform_growth['date'].dt.year

# Fixed axis ranges (10% headroom above each maximum) shared by all animation frames.
games_ceiling = platform_growth['num_games'].max() * 1.1
players_ceiling = platform_growth['total_players'].max() * 1.1

fig = px.scatter(
    platform_growth,
    x='num_games',
    y='total_players',
    size='total_players',
    color='year',
    hover_name='date',
    animation_frame='date',
    range_x=[0, games_ceiling],
    range_y=[0, players_ceiling],
    title='Platform Growth: Games vs Total Players Over Time',
    labels={'num_games': 'Number of Games', 'total_players': 'Total Average Players'},
)

fig.update_layout(height=600)
fig.show()