In [1]:
import polars as pl
import polars.selectors as cs

import altair as alt
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

In [2]:
df_path = r'/Users/zygimantas/Documents/Data_sets/steamcharts.csv'

In [3]:
df = pl.read_csv(df_path, infer_schema_length=10000)

In [4]:
df.collect_schema()

Schema([('month', String),
        ('avg_players', Float64),
        ('gain', String),
        ('gain_percent', Float64),
        ('peak_players', Int64),
        ('name', String),
        ('steam_appid', Int64)])

In [5]:
df.columns

['month',
 'avg_players',
 'gain',
 'gain_percent',
 'peak_players',
 'name',
 'steam_appid']

In [6]:
print(df.head())

shape: (5, 7)
┌────────┬─────────────┬─────────┬──────────────┬──────────────┬────────────────┬─────────────┐
│ month  ┆ avg_players ┆ gain    ┆ gain_percent ┆ peak_players ┆ name           ┆ steam_appid │
│ ---    ┆ ---         ┆ ---     ┆ ---          ┆ ---          ┆ ---            ┆ ---         │
│ str    ┆ f64         ┆ str     ┆ f64          ┆ i64          ┆ str            ┆ i64         │
╞════════╪═════════════╪═════════╪══════════════╪══════════════╪════════════════╪═════════════╡
│ Sep-25 ┆ 7805.25     ┆ 883.12  ┆ 0.1276       ┆ 13254        ┆ Counter-Strike ┆ 10          │
│ Aug-25 ┆ 6922.13     ┆ -449.35 ┆ -0.061       ┆ 12168        ┆ Counter-Strike ┆ 10          │
│ Jul-25 ┆ 7371.48     ┆ -833.5  ┆ -0.1016      ┆ 13951        ┆ Counter-Strike ┆ 10          │
│ Jun-25 ┆ 8204.98     ┆ -847.53 ┆ -0.0936      ┆ 15798        ┆ Counter-Strike ┆ 10          │
│ May-25 ┆ 9052.51     ┆ -471.31 ┆ -0.0495      ┆ 15333        ┆ Counter-Strike ┆ 10          │
└────────┴─────────────┴──

In [7]:
df = df.with_columns(
    pl.col('gain').str.replace('-', '0').cast(pl.Float64),
    (pl.lit('2025 ') + pl.col('month')).alias('month').str.strptime(pl.Date(), '%Y %b-%d')
)

In [8]:
df.columns

['month',
 'avg_players',
 'gain',
 'gain_percent',
 'peak_players',
 'name',
 'steam_appid']

In [9]:
df = df.rename({
    'month': 'data'
})

In [10]:
df

data,avg_players,gain,gain_percent,peak_players,name,steam_appid
date,f64,f64,f64,i64,str,i64
2025-09-25,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10
2025-08-25,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10
2025-07-25,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10
2025-06-25,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10
2025-05-25,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10
…,…,…,…,…,…,…
2025-04-25,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870
2025-03-25,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870
2025-02-25,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870
2025-01-25,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870


In [11]:
df.select([
    pl.col(col).n_unique().alias(col) for col in df.columns
])

data,avg_players,gain,gain_percent,peak_players,name,steam_appid
u32,u32,u32,u32,u32,u32,u32
159,82592,40889,40995,17097,6725,6729


In [12]:
df.null_count()

data,avg_players,gain,gain_percent,peak_players,name,steam_appid
u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0


In [13]:
df.columns

['data',
 'avg_players',
 'gain',
 'gain_percent',
 'peak_players',
 'name',
 'steam_appid']

In [14]:
df = df.rename({
    'steam_appid': 'steam_appid',
    'name': 'game_name'
})

In [15]:
df = df.with_columns(
    pl.when(pl.col('avg_players') != 0)
    .then(pl.col('peak_players') / pl.col('avg_players'))
    .otherwise(None)
    .alias('peak_to_avg_ratio')
)

In [16]:
df

data,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio
date,f64,f64,f64,i64,str,i64,f64
2025-09-25,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088
2025-08-25,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784
2025-07-25,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564
2025-06-25,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416
2025-05-25,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784
…,…,…,…,…,…,…,…
2025-04-25,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806
2025-03-25,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294
2025-02-25,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618
2025-01-25,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473


In [17]:
df = df.with_columns(
    (pl.col('gain_percent') > 0).alias('had_positive_gain')
)

In [18]:
year = df.select(
    pl.col('data').dt.year().alias('year')
)

month = df.select(
    pl.col('data').dt.month().alias('month')
)

In [19]:
df.insert_column(
  1, year.to_series()
)

data,year,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [20]:
df.insert_column(
  2, month.to_series()
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [21]:
df

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [22]:
df

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [23]:
avg_players_summary = df.select(
    pl.col('avg_players').min().alias('min'),
    pl.col('avg_players').quantile(0.25).alias('Q1'),
    pl.col('avg_players').quantile(0.5).alias('Q2'),
    pl.col('avg_players').mean().alias('mean'),
    pl.col('avg_players').median().alias('median'),
    pl.col('avg_players').quantile(0.75).alias('Q3'),
    pl.col('avg_players').quantile(0.9).alias('Q9'),
    pl.col('avg_players').max().alias('max')
)

avg_players_summary

min,Q1,Q2,mean,median,Q3,Q9,max
f64,f64,f64,f64,f64,f64,f64,f64
0.0,2.53,10.61,593.294362,10.61,60.46,331.91,1584900.0


In [24]:
T1 = df['avg_players'].quantile(1/3)
T2 = df['avg_players'].quantile(2/3)

print(f"T1 (33.3rd percentile): {T1}")
print(f"T2 (66.6th percentile): {T2}")

T1 (33.3rd percentile): 4.1
T2 (66.6th percentile): 31.43


In [25]:
df = df.with_columns(
    pl.when(pl.col('avg_players') > T2).then(pl.lit('High'))
      .when(pl.col('avg_players') > T1).then(pl.lit('Medium'))
      .otherwise(pl.lit('Low'))
      .alias('size_category')
)

In [26]:
df.with_columns(
    pl.when(pl.col('avg_players') > avg_players_summary['Q1'].item())
      .then(pl.lit('Medium'))
    .when(pl.col('avg_players') > avg_players_summary['Q2'].item())
      .then(pl.lit('Large'))
    .when(pl.col('avg_players') > avg_players_summary['Q3'].item())
      .then(pl.lit('Huge'))
    .otherwise(pl.lit('Small')).alias('size_category')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""Medium"""
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""Medium"""
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""Medium"""
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""Medium"""
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""Medium"""
…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Small"""
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Medium"""
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Medium"""
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium"""


In [27]:
avg_players_sort = df.select(
    pl.col('avg_players').sort()
)

In [28]:
df.select(
    pl.col('size_category').unique()
)

size_category
str
"""Low"""
"""High"""
"""Medium"""


# Phase 3: Filtering & Sorting (EDA)

In [29]:
df.collect_schema()

Schema([('data', Date),
        ('year', Int32),
        ('month', Int8),
        ('avg_players', Float64),
        ('gain', Float64),
        ('gain_percent', Float64),
        ('peak_players', Int64),
        ('game_name', String),
        ('steam_appid', Int64),
        ('peak_to_avg_ratio', Float64),
        ('had_positive_gain', Boolean),
        ('size_category', String)])

In [30]:
df.filter(
    pl.col('game_name').str.contains('Marvel')
).select(
    pl.col('game_name').unique()
)

game_name
str
"""Marvel's Midnight Suns"""
"""LEGO® Marvel Super Heroes 2"""
"""Marvel: Ultimate Alliance 2"""
"""LEGO® Marvel™ Super Heroes"""
"""Marvel vs. Capcom: Infinite"""
"""Marvel: Ultimate Alliance"""


In [31]:
df.group_by(
    'game_name'
).agg([
    pl.col('peak_players').max().alias('max_peak_players'),
    pl.col('month').filter(pl.col('peak_players') == pl.col('peak_players').max()).first().alias('peak_month'),
]).sort('max_peak_players', descending=True)

game_name,max_peak_players,peak_month
str,i64,i8
"""PUBG: BATTLEGROUNDS""",3236027,1
"""Counter-Strike 2""",1818368,3
"""Terraria""",486918,5
"""Fallout 4""",471955,11
"""Life is Strange 2""",468634,9
…,…,…
"""Tower And Guardian 塔与守护者""",0,8
"""FormFish""",0,1
"""Lowpoly Hero""",0,1
"""Arcadia Beta""",0,8


In [32]:
df.filter(
    pl.col('avg_players') > 100_000
).group_by(
    'game_name'
).agg(
    pl.sum('avg_players').alias('total_avg_players')
)

game_name,total_avg_players
str,f64
"""Counter-Strike 2""",7.4216e7
"""Mount & Blade II: Bannerlord""",113492.99
"""Fallout 4""",226297.83
"""HELLDIVERS™ 2""",634203.57
"""Team Fortress 2""",551018.78
…,…
"""PUBG: BATTLEGROUNDS""",3.5798e7
"""Total War: THREE KINGDOMS""",102719.07
"""Monster Hunter: World""",408339.41
"""Grand Theft Auto V Legacy""",3.2275e6


In [33]:
df.filter(
    pl.col('gain_percent') < 0,
    pl.col('avg_players') > 1_000
).sort(
    by='gain_percent',descending=False
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str
2025-09-16,2025,9,2982.79,33993.62,-0.9193,9254,"""No Man's Sky""",275850,3.102464,false,"""High"""
2025-07-24,2025,7,1290.05,8992.25,-0.8745,7349,"""The Descendant""",351940,5.696678,false,"""High"""
2025-03-22,2025,3,12738.76,78488.12,-0.8604,30261,"""Dying Light 2 Stay Human: Relo…",534380,2.375506,false,"""High"""
2025-10-19,2025,10,1296.91,7695.08,-0.8558,3405,"""Remnant: From the Ashes""",617290,2.625471,false,"""High"""
2025-06-21,2025,6,2751.74,15031.19,-0.8453,10454,"""BIOMUTANT""",597820,3.799051,false,"""High"""
…,…,…,…,…,…,…,…,…,…,…,…
2025-06-17,2025,6,2099.76,0.23,-0.0001,3332,"""Dirty Bomb®""",333930,1.586848,false,"""High"""
2025-03-21,2025,3,1035.1,0.15,-0.0001,1477,"""Fallout Shelter""",588430,1.426915,false,"""High"""
2025-09-23,2025,9,2397.55,0.18,-0.0001,4400,"""CarX Drift Racing Online""",635260,1.835207,false,"""High"""
2025-01-22,2025,1,1010.52,0.09,-0.0001,1611,"""March of Empires""",702320,1.594229,false,"""High"""


In [34]:
df.filter(
    pl.col('game_name').is_in(['Counter-Strike', 'Dota 2'])
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High"""
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High"""
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High"""
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High"""
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High"""
…,…,…,…,…,…,…,…,…,…,…,…
2025-11-12,2025,11,29669.97,833.67,0.0289,56957,"""Counter-Strike""",10,1.919685,true,"""High"""
2025-10-12,2025,10,28836.29,596.27,-0.0203,56053,"""Counter-Strike""",10,1.943835,false,"""High"""
2025-09-12,2025,9,29432.56,3663.24,-0.1107,55321,"""Counter-Strike""",10,1.879585,false,"""High"""
2025-08-12,2025,8,33095.8,1043.4,-0.0306,53685,"""Counter-Strike""",10,1.622109,false,"""High"""


In [35]:
df.group_by(
    'game_name'
).agg(
    pl.col('avg_players').min().alias('min_avg_players')
).sort(
    'min_avg_players', descending=True
).filter(
    pl.col('min_avg_players') > 1000
)


game_name,min_avg_players
str,f64
"""Team Fortress 2""",36781.88
"""Grand Theft Auto V Legacy""",25230.19
"""HELLDIVERS™ 2""",23928.28
"""PUBG: BATTLEGROUNDS""",17551.14
"""Sid Meier’s Civilization® VI""",14059.3
…,…
"""Crusader Kings II""",1144.14
"""Portal 2""",1141.61
"""Call of Duty®: Black Ops III""",1051.97
"""AdVenture Capitalist""",1021.7


# Phase 4: Aggregation & Grouping (Analysis)

In [36]:
df.group_by(
    'game_name'
).agg(
    pl.mean('avg_players').alias('avg_players_mean'),
).sort(
    by='avg_players_mean', descending=True
)

game_name,avg_players_mean
str,f64
"""Counter-Strike 2""",471708.649623
"""PUBG: BATTLEGROUNDS""",348401.458252
"""Grand Theft Auto V Legacy""",77629.456905
"""HELLDIVERS™ 2""",67953.5435
"""Team Fortress 2""",61087.537547
…,…
"""Arcadia Beta""",0.0
"""FormFish""",0.0
"""Galaxy Force II™""",0.0
"""Tower And Guardian 塔与守护者""",0.0


In [37]:
df.group_by(
    'steam_appid'
).agg(
    pl.max('peak_players').alias('peak_players_max'),
).sort(
    by='peak_players_max', descending=True
)

steam_appid,peak_players_max
i64,i64
578080,3236027
730,1818368
105600,486918
377160,471955
532210,468634
…,…
72530,0
770790,0
770360,0
683960,0


In [38]:
df.group_by("game_name").len().sort(by="len", descending=True)

game_name,len
str,u32
"""Fallout: New Vegas""",318
"""DOOM 3""",315
"""Solitaire""",199
"""Rayman® Origins""",159
"""Sniper: Ghost Warrior""",159
…,…
"""Thrushbriar Hall""",1
"""Boomer Rampage""",1
"""Triple X Tycoon""",1
"""Sounds of Music""",1


In [39]:
df.group_by("game_name").agg(
    pl.len().alias("months_of_data")
).sort("months_of_data", descending=True)

game_name,months_of_data
str,u32
"""Fallout: New Vegas""",318
"""DOOM 3""",315
"""Solitaire""",199
"""Tropico Reloaded""",159
"""Alice: Madness Returns""",159
…,…
"""Tombo Breaker VR""",1
"""More Than Just Chess""",1
"""Neckbeards: Silly Squadron""",1
"""Duel Survival""",1


In [40]:
df.group_by(
    'game_name'
).agg(
    pl.sum('gain').alias('total_gain'),
).sort(
    by='total_gain', descending=True
)

game_name,total_gain
str,f64
"""Counter-Strike 2""",4.5703e6
"""PUBG: BATTLEGROUNDS""",3.9869e6
"""Path of Exile""",1.8674e6
"""Grand Theft Auto V Legacy""",1.4839e6
"""Team Fortress 2""",880971.85
…,…
"""Anima Flux""",0.0
"""Squishies""",0.0
"""Arcadia Beta""",0.0
"""Snowballer""",0.0


In [41]:
df.group_by(
    'game_name'
).agg(
    pl.col('month').max().alias('last_month'),
)

game_name,last_month
str,i8
"""Dominions 4: Thrones of Ascens…",12
"""Crossout""",12
"""Head Shot""",12
"""MarZ: Tactical Base Defense""",12
"""Eaten Alive""",12
…,…
"""Ultimate Epic Battle Simulator""",12
"""SYNTHETIK: Legion Rising""",12
"""Blender""",12
"""Everyday Genius: SquareLogic""",12


In [42]:
df.group_by(
    'year'
).agg(
    pl.mean('avg_players').alias('avg_players_mean'),
)

year,avg_players_mean
i32,f64
2025,593.294362


# Phase 5: Joining & Reshaping (Advanced)

In [43]:
game_info = pl.DataFrame({
    "game_id": [10, 730, 570, 440],
    "genre": ["Shooter", "Shooter", "MOBA", "Shooter"],
    "release_year": [2000, 2012, 2013, 2007]
})

In [44]:
joined_df = df.join(
    game_info,
    left_on='steam_appid',
    right_on='game_id',
    how='left',
)

In [45]:
joined_df.group_by(
    'genre'
).agg(
    pl.sum('peak_players').alias('total_peak_players'),
)

genre,total_peak_players
str,i64
"""Shooter""",146679273
,583616893


In [46]:
df.pivot(
    values="avg_players",
    index="game_name",
    on="month",
    aggregate_function="mean"
)

game_name,9,8,7,6,5,4,3,2,1,12,11,10
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""Counter-Strike""",12178.205,13000.875,13416.478571,11816.792308,12118.275385,13056.308462,13776.706154,14547.537692,15559.726154,14340.264615,13264.692308,12691.33
"""Team Fortress Classic""",60.707857,64.695,69.745,63.726154,61.38,63.770769,66.022308,68.762308,72.392308,68.374615,63.858462,60.556923
"""Day of Defeat""",131.739286,134.782857,138.409286,123.290769,126.365385,134.527692,143.110769,147.892308,152.966154,149.56,142.441538,136.726923
"""Deathmatch Classic""",4.208571,5.06,6.290714,4.900769,4.255385,4.077692,3.924615,4.113846,4.489231,4.786154,4.582308,3.873077
"""Half-Life: Opposing Force""",69.025,75.697143,97.656429,75.328462,68.193846,75.606154,85.058462,89.726923,107.449231,103.451538,82.539231,62.82
…,…,…,…,…,…,…,…,…,…,…,…,…
"""Tennis Kings VR""",0.105,0.09,0.17,0.09,0.07,0.065,0.15,0.32,0.1,0.1,0.46,0.05
"""Fhtagn! - Tales of the Creepin…",0.235,0.28,0.34,0.28,0.54,0.175,0.135,0.12,0.14,0.2,0.33,0.463333
"""NAIRI: Tower of Shirin""",1.74,2.3,4.54,0.986667,1.113333,1.163333,0.97,1.286667,1.38,1.836667,1.56,2.14
"""Crazy Machines VR""",0.03,0.01,0.08,0.02,0.03,0.03,0.07,0.16,0.2,0.14,0.41,1.97


# Window Function Tasks

In [55]:
df.with_columns(
    pl.col('avg_players').mean().over('game_name').alias('avg_players_mean')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,avg_players_mean
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",13305.465723
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",13305.465723
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",13305.465723
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",13305.465723
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",13305.465723
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",3.514
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",3.514
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",3.514
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",3.514


In [56]:
df.with_columns(
    pl.col('month').rank().over('avg_players').alias('rank')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,rank
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",1.0
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",1.0
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",1.0
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",1.0
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",1.0
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",103.0
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",70.0
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",37.5
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",13.0


In [57]:
df.with_columns(
    pl.col('month').first().over('avg_players').alias('rank')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,rank
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,i8
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",9
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",8
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",7
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",6
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",5
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",5
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",3
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",3
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",6


In [60]:
df.select(
    pl.col('game_name'),
    pl.col('data'),
    pl.col('avg_players'),
    pl.col('avg_players').shift(1).over('game_name').alias('prev_month_avg')
)

game_name,data,avg_players,prev_month_avg
str,date,f64,f64
"""Counter-Strike""",2025-09-25,7805.25,
"""Counter-Strike""",2025-08-25,6922.13,7805.25
"""Counter-Strike""",2025-07-25,7371.48,6922.13
"""Counter-Strike""",2025-06-25,8204.98,7371.48
"""Counter-Strike""",2025-05-25,9052.51,8204.98
…,…,…,…
"""The Ditzy Demons Are in Love W…",2025-04-25,2.48,4.48
"""The Ditzy Demons Are in Love W…",2025-03-25,3.4,2.48
"""The Ditzy Demons Are in Love W…",2025-02-25,3.59,3.4
"""The Ditzy Demons Are in Love W…",2025-01-25,4.23,3.59


In [66]:
df.with_columns(
    pl.col('avg_players').rolling_mean(3).over('game_name').alias('rolling_avg'),
    pl.col('avg_players').min().over('game_name').alias('min_avg'),
    pl.col('avg_players').max().over('game_name').alias('max_avg'),
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,rolling_avg,min_avg,max_avg
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64,f64,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",,6922.13,34814.47
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",,6922.13,34814.47
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",7366.286667,6922.13,34814.47
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",7499.53,6922.13,34814.47
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",8209.656667,6922.13,34814.47
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",3.546667,2.44,4.77
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",3.453333,2.44,4.77
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",3.156667,2.44,4.77
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",3.74,2.44,4.77


In [70]:
df.with_columns(
    pl.col('gain').cum_sum().over('game_name').alias('cum_gain')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,cum_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",883.12
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",1332.47
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",2165.97
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",3013.5
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",3484.81
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",4.98
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",5.17
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",5.82
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",6.35


In [71]:
df.with_columns(
    pl.col('avg_players')
    .rolling_mean(3)
    .over('game_name')
    .alias('rolling_avg')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,rolling_avg
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",7366.286667
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",7499.53
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",8209.656667
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",3.546667
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",3.453333
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",3.156667
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",3.74


In [76]:
df.with_columns(
    (pl.col('avg_players') / pl.col('avg_players').sum().over('game_name') * 100).alias('avg_players_percent')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,avg_players_percent
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",0.368943
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",0.327199
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",0.34844
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",0.387838
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",0.4279
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",7.057484
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",9.675583
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",10.216278
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",12.037564


In [77]:
df.with_columns(
    pl.col('gain').last().over('game_name').alias('last_gain')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category,last_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str,f64
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""High""",0.0
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""High""",0.0
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""High""",0.0
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""High""",0.0
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""High""",0.0
…,…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Low""",0.78
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Low""",0.78
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Low""",0.78
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium""",0.78
