In [1]:
import polars as pl
import polars.selectors as cs

import altair as alt
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

In [2]:
df_path = r'/Users/zygimantas/Documents/Data_sets/steamcharts.csv'

In [3]:
df = pl.read_csv(df_path, infer_schema_length=10000)

In [4]:
df.collect_schema()

Schema([('month', String),
        ('avg_players', Float64),
        ('gain', String),
        ('gain_percent', Float64),
        ('peak_players', Int64),
        ('name', String),
        ('steam_appid', Int64)])

In [5]:
df.columns

['month',
 'avg_players',
 'gain',
 'gain_percent',
 'peak_players',
 'name',
 'steam_appid']

In [6]:
print(df.head())

shape: (5, 7)
┌────────┬─────────────┬─────────┬──────────────┬──────────────┬────────────────┬─────────────┐
│ month  ┆ avg_players ┆ gain    ┆ gain_percent ┆ peak_players ┆ name           ┆ steam_appid │
│ ---    ┆ ---         ┆ ---     ┆ ---          ┆ ---          ┆ ---            ┆ ---         │
│ str    ┆ f64         ┆ str     ┆ f64          ┆ i64          ┆ str            ┆ i64         │
╞════════╪═════════════╪═════════╪══════════════╪══════════════╪════════════════╪═════════════╡
│ Sep-25 ┆ 7805.25     ┆ 883.12  ┆ 0.1276       ┆ 13254        ┆ Counter-Strike ┆ 10          │
│ Aug-25 ┆ 6922.13     ┆ -449.35 ┆ -0.061       ┆ 12168        ┆ Counter-Strike ┆ 10          │
│ Jul-25 ┆ 7371.48     ┆ -833.5  ┆ -0.1016      ┆ 13951        ┆ Counter-Strike ┆ 10          │
│ Jun-25 ┆ 8204.98     ┆ -847.53 ┆ -0.0936      ┆ 15798        ┆ Counter-Strike ┆ 10          │
│ May-25 ┆ 9052.51     ┆ -471.31 ┆ -0.0495      ┆ 15333        ┆ Counter-Strike ┆ 10          │
└────────┴─────────────┴──

In [7]:
df = df.with_columns(
    pl.col('gain').str.replace('-', '0').cast(pl.Float64),
    (pl.lit('2025 ') + pl.col('month')).alias('month').str.strptime(pl.Date(), '%Y %b-%d')
)

In [8]:
df.columns

['month',
 'avg_players',
 'gain',
 'gain_percent',
 'peak_players',
 'name',
 'steam_appid']

In [9]:
df = df.rename({
    'month': 'data'
})

In [10]:
df

data,avg_players,gain,gain_percent,peak_players,name,steam_appid
date,f64,f64,f64,i64,str,i64
2025-09-25,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10
2025-08-25,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10
2025-07-25,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10
2025-06-25,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10
2025-05-25,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10
…,…,…,…,…,…,…
2025-04-25,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870
2025-03-25,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870
2025-02-25,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870
2025-01-25,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870


In [11]:
df.select([
    pl.col(col).n_unique().alias(col) for col in df.columns
])

data,avg_players,gain,gain_percent,peak_players,name,steam_appid
u32,u32,u32,u32,u32,u32,u32
159,82592,40889,40995,17097,6725,6729


In [12]:
df.null_count()

data,avg_players,gain,gain_percent,peak_players,name,steam_appid
u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0


In [13]:
df.columns

['data',
 'avg_players',
 'gain',
 'gain_percent',
 'peak_players',
 'name',
 'steam_appid']

In [14]:
df = df.rename({
    'steam_appid': 'steam_appid',
    'name': 'game_name'
})

In [15]:
df = df.with_columns(
    pl.when(pl.col('avg_players') != 0)
    .then(pl.col('peak_players') / pl.col('avg_players'))
    .otherwise(None)
    .alias('peak_to_avg_ratio')
)

In [16]:
df

data,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio
date,f64,f64,f64,i64,str,i64,f64
2025-09-25,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088
2025-08-25,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784
2025-07-25,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564
2025-06-25,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416
2025-05-25,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784
…,…,…,…,…,…,…,…
2025-04-25,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806
2025-03-25,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294
2025-02-25,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618
2025-01-25,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473


In [17]:
df = df.with_columns(
    (pl.col('gain_percent') > 0).alias('had_positive_gain')
)

In [18]:
year = df.select(
    pl.col('data').dt.year().alias('year')
)

month = df.select(
    pl.col('data').dt.month().alias('month')
)

In [19]:
df.insert_column(
  1, year.to_series()
)

data,year,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [20]:
df.insert_column(
  2, month.to_series()
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [21]:
df

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [22]:
df

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false
…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false


In [23]:
avg_players_summary = df.select(
    pl.col('avg_players').min().alias('min'),
    pl.col('avg_players').quantile(0.25).alias('Q1'),
    pl.col('avg_players').quantile(0.5).alias('Q2'),
    pl.col('avg_players').mean().alias('mean'),
    pl.col('avg_players').median().alias('median'),
    pl.col('avg_players').quantile(0.75).alias('Q3'),
    pl.col('avg_players').quantile(0.9).alias('Q9'),
    pl.col('avg_players').max().alias('max')
)

avg_players_summary

min,Q1,Q2,mean,median,Q3,Q9,max
f64,f64,f64,f64,f64,f64,f64,f64
0.0,2.53,10.61,593.294362,10.61,60.46,331.91,1584900.0


In [24]:
T1 = df['avg_players'].quantile(1/3)
T2 = df['avg_players'].quantile(2/3)

print(f"T1 (33.3rd percentile): {T1}")
print(f"T2 (66.6th percentile): {T2}")

T1 (33.3rd percentile): 4.1
T2 (66.6th percentile): 31.43


In [31]:
df = df.with_columns(
    pl.when(pl.col('avg_players') > T2).then(pl.lit('High'))
      .when(pl.col('avg_players') > T1).then(pl.lit('Medium'))
      .otherwise(pl.lit('Low'))
      .alias('size_category')
)

In [26]:
df.with_columns(
    pl.when(pl.col('avg_players') > avg_players_summary['Q1'].item())
      .then(pl.lit('Medium'))
    .when(pl.col('avg_players') > avg_players_summary['Q2'].item())
      .then(pl.lit('Large'))
    .when(pl.col('avg_players') > avg_players_summary['Q3'].item())
      .then(pl.lit('Huge'))
    .otherwise(pl.lit('Small')).alias('size_category')
)

data,year,month,avg_players,gain,gain_percent,peak_players,game_name,steam_appid,peak_to_avg_ratio,had_positive_gain,size_category
date,i32,i8,f64,f64,f64,i64,str,i64,f64,bool,str
2025-09-25,2025,9,7805.25,883.12,0.1276,13254,"""Counter-Strike""",10,1.698088,true,"""Medium"""
2025-08-25,2025,8,6922.13,449.35,-0.061,12168,"""Counter-Strike""",10,1.75784,false,"""Medium"""
2025-07-25,2025,7,7371.48,833.5,-0.1016,13951,"""Counter-Strike""",10,1.892564,false,"""Medium"""
2025-06-25,2025,6,8204.98,847.53,-0.0936,15798,"""Counter-Strike""",10,1.925416,false,"""Medium"""
2025-05-25,2025,5,9052.51,471.31,-0.0495,15333,"""Counter-Strike""",10,1.693784,false,"""Medium"""
…,…,…,…,…,…,…,…,…,…,…,…
2025-04-25,2025,4,2.48,0.92,-0.2709,8,"""The Ditzy Demons Are in Love W…",802870,3.225806,false,"""Small"""
2025-03-25,2025,3,3.4,0.19,-0.0532,11,"""The Ditzy Demons Are in Love W…",802870,3.235294,false,"""Medium"""
2025-02-25,2025,2,3.59,0.65,-0.1527,12,"""The Ditzy Demons Are in Love W…",802870,3.342618,false,"""Medium"""
2025-01-25,2025,1,4.23,0.53,-0.1119,11,"""The Ditzy Demons Are in Love W…",802870,2.600473,false,"""Medium"""


In [32]:
avg_players_sort = df.select(
    pl.col('avg_players').sort()
)

In [33]:
df.select(
    pl.col('size_category').unique()
)

size_category
str
"""Medium"""
"""High"""
"""Low"""
