# Polars

In [1]:
import polars as pl


### Load Data

In [2]:
# Directory the CSV tables are read from; a relative path, so it is
# resolved against the notebook's working directory.
DATA_DIR = "../data/dota"


In [3]:
# Read every raw CSV table from DATA_DIR into its own polars DataFrame.
# The targets and the paths are listed in the same order.
(
    pl_match,
    pl_cluster_regions,
    pl_purchase_log,
    pl_item_id_names,
    pl_players,
) = (
    pl.read_csv(csv_path)
    for csv_path in (
        f"{DATA_DIR}/match.csv",
        f"{DATA_DIR}/cluster_regions.csv",
        f"{DATA_DIR}/purchase_log.csv",
        f"{DATA_DIR}/item_ids.csv",
        f"{DATA_DIR}/players.csv",
    )
)


In [4]:
# add region names to match data

match_with_region = (
    pl_match
    .join(pl_cluster_regions, on="cluster", how="left")
    # the join key is not needed once the cluster_regions columns are attached
    .drop("cluster")
)


In [5]:
# add item names to item purchases


def _purchase_times_by_item(cols):
    """Build ``{item_name: [purchase times]}`` for one (match, player) group.

    ``cols`` holds the group's series in the order they are passed to
    ``pl.apply``: ``cols[0]`` is ``item_name`` and ``cols[1]`` is ``time``
    (one list of purchase times per item).
    """
    item_names, purchase_times = cols[0], cols[1]
    return dict(zip(item_names, purchase_times.to_list()))


purchases_with_item_names = (
    pl_purchase_log
    .join(pl_item_id_names, on="item_id", how="left")
    .drop("item_id")
    # one row per (match, player, item) with all purchase times of that item
    .groupby(["match_id", "player_slot", "item_name"])
    .agg(pl.col("time").list().keep_name())
    # one row per (match, player): fold the item rows into a single dict
    .groupby(["match_id", "player_slot"])
    .agg(
        [
            pl.apply(
                [pl.col("item_name"), pl.col("time")],
                _purchase_times_by_item,
            ).alias("purchases")
        ]
    )
)

purchases_with_item_names.head()


match_id,player_slot,purchases
i64,i64,object
20774,130,"{'power_treads': [1557], 'tpscroll': [724, 893, 1082, 1355, 1650, 1999], 'sobi_mask': [1082], 'staff_of_wizardry': [1191], 'flask': [-76], 'clarity': [-78, -77], 'cyclone': [1371], 'point_booster': [2101], 'ward_dispenser': [1851, 1868, 1997, 1997], 'boots': [304], 'flying_courier': [182], 'tango': [-76], 'ward_observer': [-80, -79, 305, 306, 784, 1089, 1355, 1570, 1850, 1995], 'gloves': [1522], 'belt_of_strength': [1468], 'courier': [-82], 'void_stone': [723], 'ward_sentry': [1997]}"
25486,4,"{'buckler': [990], 'branches': [-80, -80, 957, 1405], 'clarity': [-66], 'phase_boots': [505], 'blades_of_attack': [505, 505], 'tango': [-84], 'crimson_guard': [990], 'ultimate_scepter': [1874], 'ring_of_protection': [1405], 'headdress': [1405], 'stout_shield': [-81], 'courier': [1291], 'ring_of_basilius': [1405], 'vanguard': [839], 'ogre_axe': [1870], 'magic_wand': [209], 'ring_of_health': [839], 'vladmir': [1447], 'magic_stick': [163], 'staff_of_wizardry': [1870], 'sobi_mask': [1360], 'tpscroll': [259, 458, 524, 646, 1077, 1080, 1230, 1301], 'chainmail': [861], 'ring_of_regen': [1255], 'blade_of_alacrity': [1870], 'enchanted_mango': [-83], 'vitality_booster': [812], 'boots': [187], 'lifesteal': [1165], 'point_booster': [1789], 'circlet': [163]}"
21801,3,"{'circlet': [-83, 967], 'black_king_bar': [1715], 'ring_of_aquila': [637], 'mithril_hammer': [1578], 'gloves': [774], 'belt_of_strength': [601, 721, 2600], 'tango': [-84], 'ghost': [2003], 'claymore': [1302], 'power_treads': [810], 'ring_of_protection': [618], 'magic_wand': [1003], 'ogre_axe': [1470, 2600], 'shadow_amulet': [1180], 'ring_of_basilius': [618], 'ethereal_blade': [2364], 'magic_stick': [967], 'sobi_mask': [618], 'tpscroll': [159, 268, 268, 435, 547, 649, 680, 804, 964, 973, 1303, 1374, 1537, 1759, 1791, 2269, 2609], 'silver_edge': [2607], 'slippers': [-83], 'invis_sword': [1302], 'eagle': [2340], 'boots': [470], 'sange': [2600], 'wraith_band': [-83], 'bottle': [187], 'branches': [-72, -72]}"
40579,1,"{'boots_of_elves': [827], 'dust': [1085], 'blade_of_alacrity': [812], 'slippers': [-56], 'tpscroll': [21, 808, 814, 1087, 1387, 1522, 1900, 2098], 'lesser_crit': [2417], 'boots': [224], 'wraith_band': [-56], 'yasha': [1081], 'branches': [-53, -53], 'bottle': [159], 'phase_boots': [340], 'circlet': [-56], 'mithril_hammer': [1649, 2054], 'desolator': [2097], 'blades_of_attack': [287, 340, 2417], 'broadsword': [2417], 'manta': [1385], 'ultimate_orb': [1256], 'blink': [635]}"
4475,4,"{'blink': [3301], 'ogre_axe': [1834], 'butterfly': [2836], 'boots_of_elves': [1436], 'demon_edge': [3121], 'tpscroll': [585, 663, 771, 1027, 1175, 1228, 1838], 'talisman_of_evasion': [2570], 'javelin': [3249, 3276], 'blade_of_alacrity': [1525], 'yasha': [1704], 'boots': [-77], 'invis_sword': [769], 'eagle': [2807], 'monkey_king_bar': [3295], 'black_king_bar': [1938], 'mithril_hammer': [1111, 1347, 1747], 'branches': [-80, -76, -74], 'bottle': [212], 'gloves': [257], 'belt_of_strength': [314], 'quarterstaff': [2625], 'desolator': [1376], 'ultimate_orb': [2293], 'manta': [2383], 'flying_courier': [657], 'power_treads': [349], 'claymore': [639], 'shadow_amulet': [440]}"


In [6]:
# add match and item information to player data

pl_final = (
    # rows with account_id == 0 are excluded before joining
    pl_players.filter(pl.col("account_id") != 0)
    .join(purchases_with_item_names, on=["match_id", "player_slot"])
    .join(match_with_region, how="left", on="match_id")
)

# Drop columns in which more than 20% of the values are null.
# Columns are selected by name so the decision for each column cannot drift
# out of alignment with `pl_final.columns`. `purchases` is always kept and is
# never null-counted (it is an object column; presumably null_count is not
# meaningful for it -- TODO confirm).
max_nulls = 0.2 * pl_final.height
kept_columns = [
    col
    for col in pl_final.columns
    if col == "purchases" or pl_final[col].null_count() <= max_nulls
]
pl_final = pl_final.select(kept_columns)

print(pl_final)


shape: (318787, 67)
┌──────────┬──────────┬─────────┬───────────┬─────┬───────────┬────────────┬────────────┬─────────┐
│ match_id ┆ account_ ┆ hero_id ┆ player_sl ┆ ... ┆ radiant_w ┆ negative_v ┆ positive_v ┆ region  │
│ ---      ┆ id       ┆ ---     ┆ ot        ┆     ┆ in        ┆ otes       ┆ otes       ┆ ---     │
│ i64      ┆ ---      ┆ i64     ┆ ---       ┆     ┆ ---       ┆ ---        ┆ ---        ┆ str     │
│          ┆ i64      ┆         ┆ i64       ┆     ┆ bool      ┆ i64        ┆ i64        ┆         │
╞══════════╪══════════╪═════════╪═══════════╪═════╪═══════════╪════════════╪════════════╪═════════╡
│ 20774    ┆ 82350    ┆ 68      ┆ 130       ┆ ... ┆ false     ┆ 0          ┆ 0          ┆ US EAST │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ 25486    ┆ 27484    ┆ 60      ┆ 4         ┆ ... ┆ true      ┆ 0          ┆ 0          ┆ EUROPE  │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌

### Analyse the data

#### Win rates

In [12]:
# Summary statistics of the `duration` column; used to pick the
# long/short game cut-offs.
pl_final.get_column("duration").describe()


statistic,value
str,f64
"""min""",59.0
"""max""",16037.0
"""null_count""",0.0
"""mean""",2461.093294
"""std""",641.437472
"""count""",318787.0


In [32]:
# Flag unusually long / short matches. The cut-offs are rounded from
# mean +/- 2.5 std of `duration` (mean ~ 2461, std ~ 641, i.e. ~ 4065 and ~ 858).
# `with_columns` returns a new frame instead of assigning into the existing
# one, so re-running this cell always yields the same result.
pl_final = pl_final.with_columns(
    [
        (pl.col("duration") > 4000).alias("long_game"),
        (pl.col("duration") < 800).alias("short_game"),
    ]
)

# Mean of radiant_win (cast to int) per (long_game, short_game) combination,
# i.e. the radiant win rate in each duration bucket.
pl_final.groupby(["long_game", "short_game"]).agg(
    pl.col("radiant_win").cast(int).mean()
)
# radiant has a higher win rate in short games


long_game,short_game,radiant_win_mean
bool,bool,f64
True,False,0.504734
False,False,0.516999
False,True,0.652463
