# Polars

In [1]:
import polars as pl


### Load Data

In [2]:
# Directory holding the CSV files loaded by the next cell (relative path).
DATA_DIR = "../data/dota"


In [3]:
# Read each raw CSV under DATA_DIR into its own DataFrame.
# The targets on the left and the paths on the right are matched by position,
# so keep both sequences in the same order.
(
    pl_match,
    pl_cluster_regions,
    pl_purchase_log,
    pl_item_id_names,
    pl_players,
) = (
    pl.read_csv(csv_path)
    for csv_path in (
        f"{DATA_DIR}/match.csv",
        f"{DATA_DIR}/cluster_regions.csv",
        f"{DATA_DIR}/purchase_log.csv",
        f"{DATA_DIR}/item_ids.csv",
        f"{DATA_DIR}/players.csv",
    )
)


In [4]:
# Look up the region for every match through its `cluster` key.
# A left join keeps matches whose cluster has no entry in the lookup table;
# the key column is dropped once the join is done.

match_with_region = (
    pl_match
    .join(pl_cluster_regions, on="cluster", how="left")
    .drop("cluster")
)


In [5]:
# add item names to item purchases


def _collect_purchases(columns):
    """Build an {item_name: [purchase times]} dict for one group.

    `columns` holds the grouped inputs in the order they are handed to
    `pl.apply` below: the item names first, then the per-item lists of times.
    """
    item_names = columns[0]
    time_lists = columns[1]
    return dict(zip(item_names, time_lists.to_list()))


# Swap the numeric item id for its name (left join keeps unmatched purchases).
named_purchases = pl_purchase_log.join(
    pl_item_id_names, on="item_id", how="left"
).drop("item_id")

# One row per (match, player, item): every purchase time of that item,
# collected into a list that stays in the `time` column.
times_by_item = named_purchases.groupby(
    ["match_id", "player_slot", "item_name"]
).agg(pl.col("time").list().keep_name())

# One row per (match, player): all items folded into a single dict column.
purchases_with_item_names = times_by_item.groupby(["match_id", "player_slot"]).agg(
    [
        pl.apply(
            [pl.col("item_name"), pl.col("time")],
            _collect_purchases,
        ).alias("purchases")
    ]
)

purchases_with_item_names.head()


match_id,player_slot,purchases
i64,i64,object
34580,0,"{'gloves': [2110], 'belt_of_strength': [538, 2114], 'ring_of_regen': [1361], 'necronomicon_3': [1202], 'boots': [334], 'ghost': [1894], 'stout_shield': [-68], 'necronomicon': [924], 'power_treads': [2119], 'clarity': [-65], 'poor_mans_shield': [71], 'staff_of_wizardry': [659, 1312], 'tango': [-69], 'necronomicon_2': [1071], 'force_staff': [1587], 'slippers': [71, 71], 'flask': [-66, 237], 'tpscroll': [234, 378, 538, 689, 719, 1207, 1360, 1427, 1694], 'ward_observer': [-67]}"
41841,0,"{'staff_of_wizardry': [1246, 2571], 'flying_courier': [263], 'tango': [-79], 'point_booster': [2266], 'ring_of_protection': [514], 'circlet': [-71], 'branches': [-81, -81, 354], 'ward_sentry': [-86, 346, 750, 1009, 1249, 2021, 2257, 2263, 2480], 'force_staff': [1368], 'tpscroll': [117, 487, 659, 661, 1155, 1247, 1357, 1613, 1695, 2325], 'ward_observer': [-84, 346, 723, 724, 891, 1008, 1249, 1373, 1874, 1900, 2256, 2263, 2480], 'magic_wand': [377], 'tranquil_boots': [634], 'ring_of_regen': [619, 1356], 'ward_dispenser': [-84, 346, 751, 1009, 1249, 1403, 2021, 2257, 2263, 2480], 'boots': [483], 'magic_stick': [99], 'blink': [1844], 'ogre_axe': [2535]}"
42411,2,"{'cloak': [873], 'reaver': [2232], 'pipe': [1872], 'clarity': [-76], 'headdress': [1530], 'boots': [182], 'bottle': [423], 'ward_dispenser': [1366], 'ring_of_regen': [-77, 1051, 1079, 1529], 'tranquil_boots': [182], 'tpscroll': [205, 513, 679, 971, 1125, 1368, 1999], 'hood_of_defiance': [1140], 'ward_observer': [1366], 'ward_sentry': [1124, 1871, 1992], 'ring_of_health': [868], 'branches': [1529], 'ring_of_protection': [-79]}"
4648,4,"{'branches': [1085], 'robe': [697], 'clarity': [-79], 'headdress': [1085], 'travel_boots': [867], 'vladmir': [1209], 'manta': [1718], 'ring_of_basilius': [111], 'sobi_mask': [111], 'lifesteal': [1085], 'yasha': [1717], 'boots_of_elves': [1717], 'boots': [207], 'ultimate_orb': [1500], 'blade_of_alacrity': [389, 526, 1717], 'diffusal_blade': [697], 'flask': [-80], 'eagle': [1843], 'ring_of_regen': [1085], 'tpscroll': [768], 'ring_of_protection': [-81], 'tango': [-85, -78]}"
25148,129,"{'tpscroll': [131, 250], 'clarity': [-69, -67], 'bottle': [-79], 'boots': [213]}"


In [6]:
# add match and item information to player data

# Keep players with a non-zero account_id, attach their purchases (no `how`
# given, so polars' default inner join drops players without a purchase row)
# and then the match/region columns (left join).
players_enriched = (
    pl_players.filter(pl.col("account_id") != 0)
    .join(purchases_with_item_names, on=["match_id", "player_slot"])
    .join(match_with_region, how="left", on="match_id")
)

# drop cols with more than 20% nulls
# Columns are selected by NAME. A positional boolean mask that skips
# "purchases" while building it and appends it at the end does not line up
# with the frame, because "purchases" sits in the middle of the columns.
MAX_NULL_FRACTION = 0.2
max_nulls = MAX_NULL_FRACTION * players_enriched.height
kept_columns = [
    col
    for col in players_enriched.columns
    # "purchases" is always kept and never null-counted
    # (it is an object column; presumably null_count is not usable on it).
    if col == "purchases" or players_enriched[col].null_count() <= max_nulls
]
pl_final = players_enriched.select(kept_columns)

print(pl_final)


shape: (318787, 67)
┌───────┬──────────┬─────────┬───────────┬─────┬───────────┬────────────┬────────────┬─────────────┐
│ match ┆ account_ ┆ hero_id ┆ player_sl ┆ ... ┆ radiant_w ┆ negative_v ┆ positive_v ┆ region      │
│ _id   ┆ id       ┆ ---     ┆ ot        ┆     ┆ in        ┆ otes       ┆ otes       ┆ ---         │
│ ---   ┆ ---      ┆ i64     ┆ ---       ┆     ┆ ---       ┆ ---        ┆ ---        ┆ str         │
│ i64   ┆ i64      ┆         ┆ i64       ┆     ┆ bool      ┆ i64        ┆ i64        ┆             │
╞═══════╪══════════╪═════════╪═══════════╪═════╪═══════════╪════════════╪════════════╪═════════════╡
│ 5371  ┆ 26654    ┆ 72      ┆ 129       ┆ ... ┆ false     ┆ 0          ┆ 0          ┆ US WEST     │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 45967 ┆ 71395    ┆ 70      ┆ 4         ┆ ... ┆ true      ┆ 0          ┆ 0          ┆ EUROPE      │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌

### Analyse the data

#### Win rates

In [7]:
# Summary statistics of the match duration column; the thresholds for
# long/short games are derived from the mean and std shown here.
match_durations = pl_final["duration"]
match_durations.describe()


statistic,value
str,f64
"""min""",59.0
"""max""",16037.0
"""null_count""",0.0
"""mean""",2461.093294
"""std""",641.437472
"""count""",318787.0


In [8]:
# Duration cut-offs for unusually long / short games, rounded from the
# describe() statistics of `duration` (mean ~ 2461, std ~ 641).
LONG_GAME_MIN_DURATION = 4000  # ~ mean + 2.5 * std
SHORT_GAME_MAX_DURATION = 800  # ~ mean - 2.5 * std

# Add both flags with with_columns, which returns a new frame, rather than
# assigning through DataFrame.__setitem__ (deprecated and later removed in
# polars). Re-running the cell simply overwrites the two columns.
pl_final = pl_final.with_columns(
    [
        (pl.col("duration") > LONG_GAME_MIN_DURATION).alias("long_game"),
        (pl.col("duration") < SHORT_GAME_MAX_DURATION).alias("short_game"),
    ]
)

# Radiant win rate per duration bucket: mean of radiant_win cast to 0/1.
pl_final.groupby(["long_game", "short_game"]).agg(
    pl.col("radiant_win").cast(int).mean()
)
# radiant has a higher win rate in short games


long_game,short_game,radiant_win_mean
bool,bool,f64
False,False,0.516999
True,False,0.504734
False,True,0.652463
