# Polars

In [3]:
import polars as pl

### Load Data

In [4]:
# Directory containing the CSV inputs loaded below (relative path).
DATA_DIR = "../data/dota"

In [5]:
def _read_table(stem):
    """Read ``<DATA_DIR>/<stem>.csv`` into a Polars DataFrame."""
    return pl.read_csv(f"{DATA_DIR}/{stem}.csv")


# One frame per CSV file found in DATA_DIR.
pl_match = _read_table("match")
pl_cluster_regions = _read_table("cluster_regions")
pl_purchase_log = _read_table("purchase_log")
pl_item_id_names = _read_table("item_ids")
pl_players = _read_table("players")

In [6]:
# add region names to match data

match_with_region = (
    pl_match
    # left join: every match row is kept, matched on its cluster id
    .join(pl_cluster_regions, on="cluster", how="left")
    # the join key itself is discarded once the lookup columns are attached
    .drop("cluster")
)

In [7]:
# add item names to item purchases

def _times_by_item(cols):
    """Build ``{item_name: [times]}`` for one (match_id, player_slot) group.

    ``cols`` is the sequence handed over by ``pl.apply``: the ``item_name``
    Series first, the Series of per-item ``time`` lists second.
    """
    item_names, time_lists = cols[0], cols[1]
    return dict(zip(item_names, time_lists.to_list()))


purchases_with_item_names = (
    pl_purchase_log
    # swap each item id for the name found in the lookup table
    .join(pl_item_id_names, on="item_id", how="left")
    .drop("item_id")
    # one row per (match, player, item): all purchase times gathered in a list
    .groupby(["match_id", "player_slot", "item_name"])
    .agg(pl.col("time").list().keep_name())
    # one row per (match, player): the item rows folded into a single dict
    .groupby(["match_id", "player_slot"])
    .agg(
        [
            pl.apply(
                [pl.col("item_name"), pl.col("time")],
                _times_by_item,
            ).alias("purchases")
        ]
    )
)

purchases_with_item_names.head()

match_id,player_slot,purchases
i64,i64,object
30215,0,"{'yasha': [1822], 'quelling_blade': [-75], 'diffusal_blade_2': [1601], 'reaver': [2232], 'vanguard': [612], 'tango': [-74], 'flask': [-74], 'tpscroll': [1599], 'robe': [1077], 'ring_of_health': [228], 'demon_edge': [2771], 'phase_boots': [672], 'diffusal_blade': [1169], 'boots': [296], 'ultimate_orb': [1438], 'boots_of_elves': [1691], 'magic_stick': [43], 'blade_of_alacrity': [769, 975, 1640], 'vitality_booster': [584, 2513], 'blades_of_attack': [625, 672], 'manta': [1952], 'heart': [2528], 'stout_shield': [422]}"
34423,129,"{'ring_of_basilius': [207], 'belt_of_strength': [1235], 'branches': [-53, -53, 514], 'black_king_bar': [2672], 'ancient_janggo': [895], 'mithril_hammer': [2594], 'ring_of_protection': [207], 'magic_wand': [540], 'tpscroll': [333, 515, 790, 1050, 1234, 1699, 1699, 2045, 2204, 2265], 'robe': [793], 'monkey_king_bar': [2170], 'bottle': [98], 'flask': [10], 'magic_stick': [332], 'javelin': [1901, 2157], 'ogre_axe': [1235, 2594], 'bracer': [786], 'sange': [1307], 'phase_boots': [677], 'demon_edge': [1734], 'sange_and_yasha': [1497], 'wraith_band': [-61], 'boots': [310], 'boots_of_elves': [1497], 'gauntlets': [786], 'sobi_mask': [148], 'blades_of_attack': [648, 648], 'yasha': [1497], 'circlet': [-61, 514, 786], 'slippers': [-61], 'ring_of_aquila': [236], 'blade_of_alacrity': [1497]}"
5010,3,"{'magic_stick': [204], 'broadsword': [1043], 'blink': [2105], 'tango': [-78], 'travel_boots': [801], 'bottle': [558], 'tpscroll': [329, 559, 687], 'robe': [1270], 'magic_wand': [315], 'smoke_of_deceit': [561, 807], 'urn_of_shadows': [315], 'ward_observer': [1243], 'branches': [-79, -79], 'blade_of_alacrity': [2001], 'circlet': [204], 'yasha': [2001], 'gauntlets': [-80, -80], 'boots': [277], 'blade_mail': [1280], 'boots_of_elves': [2001], 'sobi_mask': [140], 'chainmail': [1188], 'clarity': [-76]}"
49650,132,"{'black_king_bar': [1276], 'circlet': [-74], 'desolator': [1001], 'slippers': [-74], 'blades_of_attack': [223, 353, 2742], 'lesser_crit': [2742], 'ogre_axe': [1088], 'assault': [2459], 'hyperstone': [2192], 'wraith_band': [-74], 'boots': [189], 'phase_boots': [379], 'chainmail': [2357], 'tpscroll': [235, 355, 496, 682, 1020, 1296, 1317, 1803, 2143, 2493, 2863], 'blink': [1742], 'broadsword': [2742], 'platemail': [2409], 'bottle': [99], 'mithril_hammer': [751, 927, 1238]}"
7474,132,"{'dagon_3': [2697], 'point_booster': [1295], 'dagon': [2495, 2507], 'blades_of_attack': [1043, 1043], 'staff_of_wizardry': [1490, 1873, 2495, 2507], 'ward_observer': [1715], 'ultimate_scepter': [1709], 'tango': [-79], 'tpscroll': [314, 749, 945, 1882, 2124], 'mantle': [2495, 2507], 'dagon_4': [2809], 'boots': [888], 'void_stone': [2174], 'sobi_mask': [2048], 'phase_boots': [1043], 'dagon_2': [2497, 2507], 'ogre_axe': [1345], 'clarity': [-77, -76], 'null_talisman': [2495, 2507], 'gloves': [-82], 'branches': [-81, -81], 'blade_of_alacrity': [1708], 'cyclone': [2301], 'circlet': [2495, 2507], 'dagon_5': [2819], 'hand_of_midas': [729]}"


In [8]:
# add match and item information to player data

players_joined = (
    pl_players
    # rows with account_id 0 are excluded before joining
    .filter(pl.col("account_id") != 0)
    # attach the per-player purchase dict built above
    .join(purchases_with_item_names, on=["match_id", "player_slot"])
    # attach match-level columns (including region); keep every player row
    .join(match_with_region, how="left", on="match_id")
)

# drop cols with more than 20% nulls
#
# Columns are selected by NAME rather than with a positional boolean mask.
# The previous mask skipped "purchases" while being built and then appended the
# string "purchases" at the end, so it no longer lined up with the frame's
# column order: "purchases" and every column after it were judged by the
# null ratio of the *next* column.
# "purchases" is always kept and never null-checked, as before (presumably
# because it is an object column -- TODO confirm).
max_nulls = 0.2 * players_joined.height
kept_columns = [
    col
    for col in players_joined.columns
    if col == "purchases" or players_joined[col].null_count() <= max_nulls
]
pl_final = players_joined.select(kept_columns)

print(pl_final)

shape: (318787, 67)
┌───────┬──────────┬─────────┬───────────┬─────┬───────────┬────────────┬────────────┬─────────────┐
│ match ┆ account_ ┆ hero_id ┆ player_sl ┆ ... ┆ radiant_w ┆ negative_v ┆ positive_v ┆ region      │
│ _id   ┆ id       ┆ ---     ┆ ot        ┆     ┆ in        ┆ otes       ┆ otes       ┆ ---         │
│ ---   ┆ ---      ┆ i64     ┆ ---       ┆     ┆ ---       ┆ ---        ┆ ---        ┆ str         │
│ i64   ┆ i64      ┆         ┆ i64       ┆     ┆ bool      ┆ i64        ┆ i64        ┆             │
╞═══════╪══════════╪═════════╪═══════════╪═════╪═══════════╪════════════╪════════════╪═════════════╡
│ 34423 ┆ 3175     ┆ 9       ┆ 129       ┆ ... ┆ true      ┆ 0          ┆ 0          ┆ SINGAPORE   │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 49650 ┆ 10811    ┆ 46      ┆ 132       ┆ ... ┆ false     ┆ 0          ┆ 0          ┆ SINGAPORE   │
├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌

### Analyse the data

#### Win rates

In [9]:
# Summary statistics (min, max, mean, std, null count, count) of match duration.
pl_final.get_column("duration").describe()

statistic,value
str,f64
"""min""",59.0
"""max""",16037.0
"""null_count""",0.0
"""mean""",2461.093294
"""std""",641.437472
"""count""",318787.0


In [None]:
# Display the assembled player-level frame for inspection.
pl_final