# Imports

In [31]:
import os
# Backend selection for Keras. This assignment is placed before `import keras`
# below because Keras reads KERAS_BACKEND at import time; keep this ordering.
os.environ["KERAS_BACKEND"] = "tensorflow"
# TensorFlow native-log threshold, set before `import tensorflow` below.
# NOTE(review): "2" is understood to hide INFO and WARNING messages - confirm
# against the TensorFlow version in use.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
import pandas as pd
import keras
from keras.utils import FeatureSpace

# Constants

In [32]:
def _player_columns(field):
  """Return the ten per-player column names for `field`.

  Order is Radiant slots 1-5 followed by Dire slots 1-5, e.g.
  `radiant_player_1_<field>` ... `dire_player_5_<field>`.
  """
  return [
    f"{side}_player_{slot}_{field}"
    for side in ("radiant", "dire")
    for slot in range(1, 6)
  ]


# Per-player columns describing the draft (hero, position, lane, role).
PLAYER_HERO_IDS = _player_columns("hero_id")
PLAYER_HEROES = _player_columns("hero")
PLAYER_POSITIONS = _player_columns("position")
PLAYER_LANES = _player_columns("lane")
PLAYER_ROLES = _player_columns("role")

# Per-player columns that are not part of the draft feature set below.
PLAYER_NAMES = _player_columns("name")
PLAYER_NETWORTHS = _player_columns("networth")

# Match-level columns, grouped by what they describe.
PREMATCH_FEATURES = [
  # league
  "league", "league_id", "league_tier",
  "league_start_date_time", "league_end_date_time", "league_region",
  # series
  "series_id", "series_type",
  # match
  "match_id", "match_start_date_time",
  # teams
  "radiant_team_id", "radiant_team_name",
  "dire_team_id", "dire_team_name",
  # patch
  "game_version_id",
]

# All draft columns, concatenated in the order: hero ids, heroes, positions, lanes, roles.
POST_DRAFT_FEATURES = [
  *PLAYER_HERO_IDS,
  *PLAYER_HEROES,
  *PLAYER_POSITIONS,
  *PLAYER_LANES,
  *PLAYER_ROLES,
]

# Rating constants. NOTE(review): presumably the initial Elo rating and the
# Elo K-factor used by the feature-engineering step - confirm where they are consumed.
STARTING_ELO = 1500
K = 20

# Fetch Data

In [33]:
# Load the match table from a Parquet file; the path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_parquet("dota2_matches.parquet")
# Last expression of the cell: display the first rows as a quick sanity check.
df.head()

Unnamed: 0,league,league_id,league_tier,league_start_date_time,league_end_date_time,league_region,series_id,series_type,match_id,match_start_date_time,...,dire_player_5_hero_id,dire_player_5_hero,dire_player_5_position,dire_player_5_lane,dire_player_5_role,dire_player_5_kills,dire_player_5_deaths,dire_player_5_assists,dire_player_5_networth,game_version_id
0,Dota 2 Space League,17163,PROFESSIONAL,2024-10-12 21:00:00,2025-03-13 22:00:00,,915196,BEST_OF_THREE,7988517842,2024-10-15 01:28:24,...,108,Underlord,POSITION_3,OFF_LANE,CORE,8,8,12,19449,177
1,Royal Circuit #3,17064,PROFESSIONAL,2024-10-01 05:00:00,2024-10-31 05:00:00,,915178,BEST_OF_THREE,7988515816,2024-10-15 01:24:03,...,20,Vengeful Spirit,POSITION_5,SAFE_LANE,HARD_SUPPORT,1,7,13,7981,177
2,Dota 2 Space League,17163,PROFESSIONAL,2024-10-12 21:00:00,2025-03-13 22:00:00,,915196,BEST_OF_THREE,7988495156,2024-10-15 00:43:58,...,14,Pudge,POSITION_5,SAFE_LANE,HARD_SUPPORT,2,4,10,10220,177
3,Royal Circuit #3,17064,PROFESSIONAL,2024-10-01 05:00:00,2024-10-31 05:00:00,,915178,BEST_OF_THREE,7988474098,2024-10-15 00:01:33,...,83,Treant Protector,POSITION_5,SAFE_LANE,HARD_SUPPORT,3,2,29,9250,177
4,Dota 2 Space League,17163,PROFESSIONAL,2024-10-12 21:00:00,2025-03-13 22:00:00,,915166,BEST_OF_THREE,7988466451,2024-10-14 23:44:45,...,138,Muerta,POSITION_4,OFF_LANE,LIGHT_SUPPORT,6,5,13,15004,177


# Clean Data

In [34]:
# --- Normalise text columns -------------------------------------------------
# Per-player role/position/lane/name columns: trim whitespace and treat both a
# blank string and the literal "UNKNOWN" as missing.
required_player_columns = PLAYER_ROLES + PLAYER_POSITIONS + PLAYER_LANES + PLAYER_NAMES
for column in required_player_columns:
  df[column] = df[column].str.strip().replace("", pd.NA).replace("UNKNOWN", pd.NA)

# League region is kept for every row: "UNSET" and missing both become "UNKNOWN".
df["league_region"] = df["league_region"].str.strip().replace("UNSET", "UNKNOWN").fillna("UNKNOWN")

# Team names: a blank name counts as missing for BOTH sides, so that a
# whitespace-only Dire name is dropped just like a whitespace-only Radiant name.
team_name_columns = ["radiant_team_name", "dire_team_name"]
for column in team_name_columns:
  df[column] = df[column].str.strip().replace("", pd.NA)

# --- Drop incomplete matches ------------------------------------------------
# One pass instead of one `dropna` per column: a row is removed if ANY of the
# required columns is missing (the default how="any"), which selects the same
# rows as dropping column by column but copies the frame only once.
required_columns = (
  required_player_columns
  + ["radiant_team_id", "dire_team_id"]
  + team_name_columns
  + PLAYER_NETWORTHS
)
df = df.dropna(subset=required_columns)

# --- Chronological order ----------------------------------------------------
# Many rows share a start time (older matches carry date-only timestamps), so
# `match_id` is used as a tie-breaker to make the row order fully determined.
# NOTE(review): assumes match ids increase over time, as they do in the
# displayed sample - confirm for the full data set.
df = df.sort_values(by=["match_start_date_time", "match_id"], ascending=True).reset_index(drop=True)
df

Unnamed: 0,league,league_id,league_tier,league_start_date_time,league_end_date_time,league_region,series_id,series_type,match_id,match_start_date_time,...,dire_player_5_hero_id,dire_player_5_hero,dire_player_5_position,dire_player_5_lane,dire_player_5_role,dire_player_5_kills,dire_player_5_deaths,dire_player_5_assists,dire_player_5_networth,game_version_id
0,EPICENTER XL,9601,MAJOR,2018-01-10 00:00:00,2018-05-31 00:00:00,CIS,101532295,BEST_OF_THREE,3856785633,2018-04-29 00:00:00,...,38,Beastmaster,POSITION_3,OFF_LANE,CORE,6,0,8,12198,91
1,NON STOP Energy League,9971,PROFESSIONAL,2018-05-12 00:00:00,2018-06-24 00:00:00,EUROPE,101532596,BEST_OF_ONE,3887809621,2018-05-13 00:00:00,...,84,Ogre Magi,POSITION_5,SAFE_LANE,HARD_SUPPORT,1,8,3,3778,92
2,ESL India Premiership - 2017 Edition,5407,PROFESSIONAL,2017-05-10 00:00:00,2017-12-22 00:00:00,SEA,101532648,BEST_OF_ONE,3901011325,2018-05-19 00:00:00,...,59,Huskar,POSITION_1,SAFE_LANE,CORE,7,13,9,10303,92
3,The International 2018,9870,INTERNATIONAL,2018-07-31 00:00:00,2018-08-24 00:00:00,UNKNOWN,234861,BEST_OF_THREE,4063477071,2018-08-16 00:00:00,...,9,Mirana,POSITION_3,OFF_LANE,CORE,11,6,21,22478,96
4,The International 2018,9870,INTERNATIONAL,2018-07-31 00:00:00,2018-08-24 00:00:00,UNKNOWN,236158,BEST_OF_THREE,4065211201,2018-08-18 00:00:00,...,40,Venomancer,POSITION_1,SAFE_LANE,CORE,5,7,24,16213,96
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107261,Dota 2 Space League,17163,PROFESSIONAL,2024-10-12 21:00:00,2025-03-13 22:00:00,UNKNOWN,915166,BEST_OF_THREE,7988466451,2024-10-14 23:44:45,...,138,Muerta,POSITION_4,OFF_LANE,LIGHT_SUPPORT,6,5,13,15004,177
107262,Royal Circuit #3,17064,PROFESSIONAL,2024-10-01 05:00:00,2024-10-31 05:00:00,UNKNOWN,915178,BEST_OF_THREE,7988474098,2024-10-15 00:01:33,...,83,Treant Protector,POSITION_5,SAFE_LANE,HARD_SUPPORT,3,2,29,9250,177
107263,Dota 2 Space League,17163,PROFESSIONAL,2024-10-12 21:00:00,2025-03-13 22:00:00,UNKNOWN,915196,BEST_OF_THREE,7988495156,2024-10-15 00:43:58,...,14,Pudge,POSITION_5,SAFE_LANE,HARD_SUPPORT,2,4,10,10220,177
107264,Royal Circuit #3,17064,PROFESSIONAL,2024-10-01 05:00:00,2024-10-31 05:00:00,UNKNOWN,915178,BEST_OF_THREE,7988515816,2024-10-15 01:24:03,...,20,Vengeful Spirit,POSITION_5,SAFE_LANE,HARD_SUPPORT,1,7,13,7981,177


# Feature Engineering

In [35]:
# Start from an empty rating table for this run.
# NOTE(review): presumably keyed by team id and filled with Elo ratings
# (see STARTING_ELO / K) by the match-processing code - confirm against
# the code that populates it, which is not visible in this cell.
ratings = {}

# Progress message emitted before the matches are processed.
print("Processing matches...")

Processing matches...
