# Imports

In [18]:
# Built-In
from importlib import reload

# 3rd party
import pandas
from sklearn.model_selection import train_test_split

# In-House

## Intra
Imported in a separate cell because `helpers` is edited and reloaded often during development.

In [20]:
import helpers

# Reload BEFORE binding names out of the module. `from helpers import ...`
# copies the current function object into this namespace; reloading afterwards
# rebuilds the module but leaves that copy pointing at the pre-reload function,
# so edits to helpers.py would not take effect until the cell is run twice.
reload(helpers)

from helpers import get_damage_profile_composition

<module 'helpers' from 'c:\\Users\\Beau\\Desktop\\ML\\faker\\helpers.py'>

# Globals

In [21]:
# Seed passed as `random_state` to the train_test_split calls further down,
# so the train/dev/test partition is the same on every run.
RANDOM_STATE = 30

# Load Data

In [22]:
# Location of the merged CSV, relative to the notebook's working directory.
file_path: str = "./merged9.csv"

# Read the whole file into a DataFrame, announcing start and finish.
print(f'Loading "{file_path}"')
data: pandas.DataFrame = pandas.read_csv(file_path)
print("Finish loading.")

# Sanity check: (rows, columns), then the first rows as the cell's displayed value.
print(data.shape)
data.head()

Loading "./merged9.csv"
Finish loading.
(987, 65)


Unnamed: 0,Date,Tournament,W/L,Side,Vs_Team,Game_Length,Champion,Vs_Champion,Kills,Deaths,...,teammate_top_ign,teammate_jungle_ign,teammate_mid_ign,teammate_adc_ign,teammate_support_ign,opponent_top_ign,opponent_jungle_ign,opponent_mid_ign,opponent_adc_ign,opponent_support_ign
0,2013-04-06 10:30:00,"Korea,Champions 2013 Spring",Win,Red,CJ Entus Blaze,35:42,Nidalee,Kha'Zix,6,0,...,Impact,bengi,Faker,Piglet,PoohManDu,Flame,Helios (Shin Dong-jin),Ambition,Cpt Jack,Lustboy
1,2013-04-06 10:30:01,"Korea,Champions 2013 Spring",Win,Blue,CJ Entus Blaze,35:23,Karthus,Diana,12,2,...,Impact,bengi,Faker,Piglet,PoohManDu,Flame,Helios (Shin Dong-jin),Ambition,Cpt Jack,Lustboy
2,2013-04-12 09:30:00,"Korea,Champions 2013 Spring",Loss,Red,NaJin Sword,28:49,Lux,Twisted Fate,2,6,...,Impact,bengi,Faker,Piglet,PoohManDu,MakNooN,watch,SSONG,PraY,Cain
3,2013-04-12 09:30:01,"Korea,Champions 2013 Spring",Win,Blue,NaJin Sword,33:24,Twisted Fate,Zilean,7,3,...,Impact,bengi,Faker,Piglet,PoohManDu,MakNooN,watch,SSONG,PraY,Cain
4,2013-04-17 09:30:00,"Korea,Champions 2013 Spring",Win,Blue,MVP Blue,38:43,Jayce,Kha'Zix,6,3,...,Impact,bengi,Faker,Piglet,PoohManDu,Cheonju,ChuNyang,Easyhoon,Deft,FLahm


In [23]:
# Show every column label; the head() preview elides the middle columns with "...".
data.columns

Index(['Date', 'Tournament', 'W/L', 'Side', 'Vs_Team', 'Game_Length',
       'Champion', 'Vs_Champion', 'Kills', 'Deaths', 'Assists', 'KDA',
       'Spells', 'CS', 'Delta_CS', 'Items', 'Date_parsed',
       'tournaments_played', 'date', 'blue_side_team', 'red_side_team',
       'winner', 'blue_side_bans', 'red_side_bans', 'blue_side_picks',
       'red_side_picks', 'blue_side_roster', 'red_side_roster',
       'tournament_curr_total_games', 'tournament_curr_wins',
       'tournament_curr_losses', 'tournament_curr_win_percentage',
       'tournament_type', 'teams_region', 'teammate_role_top',
       'teammate_role_jungle', 'teammate_role_mid', 'teammate_role_adc',
       'teammate_role_support', 'enemy_role_top', 'enemy_role_jungle',
       'enemy_role_mid', 'enemy_role_adc', 'enemy_role_support', 'bans',
       'teammate_top_champion', 'teammate_jungle_champion',
       'teammate_mid_champion', 'teammate_adc_champion',
       'teammate_support_champion', 'opponent_top_champion',
      

# Feature Engineering

## add "Team's Damage Composition"

In [24]:
# Names for the six derived columns: blue side first, then red side.
# NOTE(review): assumes get_damage_profile_composition yields its six values in
# exactly this order — confirm against helpers.py.
damage_profile_cols = [
    "blue_physical_damage_perc",
    "blue_magic_damage_perc",
    "blue_true_damage_perc",
    "red_physical_damage_perc",
    "red_magic_damage_perc",
    "red_true_damage_perc",
]

# Call the helper once per row (axis=1); result_type="expand" turns each row's
# list-like result into separate columns, which are stored under the names above.
damage_profile = data.apply(
    get_damage_profile_composition, axis=1, result_type="expand"
)
data[damage_profile_cols] = damage_profile

# Preprocess

In [25]:
# Feature columns, grouped by what they describe. The groups are concatenated
# in the original column order ("tournament_type" sits between the role and
# damage groups).
match_context_cols = [
    "Side",
    "tournament_curr_win_percentage",
    "teams_region",
]
teammate_role_cols = [
    "teammate_role_top",
    "teammate_role_jungle",
    "teammate_role_mid",
    "teammate_role_adc",
    "teammate_role_support",
]
enemy_role_cols = [
    "enemy_role_top",
    "enemy_role_jungle",
    "enemy_role_mid",
    "enemy_role_adc",
    "enemy_role_support",
]
damage_share_cols = [
    "blue_physical_damage_perc",
    "blue_magic_damage_perc",
    "blue_true_damage_perc",
    "red_physical_damage_perc",
    "red_magic_damage_perc",
    "red_true_damage_perc",
]

cols_to_use = (
    match_context_cols
    + teammate_role_cols
    + enemy_role_cols
    + ["tournament_type"]
    + damage_share_cols
)

# Feature matrix: only the columns named in cols_to_use, in that order.
X = data[cols_to_use]

## transform target column to discrete values

In [26]:
# Target: the raw "W/L" column; it is converted to 1/0 labels just below.
y = data["W/L"]
def transform_game_result(game_result):
    """Convert one "W/L" cell into a binary label.

    Parameters
    ----------
    game_result : str
        Raw cell value. Compared case-insensitively, ignoring surrounding
        whitespace, against "win" and "loss".

    Returns
    -------
    int
        1 for a win, 0 for a loss.

    Raises
    ------
    ValueError
        If the value is not a string (e.g. NaN from an empty cell) or is a
        string other than "win"/"loss". The offending value is included in
        the message.
    """
    # Non-strings have no .lower(); route them to the same ValueError as
    # unknown strings instead of leaking an AttributeError.
    if isinstance(game_result, str):
        normalized = game_result.strip().lower()
        if normalized == "win":
            return 1
        if normalized == "loss":
            return 0
    raise ValueError(f'"W/L" column has invalid values: {game_result!r}')
# Series.map applies the function to each cell and lets its ValueError surface.
# Series.transform would instead retry a failing function on the whole Series,
# replacing that ValueError with an unrelated
# "'Series' object has no attribute 'lower'" AttributeError.
y = y.map(transform_game_result)

## create training, dev, and test sets

In [27]:
# Two-stage split of the full data set: 60% train / 20% dev / 20% test.

# Stage 1: hold out 20% of all rows as the test set; 80% remains.
X_train_dev, X_test, y_train_dev, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# Stage 2: a quarter of the remaining 80% (20% overall) becomes the dev set,
# leaving 60% of the full data set for training.
X_train, X_dev, y_train, y_dev = train_test_split(
    X_train_dev,
    y_train_dev,
    train_size=0.75,
    test_size=0.25,
    random_state=RANDOM_STATE,
)

## get categorical cols

In [28]:
# Columns whose dtype is "object" are the ones treated as categorical.
categorical_cols = [
    name for name, dtype in X_train.dtypes.items() if dtype == "object"
]
print(f"categorical_cols = {categorical_cols}\n")

categorical_cols = ['Side', 'teams_region', 'teammate_role_top', 'teammate_role_jungle', 'teammate_role_mid', 'teammate_role_adc', 'teammate_role_support', 'enemy_role_top', 'enemy_role_jungle', 'enemy_role_mid', 'enemy_role_adc', 'enemy_role_support', 'tournament_type']

