## Code from Final Resource

In [79]:
# Import packages
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn import metrics


# import data
dpc = pd.read_json("data/DPC.json").transpose()

# drops irrelevant columns
dpc.drop(["all_word_counts", "barracks_status_dire", "barracks_status_radiant", "chat", "cluster", "comeback", 
          "cosmetics", "dire_score", "series_type", "skill", "stomp", "teamfights", "throw", "tower_status_dire", 
          "tower_status_radiant", "loss", "leagueid", "duration", "engine", "first_blood_time", "radiant_xp_adv", 
          "replay_url", "radiant_gold_adv", "players", "positive_votes", "radiant_score", "negative_votes", 
          "objectives", "dire_team_id", "radiant_team_id", "human_players", "league", "lobby_type", 
          "draft_timings", "my_word_counts", "region", "replay_salt", "series_id"], axis=1, inplace=True)
dpc.drop([1], inplace=True)

# (7.06, 7.07, 7.08, 7.09, 7.10, 7.11, 7.12, 7.13, 7.14, 7.15, 7.16, 7.16)
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# for data with picks and bans
# for hero data https://github.com/kronusme/dota2-api/blob/master/data/heroes.json
dpc_df = pd.DataFrame(columns=["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
                                 "ban_1", "ban_2", "ban_3", "ban_4", "ban_5", "ban_6",
                                 "win"])

patch_range_low = 26
patch_range_high = 28

for i in dpc.index:
    patch = dpc.at[i, 'patch']
    game_mode = dpc.at[i, 'game_mode']
    if patch >= patch_range_low and patch <= patch_range_high and game_mode == 2: 
        length = len(dpc.at[i, 'picks_bans'])
        if  length == 22:
            ban1 = dpc.at[i, 'picks_bans'][0]["hero_id"]
            ban6 = dpc.at[i, 'picks_bans'][1]["hero_id"]
            ban2 = dpc.at[i, 'picks_bans'][2]["hero_id"]
            ban7 = dpc.at[i, 'picks_bans'][3]["hero_id"]
            ban3 = dpc.at[i, 'picks_bans'][4]["hero_id"]
            ban8 = dpc.at[i, 'picks_bans'][5]["hero_id"]

            pick1 = dpc.at[i, 'picks_bans'][6]["hero_id"]
            pick6 = dpc.at[i, 'picks_bans'][7]["hero_id"]
            pick7 = dpc.at[i, 'picks_bans'][8]["hero_id"]
            pick2 = dpc.at[i, 'picks_bans'][9]["hero_id"]

            ban9 = dpc.at[i, 'picks_bans'][10]["hero_id"]
            ban4 = dpc.at[i, 'picks_bans'][11]["hero_id"]
            ban10 = dpc.at[i, 'picks_bans'][12]["hero_id"]
            ban5 = dpc.at[i, 'picks_bans'][13]["hero_id"]

            pick8 = dpc.at[i, 'picks_bans'][14]["hero_id"]
            pick3 = dpc.at[i, 'picks_bans'][15]["hero_id"]
            pick9 = dpc.at[i, 'picks_bans'][16]["hero_id"]
            pick4 = dpc.at[i, 'picks_bans'][17]["hero_id"]  

            ban11 = dpc.at[i, 'picks_bans'][18]["hero_id"]
            ban6 = dpc.at[i, 'picks_bans'][19]["hero_id"]

            pick5 = dpc.at[i, 'picks_bans'][20]["hero_id"]
            pick10 = dpc.at[i, 'picks_bans'][21]["hero_id"] 

            firstWin = dpc.at[i, 'radiant_win']
            secondWin = not dpc.at[i, 'radiant_win']

            firstPick = dpc.at[i, 'picks_bans'][0]["team"]

            if firstPick == 1:
                firstWin = not firstWin
                secondWin = not secondWin

            dpc_df = dpc_df.append({"pick_1": pick1, 
                                     "pick_2": pick2, 
                                     "pick_3": pick3, 
                                     "pick_4": pick4, 
                                     "pick_5": pick5,
                                     "ban_1": ban1, 
                                     "ban_2": ban2, 
                                     "ban_3": ban3, 
                                     "ban_4": ban4, 
                                     "ban_5": ban5,
                                     "ban_6": ban6,
                                     "win": firstWin}, ignore_index=True)
            dpc_df = dpc_df.append({"pick_1": pick6, 
                                     "pick_2": pick7, 
                                     "pick_3": pick8, 
                                     "pick_4": pick9, 
                                     "pick_5": pick10,
                                     "ban_1": ban6, 
                                     "ban_2": ban7, 
                                     "ban_3": ban8, 
                                     "ban_4": ban9, 
                                     "ban_5": ban10,
                                     "ban_6": ban11,
                                     "win": secondWin}, ignore_index=True)

dpc_df.head(10)

# testing on smaller subset of data

#dpc_s = pd.read_json("data/DPC_small.json").transpose()
#dpc_s.drop(["all_word_counts", "barracks_status_dire", "barracks_status_radiant", "chat", "cluster", 
         # "cosmetics", "dire_score", "series_type", "skill", "teamfights", "throw", "tower_status_dire", 
         # "tower_status_radiant", "loss", "leagueid", "duration", "engine", "first_blood_time", "radiant_xp_adv", 
         # "replay_url", "radiant_gold_adv", "players", "positive_votes", "radiant_score", "negative_votes", 
         # "objectives", "dire_team_id", "radiant_team_id", "human_players", "league", "lobby_type", 
         # "draft_timings", "my_word_counts", "region", "replay_salt", "series_id"], axis=1, inplace=True)
#dpc_s.drop([1], inplace=True)

#dpc_s_df = pd.DataFrame(columns=["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
#                                 "ban_1", "ban_2", "ban_3", "ban_4", "ban_5",
#                                 "win"])

#for i in dpc_s.index: 
#    # print(dpc_s.at[i, 'patch'])
#    ban1 = dpc_s.at[i, 'picks_bans'][0]["hero_id"]
#    ban6 = dpc_s.at[i, 'picks_bans'][1]["hero_id"]
#    ban2 = dpc_s.at[i, 'picks_bans'][2]["hero_id"]
#    ban7 = dpc_s.at[i, 'picks_bans'][3]["hero_id"]
    # ban3
    # ban8
#    pick1 = dpc_s.at[i, 'picks_bans'][4]["hero_id"]
#    pick6 = dpc_s.at[i, 'picks_bans'][5]["hero_id"]
#    pick7 = dpc_s.at[i, 'picks_bans'][6]["hero_id"]
#    pick2 = dpc_s.at[i, 'picks_bans'][7]["hero_id"]
    
#    ban8 = dpc_s.at[i, 'picks_bans'][8]["hero_id"]
#    ban3 = dpc_s.at[i, 'picks_bans'][9]["hero_id"]
#    ban9 = dpc_s.at[i, 'picks_bans'][10]["hero_id"]
#    ban4 = dpc_s.at[i, 'picks_bans'][11]["hero_id"]
    
#    pick8 = dpc_s.at[i, 'picks_bans'][12]["hero_id"]
#    pick3 = dpc_s.at[i, 'picks_bans'][13]["hero_id"]
#    pick9 = dpc_s.at[i, 'picks_bans'][14]["hero_id"]
#    pick4 = dpc_s.at[i, 'picks_bans'][15]["hero_id"]  
    
#    ban10 = dpc_s.at[i, 'picks_bans'][16]["hero_id"]
#    ban5 = dpc_s.at[i, 'picks_bans'][17]["hero_id"]
    
#    pick5 = dpc_s.at[i, 'picks_bans'][18]["hero_id"]
#    pick10 = dpc_s.at[i, 'picks_bans'][19]["hero_id"] 
    
#    firstWin = dpc_s.at[i, 'radiant_win']
#    secondWin = not dpc_s.at[i, 'radiant_win']
    
#    firstPick = dpc_s.at[i, 'picks_bans'][0]["team"]
    
#    if firstPick == 1:
#        firstWin = not firstWin
#        secondWin = not secondWin
    
#    dpc_s_df = dpc_s_df.append({"pick_1": pick1, 
#                                 "pick_2": pick2, 
#                                 "pick_3": pick3, 
#                                 "pick_4": pick4, 
#                                 "pick_5": pick5,
#                                 "ban_1": ban1, 
#                                 "ban_2": ban2, 
#                                 "ban_3": ban3, 
#                                 "ban_4": ban4, 
#                                 "ban_5": ban5,
#                                 "win": firstWin}, ignore_index=True)
#    dpc_s_df = dpc_s_df.append({"pick_1": pick6, 
#                                 "pick_2": pick7, 
#                                 "pick_3": pick8, 
#                                 "pick_4": pick9, 
#                                 "pick_5": pick10,
#                                 "ban_1": ban6, 
#                                 "ban_2": ban7, 
#                                 "ban_3": ban8, 
#                                 "ban_4": ban9, 
#                                 "ban_5": ban10,
#                                 "win": secondWin}, ignore_index=True)

Unnamed: 0,pick_1,pick_2,pick_3,pick_4,pick_5,ban_1,ban_2,ban_3,ban_4,ban_5,ban_6,win
0,9,50,62,55,42,88,107,3,1,43,12,True
1,53,16,87,39,8,12,38,91,109,18,106,False
2,16,85,86,15,74,107,3,38,43,106,12,False
3,53,71,70,112,46,12,60,91,50,1,17,True
4,91,85,64,41,9,107,3,88,43,53,47,False
5,112,71,13,54,39,47,60,38,17,106,76,True
6,3,86,47,8,78,60,112,71,4,45,95,True
7,9,88,87,96,12,95,40,36,61,1,53,False
8,40,31,62,109,20,53,55,90,78,1,67,False
9,47,86,107,13,93,67,112,3,71,4,43,True


# Madison's Stuff

4) (Madison) Training our model! 
Models: Decision Tree 
Models: [research models] - don’t combine rows, focus on individual games, run past team

Drop patch when running through models

In [80]:
# import decision tree and pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn import svm
import warnings; warnings.simplefilter('ignore')

# looking for categories and we have numbers-- switch ints to strings???
dpc_df['pick_1'] = dpc_df['pick_1'].astype(int)
dpc_df['pick_2'] = dpc_df['pick_2'].astype(int)
dpc_df['pick_3'] = dpc_df['pick_3'].astype(int)
dpc_df['pick_4'] = dpc_df['pick_4'].astype(int)
dpc_df['pick_5'] = dpc_df['pick_5'].astype(int)
dpc_df['ban_1'] = dpc_df['ban_1'].astype(int)
dpc_df['ban_2'] = dpc_df['ban_2'].astype(int)
dpc_df['ban_3'] = dpc_df['ban_3'].astype(int)
dpc_df['ban_4'] = dpc_df['ban_4'].astype(int)
dpc_df['ban_5'] = dpc_df['ban_5'].astype(int)
dpc_df['ban_6'] = dpc_df['ban_6'].astype(int)

In [81]:
# split data
train_features, test_features, train_outcome, test_outcome = train_test_split(
   dpc_df.drop("win", axis=1),     
   dpc_df.loc[: , "win"],    
   test_size=0.20
)

# solves an error about unknown labels
train_outcome = np.asarray(train_outcome, dtype="|S6")
test_outcome = np.asarray(test_outcome, dtype="|S6")

## Decision Tree

In [82]:
# create classifer
tree_clf = DecisionTreeClassifier()

# grid search with preprocessing
pipeline = make_pipeline(MinMaxScaler(), tree_clf)
param_grid = {'decisiontreeclassifier__max_depth': np.arange(3, 20), 'decisiontreeclassifier__criterion': ["gini", "entropy"], 'decisiontreeclassifier__splitter': ["best", "random"], 'decisiontreeclassifier__min_samples_split': np.arange(2, 20)} 

# pass pipeline to grid search
grid = GridSearchCV(pipeline, param_grid, cv=3)

grid.fit(train_features, train_outcome)
grid.score(test_features, test_outcome)


0.5202702702702703

## Linear SCV

https://scikit-learn.org/stable/modules/svm.html#classification

In [85]:
# create classifer
clf_svm = svm.SVC(gamma='scale')

# grid search with preprocessing
pipeline = make_pipeline(MinMaxScaler(), clf_svm)
parameter_grid = {} 

# pass pipeline to grid search
grid = GridSearchCV(pipeline, parameter_grid)
grid.fit(train_features, train_outcome)
grid.score(test_features, test_outcome)

0.5337837837837838

The Linear SCV model consistenly score