## Code from Final Resource File

In [70]:
# Import packages
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import math
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
#import data_functions


# load the raw match table; the JSON is transposed after reading
dpc = pd.read_json("data/DPC.json").transpose()

# keep only the fields the draft analysis reads
dpc = dpc.drop(
    columns=[
        "all_word_counts", "barracks_status_dire", "barracks_status_radiant",
        "chat", "cluster", "comeback", "cosmetics", "dire_score",
        "series_type", "skill", "stomp", "teamfights", "throw",
        "tower_status_dire", "tower_status_radiant", "loss", "leagueid",
        "duration", "engine", "first_blood_time", "radiant_xp_adv",
        "replay_url", "radiant_gold_adv", "positive_votes", "radiant_score",
        "negative_votes", "objectives", "dire_team_id", "radiant_team_id",
        "human_players", "league", "lobby_type", "draft_timings",
        "my_word_counts", "region", "replay_salt", "series_id",
    ]
)
# remove the row whose index label is 1
dpc = dpc.drop(index=[1])

# (7.06, 7.07, 7.08, 7.09, 7.10, 7.11, 7.12, 7.13, 7.14, 7.15, 7.16, 7.16)
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

def create_pick_ban_1_team(patch_range_low, patch_range_high, data=None):
    """Return one row per team per match: its picks, its bans and whether it won.

    A match is used only when its ``patch`` lies in
    ``[patch_range_low, patch_range_high]``, its ``game_mode`` is 2 and its
    ``picks_bans`` draft has exactly 22 entries.  Every such match produces
    two consecutive rows: first the team that made the first pick (draft
    slot 6), then the other team.

    Args:
        patch_range_low: Lowest patch number to include (inclusive).
        patch_range_high: Highest patch number to include (inclusive).
        data: Match table to read.  Defaults to the module-level ``dpc``.

    Returns:
        DataFrame with columns ``pick_1``..``pick_5``, ``ban_1``..``ban_6``
        and ``win``.
    """
    if data is None:
        data = dpc

    columns = ["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
               "ban_1", "ban_2", "ban_3", "ban_4", "ban_5", "ban_6",
               "win"]

    # Draft slots owned by the first-picking team and by its opponent.
    # Each side has its own six ban slots; slot 1 (opponent's first ban) and
    # slot 19 (first team's sixth ban) must stay separate.
    first_pick_slots = (6, 9, 15, 17, 20)
    first_ban_slots = (0, 2, 4, 11, 13, 19)
    second_pick_slots = (7, 8, 14, 16, 21)
    second_ban_slots = (1, 3, 5, 10, 12, 18)

    rows = []
    for i in data.index:
        patch = data.at[i, 'patch']
        if not (patch_range_low <= patch <= patch_range_high):
            continue
        if data.at[i, 'game_mode'] != 2:
            continue
        draft = data.at[i, 'picks_bans']
        if len(draft) != 22:
            continue

        heroes = [entry["hero_id"] for entry in draft]
        first_picks = [heroes[s] for s in first_pick_slots]
        first_bans = [heroes[s] for s in first_ban_slots]
        second_picks = [heroes[s] for s in second_pick_slots]
        second_bans = [heroes[s] for s in second_ban_slots]

        # players[0] tells us which side drafted first: the first-picking
        # team is radiant exactly when players[0] is on that team and is
        # radiant, or is on the other team and is dire.
        player1 = data.at[i, "players"][0]
        player1_first_pick = player1["hero_id"] in first_picks
        first_pick_is_radiant = player1_first_pick == bool(player1["isRadiant"])

        radiant_win = bool(data.at[i, 'radiant_win'])
        first_win = radiant_win if first_pick_is_radiant else not radiant_win

        rows.append(dict(zip(columns, first_picks + first_bans + [first_win])))
        rows.append(dict(zip(columns, second_picks + second_bans + [not first_win])))

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)

# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# pick / ban / win frames: one per major-patch window, plus one spanning all of them
(patch707_single,
 patch710_single,
 patch714_single,
 patchAll_single) = (
    create_pick_ban_1_team(low, high)
    for low, high in [(26, 28), (29, 32), (33, 36), (26, 36)]
)

def create_pick_ban_team(patch_range_low, patch_range_high, data=None):
    """Return one row per team per match: picks, bans, result and team id.

    A match is used only when its ``patch`` lies in
    ``[patch_range_low, patch_range_high]``, its ``game_mode`` is 2, its
    ``picks_bans`` draft has exactly 22 entries and both ``radiant_team`` and
    ``dire_team`` carry a ``team_id``.  Every such match produces two
    consecutive rows: first the team that made the first pick (draft slot 6),
    then the other team.

    Args:
        patch_range_low: Lowest patch number to include (inclusive).
        patch_range_high: Highest patch number to include (inclusive).
        data: Match table to read.  Defaults to the module-level ``dpc``.

    Returns:
        DataFrame with columns ``pick_1``..``pick_5``, ``ban_1``..``ban_6``,
        ``win`` and ``team``.
    """
    if data is None:
        data = dpc

    columns = ["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
               "ban_1", "ban_2", "ban_3", "ban_4", "ban_5", "ban_6",
               "win", "team"]

    # Draft slots owned by the first-picking team and by its opponent.
    # Each side has its own six ban slots; slot 1 (opponent's first ban) and
    # slot 19 (first team's sixth ban) must stay separate.
    first_pick_slots = (6, 9, 15, 17, 20)
    first_ban_slots = (0, 2, 4, 11, 13, 19)
    second_pick_slots = (7, 8, 14, 16, 21)
    second_ban_slots = (1, 3, 5, 10, 12, 18)

    rows = []
    for i in data.index:
        patch = data.at[i, 'patch']
        if not (patch_range_low <= patch <= patch_range_high):
            continue
        if data.at[i, 'game_mode'] != 2:
            continue
        draft = data.at[i, 'picks_bans']
        if len(draft) != 22:
            continue

        # Skip matches where either team record is missing (not subscriptable).
        try:
            radiant_id = data.at[i, "radiant_team"]["team_id"]
            dire_id = data.at[i, "dire_team"]["team_id"]
        except TypeError:
            continue

        heroes = [entry["hero_id"] for entry in draft]
        first_picks = [heroes[s] for s in first_pick_slots]
        first_bans = [heroes[s] for s in first_ban_slots]
        second_picks = [heroes[s] for s in second_pick_slots]
        second_bans = [heroes[s] for s in second_ban_slots]

        # players[0] tells us which side drafted first: the first-picking
        # team is radiant exactly when players[0] is on that team and is
        # radiant, or is on the other team and is dire.
        player1 = data.at[i, "players"][0]
        player1_first_pick = player1["hero_id"] in first_picks
        first_pick_is_radiant = player1_first_pick == bool(player1["isRadiant"])

        radiant_win = bool(data.at[i, 'radiant_win'])
        if first_pick_is_radiant:
            first_win, first_id, second_id = radiant_win, radiant_id, dire_id
        else:
            first_win, first_id, second_id = not radiant_win, dire_id, radiant_id

        rows.append(dict(zip(columns, first_picks + first_bans + [first_win, first_id])))
        rows.append(dict(zip(columns, second_picks + second_bans + [not first_win, second_id])))

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


# (7.06, 7.07, 7.08, 7.09, 7.10, 7.11, 7.12, 7.13, 7.14, 7.15, 7.16, 7.16)
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# for data with picks and bans
# for hero data https://github.com/kronusme/dota2-api/blob/master/data/heroes.json

# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# pick / ban / win / team_id frames: one per major-patch window, plus one spanning all
(patch707_pbt,
 patch710_pbt,
 patch714_pbt,
 patchAll_pbt) = (
    create_pick_ban_team(low, high)
    for low, high in [(26, 28), (29, 32), (33, 36), (26, 36)]
)

def create_pick_ban_both_teams(d, patch_range_low, patch_range_high):
    """Return one row per match with both teams' drafts and the outcome.

    A match is used only when its ``patch`` lies in
    ``[patch_range_low, patch_range_high]``, its ``game_mode`` is 2, its
    ``picks_bans`` draft has exactly 22 entries and both ``radiant_team`` and
    ``dire_team`` carry a ``team_id``.

    Args:
        d: Match table to read (the callers in this file pass ``dpc``).
        patch_range_low: Lowest patch number to include (inclusive).
        patch_range_high: Highest patch number to include (inclusive).

    Returns:
        DataFrame with the radiant picks/bans (``r_pick_*``, ``r_ban_*``),
        the dire picks/bans (``d_pick_*``, ``d_ban_*``), ``radiantWin`` and
        the ``radiant`` / ``dire`` team ids.
    """
    columns = ["r_pick_1", "r_pick_2", "r_pick_3", "r_pick_4", "r_pick_5",
               "r_ban_1", "r_ban_2", "r_ban_3", "r_ban_4", "r_ban_5", "r_ban_6",
               "d_pick_1", "d_pick_2", "d_pick_3", "d_pick_4", "d_pick_5",
               "d_ban_1", "d_ban_2", "d_ban_3", "d_ban_4", "d_ban_5", "d_ban_6",
               "radiantWin", "radiant", "dire"]

    # Draft slots owned by the first-picking team and by its opponent.
    # Each side has its own six ban slots; slot 1 (opponent's first ban) and
    # slot 19 (first team's sixth ban) must stay separate.
    first_pick_slots = (6, 9, 15, 17, 20)
    first_ban_slots = (0, 2, 4, 11, 13, 19)
    second_pick_slots = (7, 8, 14, 16, 21)
    second_ban_slots = (1, 3, 5, 10, 12, 18)

    rows = []
    for i in d.index:
        patch = d.at[i, 'patch']
        if not (patch_range_low <= patch <= patch_range_high):
            continue
        if d.at[i, 'game_mode'] != 2:
            continue
        draft = d.at[i, 'picks_bans']
        if len(draft) != 22:
            continue

        # Skip matches where either team record is missing (not subscriptable).
        try:
            radiant_id = d.at[i, "radiant_team"]["team_id"]
            dire_id = d.at[i, "dire_team"]["team_id"]
        except TypeError:
            continue

        heroes = [entry["hero_id"] for entry in draft]
        first_side = ([heroes[s] for s in first_pick_slots]
                      + [heroes[s] for s in first_ban_slots])
        second_side = ([heroes[s] for s in second_pick_slots]
                       + [heroes[s] for s in second_ban_slots])

        # players[0] tells us which side drafted first: the first-picking
        # team is radiant exactly when players[0] is on that team and is
        # radiant, or is on the other team and is dire.
        player1 = d.at[i, "players"][0]
        player1_first_pick = player1["hero_id"] in first_side[:5]
        first_pick_is_radiant = player1_first_pick == bool(player1["isRadiant"])

        if first_pick_is_radiant:
            radiant_side, dire_side = first_side, second_side
        else:
            radiant_side, dire_side = second_side, first_side

        outcome = [d.at[i, "radiant_win"], radiant_id, dire_id]
        rows.append(dict(zip(columns, radiant_side + dire_side + outcome)))

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# radiant picks/bans, dire picks/bans, team ids and radiant win (true/false),
# one frame per major-patch window plus one spanning all of them
(patch707_combined,
 patch710_combined,
 patch714_combined,
 patchAll_combined) = (
    create_pick_ban_both_teams(dpc, low, high)
    for low, high in [(26, 28), (29, 32), (33, 36), (26, 36)]
)

def create_picks_team(patch_range_low, patch_range_high, data=None):
    """Return one row per team per match: its picks, its result and its team id.

    A match is used only when its ``patch`` lies in
    ``[patch_range_low, patch_range_high]``, its ``game_mode`` is 2, its
    ``picks_bans`` draft has exactly 22 entries and both ``radiant_team`` and
    ``dire_team`` carry a ``team_id``.  Every such match produces two
    consecutive rows: first the team that made the first pick (draft slot 6),
    then the other team.

    Args:
        patch_range_low: Lowest patch number to include (inclusive).
        patch_range_high: Highest patch number to include (inclusive).
        data: Match table to read.  Defaults to the module-level ``dpc``.

    Returns:
        DataFrame with columns ``pick_1``..``pick_5``, ``win`` and ``team``.
    """
    if data is None:
        data = dpc

    columns = ["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
               "win", "team"]

    # Draft slots holding the picks of the first-picking team and its opponent.
    first_pick_slots = (6, 9, 15, 17, 20)
    second_pick_slots = (7, 8, 14, 16, 21)

    rows = []
    for i in data.index:
        patch = data.at[i, 'patch']
        if not (patch_range_low <= patch <= patch_range_high):
            continue
        if data.at[i, 'game_mode'] != 2:
            continue
        draft = data.at[i, 'picks_bans']
        if len(draft) != 22:
            continue

        # Skip matches where either team record is missing (not subscriptable).
        try:
            radiant_id = data.at[i, "radiant_team"]["team_id"]
            dire_id = data.at[i, "dire_team"]["team_id"]
        except TypeError:
            continue

        first_picks = [draft[s]["hero_id"] for s in first_pick_slots]
        second_picks = [draft[s]["hero_id"] for s in second_pick_slots]

        # players[0] tells us which side drafted first: the first-picking
        # team is radiant exactly when players[0] is on that team and is
        # radiant, or is on the other team and is dire.
        player1 = data.at[i, "players"][0]
        player1_first_pick = player1["hero_id"] in first_picks
        first_pick_is_radiant = player1_first_pick == bool(player1["isRadiant"])

        radiant_win = bool(data.at[i, 'radiant_win'])
        if first_pick_is_radiant:
            first_win, first_id, second_id = radiant_win, radiant_id, dire_id
        else:
            first_win, first_id, second_id = not radiant_win, dire_id, radiant_id

        rows.append(dict(zip(columns, first_picks + [first_win, first_id])))
        rows.append(dict(zip(columns, second_picks + [not first_win, second_id])))

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# picks / team_id / win (true, false) frames: one per major-patch window, plus one spanning all
(patch707_picks_team,
 patch710_picks_team,
 patch714_picks_team,
 patchAll_picks_team) = (
    create_picks_team(low, high)
    for low, high in [(26, 28), (29, 32), (33, 36), (26, 36)]
)

def create_picks(patch_range_low, patch_range_high, data=None):
    """Return one row per team per match: its five picks and whether it won.

    A match is used only when its ``patch`` lies in
    ``[patch_range_low, patch_range_high]``, its ``game_mode`` is 2 and its
    ``picks_bans`` draft has exactly 22 entries.  Every such match produces
    two consecutive rows: first the team that made the first pick (draft
    slot 6), then the other team.

    Args:
        patch_range_low: Lowest patch number to include (inclusive).
        patch_range_high: Highest patch number to include (inclusive).
        data: Match table to read.  Defaults to the module-level ``dpc``.

    Returns:
        DataFrame with columns ``pick_1``..``pick_5`` and ``win``.
    """
    if data is None:
        data = dpc

    columns = ["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
               "win"]

    # Draft slots holding the picks of the first-picking team and its opponent.
    first_pick_slots = (6, 9, 15, 17, 20)
    second_pick_slots = (7, 8, 14, 16, 21)

    rows = []
    for i in data.index:
        patch = data.at[i, 'patch']
        if not (patch_range_low <= patch <= patch_range_high):
            continue
        if data.at[i, 'game_mode'] != 2:
            continue
        draft = data.at[i, 'picks_bans']
        if len(draft) != 22:
            continue

        first_picks = [draft[s]["hero_id"] for s in first_pick_slots]
        second_picks = [draft[s]["hero_id"] for s in second_pick_slots]

        # players[0] tells us which side drafted first: the first-picking
        # team is radiant exactly when players[0] is on that team and is
        # radiant, or is on the other team and is dire.
        player1 = data.at[i, "players"][0]
        player1_first_pick = player1["hero_id"] in first_picks
        first_pick_is_radiant = player1_first_pick == bool(player1["isRadiant"])

        radiant_win = bool(data.at[i, 'radiant_win'])
        first_win = radiant_win if first_pick_is_radiant else not radiant_win

        rows.append(dict(zip(columns, first_picks + [first_win])))
        rows.append(dict(zip(columns, second_picks + [not first_win])))

    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=columns)


# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# picks-and-win-only (true, false) frames: one per major-patch window, plus one spanning all
(patch707_picks,
 patch710_picks,
 patch714_picks,
 patchAll_picks) = (
    create_picks(low, high)
    for low, high in [(26, 28), (29, 32), (33, 36), (26, 36)]
)

# Madison's Stuff

In [81]:
# import decision tree and pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn import svm
import warnings; warnings.simplefilter('ignore')

## Decision Tree 

In [82]:
def decision_tree_function(data, random_state=None):
    """Grid-search a decision tree on draft data and score it on a hold-out split.

    Args:
        data: DataFrame containing a ``radiantWin`` target column; every
            other column is used as a feature.  The frame is not modified.
        random_state: Optional seed forwarded to the train/test split and to
            the tree so a run can be reproduced.  ``None`` keeps the original
            unseeded behaviour.

    Returns:
        The score of the best grid-search estimator on the 20% test split.
    """
    # Work on a copy so the caller's frame keeps its original dtypes.
    data = data.copy()

    # Cast every column except the listed ones to int.  ``radiantWin`` is not
    # in the list, so the target is cast as well.
    for column in list(data):
        if column not in ('win', 'team', 'radiant', 'dire'):
            data[column] = data[column].astype(int)

    # split data
    train_features, test_features, train_outcome, test_outcome = train_test_split(
        data.drop("radiantWin", axis=1),
        data.loc[:, "radiantWin"],
        test_size=0.20,
        random_state=random_state,
    )

    # solves an error about unknown labels
    train_outcome = np.asarray(train_outcome, dtype="|S6")
    test_outcome = np.asarray(test_outcome, dtype="|S6")

    # create classifier
    tree_clf = DecisionTreeClassifier(random_state=random_state)

    # grid search with preprocessing
    pipeline = make_pipeline(MinMaxScaler(), tree_clf)
    param_grid = {
        'decisiontreeclassifier__max_depth': np.arange(3, 20),
        'decisiontreeclassifier__criterion': ["gini", "entropy"],
        'decisiontreeclassifier__splitter': ["best", "random"],
        'decisiontreeclassifier__min_samples_split': np.arange(2, 20),
    }

    # pass pipeline to grid search
    grid = GridSearchCV(pipeline, param_grid, cv=3)

    grid.fit(train_features, train_outcome)
    return grid.score(test_features, test_outcome)
    

## Linear SVC

https://scikit-learn.org/stable/modules/svm.html#classification

In [83]:
def linear_scv_function(data, random_state=None):
    """Fit a scaled support-vector classifier on draft data and score it on a hold-out split.

    Args:
        data: DataFrame containing a ``radiantWin`` target column; every
            other column is used as a feature.  The frame is not modified.
        random_state: Optional seed forwarded to the train/test split so a
            run can be reproduced.  ``None`` keeps the original unseeded
            behaviour.

    Returns:
        The score of the fitted estimator on the 20% test split.
    """
    # Work on a copy so the caller's frame keeps its original dtypes.
    data = data.copy()

    # Cast every column except the listed ones to int.  ``radiantWin`` is not
    # in the list, so the target is cast as well.
    for column in list(data):
        if column not in ('win', 'team', 'radiant', 'dire'):
            data[column] = data[column].astype(int)

    # split data
    train_features, test_features, train_outcome, test_outcome = train_test_split(
        data.drop("radiantWin", axis=1),
        data.loc[:, "radiantWin"],
        test_size=0.20,
        random_state=random_state,
    )

    # solves an error about unknown labels
    train_outcome = np.asarray(train_outcome, dtype="|S6")
    test_outcome = np.asarray(test_outcome, dtype="|S6")

    # create classifier
    # NOTE(review): despite the "linear" in this function's name, no kernel is
    # passed, so svm.SVC uses its default kernel (RBF per the scikit-learn
    # docs).  Pass kernel='linear' here if a linear model is what was intended.
    clf_svm = svm.SVC(gamma='scale')

    # grid search with preprocessing
    pipeline = make_pipeline(MinMaxScaler(), clf_svm)
    parameter_grid = {}  # empty grid: a single candidate with default settings

    # pass pipeline to grid search
    grid = GridSearchCV(pipeline, parameter_grid)
    grid.fit(train_features, train_outcome)
    return grid.score(test_features, test_outcome)


## Train Models and Get Results

In [84]:
# run all the models on all the data, get array in return

# the four combined (radiant + dire) frames, in the order the results are reported
sub_datasets = [
    patch707_combined,
    patch710_combined,
    patch714_combined,
    patchAll_combined,
]
decision_tree_results, linear_svc_results = [], []
for dataset in sub_datasets:
    # score both models on the same frame before moving to the next one
    decision_tree_results.append(decision_tree_function(dataset))
    linear_svc_results.append(linear_scv_function(dataset))
    

In [80]:
# report the hold-out score of each model for every combined dataset
print("decision tree results: ")
print(f"patch707_combined: {decision_tree_results[0]}")
print(f"patch710_combined: {decision_tree_results[1]}")
print(f"patch714_combined: {decision_tree_results[2]}")
print(f"patchAll_combined: {decision_tree_results[3]}")
print(" ")
print("linear svc results: ")
print(f"patch707_combined: {linear_svc_results[0]}")
print(f"patch710_combined: {linear_svc_results[1]}")
print(f"patch714_combined: {linear_svc_results[2]}")
print(f"patchAll_combined: {linear_svc_results[3]}")

decision tree results: 
patch707_combined: 0.4594594594594595
patch710_combined: 0.47058823529411764
patch714_combined: 0.6133333333333333
patchAll_combined: 0.515
 
linear svc results: 
patch707_combined: 0.40540540540540543
patch710_combined: 0.5294117647058824
patch714_combined: 0.52
patchAll_combined: 0.645


Both models, the decision tree and the linear SVC, score similarly when predicting the results of the games. 