# Final Resource

final resource as a RMarkdown webpage

- The purpose of your topic is clearly captured in your resource
- Quantitative questions are clearly and concisely explained with thoughtful text and compelling visuals
- A nuanced understanding of the important features of the dataset and topic is demonstrated.
- High-level insights (important descriptive information, major trends, notable outliers, etc.) should be prominent in your resource.
- Methods and results of statistical approaches are clear
- Your analysis should be easily reproducible using information in your GitHub repository
- Remember to tailor your resource to a specific target audience. The amount of framing you need to do for a scientific versus general audience is quite different

https://www.kaggle.com/pvkc8888/dota-2-pro-circuit-1718

https://dota2.gamepedia.com/Game_Versions

In [1]:
# Import packages
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from sklearn import metrics

In [96]:
dpc = pd.read_json("data/DPC.json").transpose()

In [97]:
dpc.drop(["all_word_counts", "barracks_status_dire", "barracks_status_radiant", "chat", "cluster", "comeback", 
          "cosmetics", "dire_score", "series_type", "skill", "stomp", "teamfights", "throw", "tower_status_dire", 
          "tower_status_radiant", "loss", "leagueid", "duration", "engine", "first_blood_time", "radiant_xp_adv", 
          "replay_url", "radiant_gold_adv", "positive_votes", "radiant_score", "negative_votes", 
          "objectives", "dire_team_id", "radiant_team_id", "human_players", "league", "lobby_type", 
          "draft_timings", "my_word_counts", "region", "replay_salt", "series_id"], axis=1, inplace=True)
dpc.drop([1], inplace=True)

In [118]:
# (7.06, 7.07, 7.08, 7.09, 7.10, 7.11, 7.12, 7.13, 7.14, 7.15, 7.16, 7.16)
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# for data with picks and bans
# for hero data https://github.com/kronusme/dota2-api/blob/master/data/heroes.json

In [119]:
# function that takes in range of patch number
# and returns dataframe with 1 team's picks and bans,
# and if the team won the match
def create_pick_ban_1_team(patch_range_low, patch_range_high):
    dpc_df = pd.DataFrame(columns=["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
                                 "ban_1", "ban_2", "ban_3", "ban_4", "ban_5", "ban_6",
                                 "win"])
    for i in dpc.index:
        patch = dpc.at[i, 'patch']
        game_mode = dpc.at[i, 'game_mode']
        if patch >= patch_range_low and patch <= patch_range_high and game_mode == 2: 
            length = len(dpc.at[i, 'picks_bans'])
            if  length == 22:
                ban1 = dpc.at[i, 'picks_bans'][0]["hero_id"]
                ban6 = dpc.at[i, 'picks_bans'][1]["hero_id"]
                ban2 = dpc.at[i, 'picks_bans'][2]["hero_id"]
                ban7 = dpc.at[i, 'picks_bans'][3]["hero_id"]
                ban3 = dpc.at[i, 'picks_bans'][4]["hero_id"]
                ban8 = dpc.at[i, 'picks_bans'][5]["hero_id"]

                pick1 = dpc.at[i, 'picks_bans'][6]["hero_id"]
                pick6 = dpc.at[i, 'picks_bans'][7]["hero_id"]
                pick7 = dpc.at[i, 'picks_bans'][8]["hero_id"]
                pick2 = dpc.at[i, 'picks_bans'][9]["hero_id"]

                ban9 = dpc.at[i, 'picks_bans'][10]["hero_id"]
                ban4 = dpc.at[i, 'picks_bans'][11]["hero_id"]
                ban10 = dpc.at[i, 'picks_bans'][12]["hero_id"]
                ban5 = dpc.at[i, 'picks_bans'][13]["hero_id"]

                pick8 = dpc.at[i, 'picks_bans'][14]["hero_id"]
                pick3 = dpc.at[i, 'picks_bans'][15]["hero_id"]
                pick9 = dpc.at[i, 'picks_bans'][16]["hero_id"]
                pick4 = dpc.at[i, 'picks_bans'][17]["hero_id"]  

                ban11 = dpc.at[i, 'picks_bans'][18]["hero_id"]
                ban6 = dpc.at[i, 'picks_bans'][19]["hero_id"]

                pick5 = dpc.at[i, 'picks_bans'][20]["hero_id"]
                pick10 = dpc.at[i, 'picks_bans'][21]["hero_id"] 

                firstWin = True
                secondWin = False

                team1 = pd.Series({"pick_1": pick1, "pick_2": pick2, "pick_3": pick3, "pick_4": pick4, "pick_5": pick5})
                player1_hero = dpc.at[i, "players"][0]["hero_id"]
                player1_first_pick = False
                player1_radiant = dpc.at[i, "players"][0]["isRadiant"]

                for hero in team1:
                    if hero == player1_hero:
                        player1_first_pick = True
                        # this is ran when player1's team has first pick and is radiant
                        if player1_radiant:
                            firstWin = dpc.at[i, 'radiant_win']
                            secondWin = not dpc.at[i, 'radiant_win']

                if not player1_first_pick:
                    # print(str(dpc.at[i, "players"][0]["name"]) + " has 2nd pick and is radiant")
                    secondWin = dpc.at[i, 'radiant_win']
                    firstWin = not dpc.at[i, 'radiant_win']

                dpc_df = dpc_df.append({"pick_1": pick1, 
                                         "pick_2": pick2, 
                                         "pick_3": pick3, 
                                         "pick_4": pick4, 
                                         "pick_5": pick5,
                                         "ban_1": ban1, 
                                         "ban_2": ban2, 
                                         "ban_3": ban3, 
                                         "ban_4": ban4, 
                                         "ban_5": ban5,
                                         "ban_6": ban6,
                                         "win": firstWin}, ignore_index=True)
                dpc_df = dpc_df.append({"pick_1": pick6, 
                                         "pick_2": pick7, 
                                         "pick_3": pick8, 
                                         "pick_4": pick9, 
                                         "pick_5": pick10,
                                         "ban_1": ban6, 
                                         "ban_2": ban7, 
                                         "ban_3": ban8, 
                                         "ban_4": ban9, 
                                         "ban_5": ban10,
                                         "ban_6": ban11,
                                         "win": secondWin}, ignore_index=True)
    return dpc_df

In [None]:
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# has data for pick, ban, and win
patch707_single = create_pick_ban_1_team(26, 28)
patch710_single = create_pick_ban_1_team(29, 32)
patch714_single = create_pick_ban_1_team(33, 36)
patchAll_single = create_pick_ban_1_team(26, 36)

In [172]:
# function that takes in range of patch number
# and returns dataframe with radiant picks, dire picks, radiant/dire team ids,
# and if radiant won the match
def create_pick_ban_both_teams(patch_range_low, patch_range_high):
    dpc_df = pd.DataFrame(columns=["r_pick_1", "r_pick_2", "r_pick_3", "r_pick_4", "r_pick_5",
                                     "r_ban_1", "r_ban_2", "r_ban_3", "r_ban_4", "r_ban_5", "r_ban_6",
                                     "d_pick_1", "d_pick_2", "d_pick_3", "d_pick_4", "d_pick_5",
                                     "d_ban_1", "d_ban_2", "d_ban_3", "d_ban_4", "d_ban_5", "d_ban_6",
                                     "radiantWin", "radiant", "dire"])
    for i in dpc.index: 
        patch = dpc.at[i, 'patch']
        game_mode = dpc.at[i, 'game_mode']
        if patch >= patch_range_low and patch <= patch_range_high and game_mode == 2: 
            length = len(dpc.at[i, 'picks_bans'])
            if  length == 22:
                ban1 = dpc.at[i, "picks_bans"][0]["hero_id"]
                ban6 = dpc.at[i, "picks_bans"][1]["hero_id"]
                ban2 = dpc.at[i, "picks_bans"][2]["hero_id"]
                ban7 = dpc.at[i, "picks_bans"][3]["hero_id"]
                ban3 = dpc.at[i, "picks_bans"][4]["hero_id"]
                ban8 = dpc.at[i, "picks_bans"][5]["hero_id"]

                pick1 = dpc.at[i, "picks_bans"][6]["hero_id"]
                pick6 = dpc.at[i, "picks_bans"][7]["hero_id"]
                pick7 = dpc.at[i, "picks_bans"][8]["hero_id"]
                pick2 = dpc.at[i, "picks_bans"][9]["hero_id"]

                ban9 = dpc.at[i, "picks_bans"][10]["hero_id"]
                ban4 = dpc.at[i, "picks_bans"][11]["hero_id"]
                ban10 = dpc.at[i, "picks_bans"][12]["hero_id"]
                ban5 = dpc.at[i, "picks_bans"][13]["hero_id"]

                pick8 = dpc.at[i, "picks_bans"][14]["hero_id"]
                pick3 = dpc.at[i, "picks_bans"][15]["hero_id"]
                pick9 = dpc.at[i, "picks_bans"][16]["hero_id"]
                pick4 = dpc.at[i, "picks_bans"][17]["hero_id"]  

                ban11 = dpc.at[i, "picks_bans"][18]["hero_id"]
                ban6 = dpc.at[i, "picks_bans"][19]["hero_id"]

                pick5 = dpc.at[i, "picks_bans"][20]["hero_id"]
                pick10 = dpc.at[i, "picks_bans"][21]["hero_id"] 

                radiantWin = dpc.at[i, "radiant_win"]
                
                try:
                    team1_id = dpc.at[i, "radiant_team"]["team_id"]
                except TypeError:
                    continue
                team1 = pd.Series({"pick_1": pick1, "pick_2": pick2, "pick_3": pick3, "pick_4": pick4, "pick_5": pick5})
                player1_hero = dpc.at[i, "players"][0]["hero_id"]
                player1_first_pick = False
                player1_radiant = dpc.at[i, "players"][0]["isRadiant"]
                picks_bans = {}

                for hero in team1:
                    if hero == player1_hero:
                        player1_first_pick = True
                        # this is ran when player1's team has first pick and is radiant
                        if player1_radiant:
                            # print(str(dpc.at[i, "players"][0]["name"]) + " has 1st pick and is radiant")
                            picks_bans = {"r_pick_1": pick1, "r_pick_2": pick2, "r_pick_3": pick3, 
                                            "r_pick_4": pick4, "r_pick_5": pick5,
                                            "r_ban_1": ban1, "r_ban_2": ban2, "r_ban_3": ban3, 
                                            "r_ban_4": ban4, "r_ban_5": ban5, "r_ban_6": ban6,
                                            "d_pick_1": pick6, "d_pick_2": pick7, "d_pick_3": pick8, 
                                            "d_pick_4": pick9, "d_pick_5": pick10,
                                            "d_ban_1": ban6, "d_ban_2": ban7, "d_ban_3": ban8, 
                                            "d_ban_4": ban9, "d_ban_5": ban10, "d_ban_6": ban11}

                if not player1_first_pick:
                        # print(str(dpc.at[i, "players"][0]["name"]) + " has 2nd pick and is radiant")
                        picks_bans = {"r_pick_1": pick6, "r_pick_2": pick7, "r_pick_3": pick8, 
                                        "r_pick_4": pick9, "r_pick_5": pick10,
                                        "r_ban_1": ban6, "r_ban_2": ban7, "r_ban_3": ban8, 
                                        "r_ban_4": ban9, "r_ban_5": ban10, "r_ban_6": ban11,
                                        "d_pick_1": pick1, "d_pick_2": pick2, "d_pick_3": pick3, 
                                        "d_pick_4": pick4, "d_pick_5": pick5,
                                        "d_ban_1": ban1, "d_ban_2": ban2, "d_ban_3": ban3, 
                                        "d_ban_4": ban4, "d_ban_5": ban5, "d_ban_6": ban6}        

                picks_bans.update({"radiantWin": radiantWin,
                                 "radiant": radiant,
                                 "dire": dire})

                row = pd.Series(picks_bans)

                dpc_df = dpc_df.append(row, ignore_index=True)
    return dpc_df

In [178]:
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# has radiant picks/bans, dire picks/bans, team_ids, radiant win(true/false)
patch707_combined = create_pick_ban_both_teams(26, 28)
patch710_combined = create_pick_ban_both_teams(29, 32)
patch714_combined = create_pick_ban_both_teams(33, 36)
patchAll_combined = create_pick_ban_both_teams(26, 36)

In [169]:
def create_picks_team(patch_range_low, patch_range_high):
    dpc_df = pd.DataFrame(columns=["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
                                 "win", "team"])
    for i in dpc.index: 
        patch = dpc.at[i, 'patch']
        game_mode = dpc.at[i, 'game_mode']
        if patch >= patch_range_low and patch <= patch_range_high and game_mode == 2: 
            length = len(dpc.at[i, 'picks_bans'])
            if  length == 22:
                pick1 = dpc.at[i, "picks_bans"][6]["hero_id"]
                pick6 = dpc.at[i, "picks_bans"][7]["hero_id"]
                pick7 = dpc.at[i, "picks_bans"][8]["hero_id"]
                pick2 = dpc.at[i, "picks_bans"][9]["hero_id"]

                pick8 = dpc.at[i, "picks_bans"][14]["hero_id"]
                pick3 = dpc.at[i, "picks_bans"][15]["hero_id"]
                pick9 = dpc.at[i, "picks_bans"][16]["hero_id"]
                pick4 = dpc.at[i, "picks_bans"][17]["hero_id"]  

                pick5 = dpc.at[i, "picks_bans"][20]["hero_id"]
                pick10 = dpc.at[i, "picks_bans"][21]["hero_id"] 

                firstWin = True
                secondWin = False
                try:
                    team1_id = dpc.at[i, "radiant_team"]["team_id"]
                except TypeError:
                    continue
                team2_id = dpc.at[i, "dire_team"]["team_id"]
                
                team1 = pd.Series({"pick_1": pick1, "pick_2": pick2, "pick_3": pick3, "pick_4": pick4, "pick_5": pick5})
                player1_hero = dpc.at[i, "players"][0]["hero_id"]
                player1_first_pick = False
                player1_radiant = dpc.at[i, "players"][0]["isRadiant"]

                for hero in team1:
                    if hero == player1_hero:
                        player1_first_pick = True
                        # this is ran when player1's team has first pick and is radiant
                        if player1_radiant:
                            firstWin = dpc.at[i, 'radiant_win']
                            secondWin = not dpc.at[i, 'radiant_win']

                if not player1_first_pick:
                    # print(str(dpc.at[i, "players"][0]["name"]) + " has 2nd pick and is radiant")
                    secondWin = dpc.at[i, 'radiant_win']
                    firstWin = not dpc.at[i, 'radiant_win']
                    team1_id = dpc.at[i, "dire_team"]["team_id"]
                    team2_id = dpc.at[i, "radiant_team"]["team_id"]

                dpc_df = dpc_df.append({"pick_1": pick1, 
                                         "pick_2": pick2, 
                                         "pick_3": pick3, 
                                         "pick_4": pick4, 
                                         "pick_5": pick5,
                                         "win": firstWin,
                                         "team": team1_id}, ignore_index=True)
                dpc_df = dpc_df.append({"pick_1": pick6, 
                                         "pick_2": pick7, 
                                         "pick_3": pick8, 
                                         "pick_4": pick9, 
                                         "pick_5": pick10,
                                         "win": secondWin,
                                         "team": team2_id}, ignore_index=True)
    return dpc_df

In [175]:
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# these are data with picks, team_id, and win (true, false)
patch707_picks_team = create_picks_team(26, 28)
patch710_picks_team = create_picks_team(29, 32)
patch714_picks_team = create_picks_team(33, 36)
patchAll_picks_team = create_picks_team(26, 36)

In [187]:
def create_picks(patch_range_low, patch_range_high):
    dpc_df = pd.DataFrame(columns=["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
                                 "win"])
    for i in dpc.index: 
        patch = dpc.at[i, 'patch']
        game_mode = dpc.at[i, 'game_mode']
        if patch >= patch_range_low and patch <= patch_range_high and game_mode == 2: 
            length = len(dpc.at[i, 'picks_bans'])
            if  length == 22:
                pick1 = dpc.at[i, "picks_bans"][6]["hero_id"]
                pick6 = dpc.at[i, "picks_bans"][7]["hero_id"]
                pick7 = dpc.at[i, "picks_bans"][8]["hero_id"]
                pick2 = dpc.at[i, "picks_bans"][9]["hero_id"]

                pick8 = dpc.at[i, "picks_bans"][14]["hero_id"]
                pick3 = dpc.at[i, "picks_bans"][15]["hero_id"]
                pick9 = dpc.at[i, "picks_bans"][16]["hero_id"]
                pick4 = dpc.at[i, "picks_bans"][17]["hero_id"]  

                pick5 = dpc.at[i, "picks_bans"][20]["hero_id"]
                pick10 = dpc.at[i, "picks_bans"][21]["hero_id"] 

                firstWin = True
                secondWin = False

                team1 = pd.Series({"pick_1": pick1, "pick_2": pick2, "pick_3": pick3, "pick_4": pick4, "pick_5": pick5})
                player1_hero = dpc.at[i, "players"][0]["hero_id"]
                player1_first_pick = False
                player1_radiant = dpc.at[i, "players"][0]["isRadiant"]

                for hero in team1:
                    if hero == player1_hero:
                        player1_first_pick = True
                        # this is ran when player1's team has first pick and is radiant
                        if player1_radiant:
                            firstWin = dpc.at[i, 'radiant_win']
                            secondWin = not dpc.at[i, 'radiant_win']

                if not player1_first_pick:
                    # print(str(dpc.at[i, "players"][0]["name"]) + " has 2nd pick and is radiant")
                    secondWin = dpc.at[i, 'radiant_win']
                    firstWin = not dpc.at[i, 'radiant_win']

                dpc_df = dpc_df.append({"pick_1": pick1, 
                                         "pick_2": pick2, 
                                         "pick_3": pick3, 
                                         "pick_4": pick4, 
                                         "pick_5": pick5,
                                         "win": firstWin}, ignore_index=True)
                dpc_df = dpc_df.append({"pick_1": pick6, 
                                         "pick_2": pick7, 
                                         "pick_3": pick8, 
                                         "pick_4": pick9, 
                                         "pick_5": pick10,
                                         "win": secondWin}, ignore_index=True)
    return dpc_df

In [188]:
# major patches
# patch 7.07 to 7.09 (patch 26 to 28)
# patch 7.10 to 7.13 (patch 29 to 32)
# patch 7.14 to 7.16 (patch 33 to 36)

# these are data with only picks and win (true,false)
patch707_picks = create_picks(26, 28)
patch710_picks = create_picks(29, 32)
patch714_picks = create_picks(33, 36)
patchAll_picks = create_picks(26, 36)

In [182]:
# 115 heroes in the game

def create_vector(patch_range_low, patch_range_high):
    for i in dpc.index: 
        patch = dpc.at[i, 'patch']
        game_mode = dpc.at[i, 'game_mode']
        if patch >= patch_range_low and patch <= patch_range_high and game_mode == 2: 
            length = len(dpc.at[i, 'picks_bans'])
            if  length == 22:
                
                ban1 = dpc.at[i, "picks_bans"][0]["hero_id"]
                ban6 = dpc.at[i, "picks_bans"][1]["hero_id"]
                ban2 = dpc.at[i, "picks_bans"][2]["hero_id"]
                ban7 = dpc.at[i, "picks_bans"][3]["hero_id"]
                ban3 = dpc.at[i, "picks_bans"][4]["hero_id"]
                ban8 = dpc.at[i, "picks_bans"][5]["hero_id"]

                pick1 = dpc.at[i, "picks_bans"][6]["hero_id"]
                pick6 = dpc.at[i, "picks_bans"][7]["hero_id"]
                pick7 = dpc.at[i, "picks_bans"][8]["hero_id"]
                pick2 = dpc.at[i, "picks_bans"][9]["hero_id"]

                ban9 = dpc.at[i, "picks_bans"][10]["hero_id"]
                ban4 = dpc.at[i, "picks_bans"][11]["hero_id"]
                ban10 = dpc.at[i, "picks_bans"][12]["hero_id"]
                ban5 = dpc.at[i, "picks_bans"][13]["hero_id"]

                pick8 = dpc.at[i, "picks_bans"][14]["hero_id"]
                pick3 = dpc.at[i, "picks_bans"][15]["hero_id"]
                pick9 = dpc.at[i, "picks_bans"][16]["hero_id"]
                pick4 = dpc.at[i, "picks_bans"][17]["hero_id"]  

                ban11 = dpc.at[i, "picks_bans"][18]["hero_id"]
                ban6 = dpc.at[i, "picks_bans"][19]["hero_id"]

                pick5 = dpc.at[i, "picks_bans"][20]["hero_id"]
                pick10 = dpc.at[i, "picks_bans"][21]["hero_id"] 

                radiantWin = dpc.at[i, "radiant_win"]
                
                team1 = pd.Series({"pick_1": pick1, "pick_2": pick2, "pick_3": pick3, "pick_4": pick4, "pick_5": pick5})
                player1_hero = dpc.at[i, "players"][0]["hero_id"]
                player1_first_pick = False
                player1_radiant = dpc.at[i, "players"][0]["isRadiant"]
                picks_bans = {}

                hero_vector = np.zeros((4, 115))
                
                for hero in team1:
                    if hero == player1_hero:
                        player1_first_pick = True
                        # this is ran when player1's team has first pick and is radiant
                        if player1_radiant:
                            # print(str(dpc.at[i, "players"][0]["name"]) + " has 1st pick and is radiant")
                            picks_bans = {"r_pick_1": pick1, "r_pick_2": pick2, "r_pick_3": pick3, 
                                            "r_pick_4": pick4, "r_pick_5": pick5,
                                            "r_ban_1": ban1, "r_ban_2": ban2, "r_ban_3": ban3, 
                                            "r_ban_4": ban4, "r_ban_5": ban5, "r_ban_6": ban6,
                                            "d_pick_1": pick6, "d_pick_2": pick7, "d_pick_3": pick8, 
                                            "d_pick_4": pick9, "d_pick_5": pick10,
                                            "d_ban_1": ban6, "d_ban_2": ban7, "d_ban_3": ban8, 
                                            "d_ban_4": ban9, "d_ban_5": ban10, "d_ban_6": ban11}    

                if not player1_first_pick:
                        # print(str(dpc.at[i, "players"][0]["name"]) + " has 2nd pick and is radiant")
                        picks_bans = {"r_pick_1": pick6, "r_pick_2": pick7, "r_pick_3": pick8, 
                                        "r_pick_4": pick9, "r_pick_5": pick10,
                                        "r_ban_1": ban6, "r_ban_2": ban7, "r_ban_3": ban8, 
                                        "r_ban_4": ban9, "r_ban_5": ban10, "r_ban_6": ban11,
                                        "d_pick_1": pick1, "d_pick_2": pick2, "d_pick_3": pick3, 
                                        "d_pick_4": pick4, "d_pick_5": pick5,
                                        "d_ban_1": ban1, "d_ban_2": ban2, "d_ban_3": ban3, 
                                        "d_ban_4": ban4, "d_ban_5": ban5, "d_ban_6": ban6}        

                pick_b = True
                for v_row in hero_vector:
                    for pb in picks_bans:
                        if pick_b:
                            for i in np.arange(5):
                                v_row[pb] = 1;
                            pick_b = False
                        else:
                            for i in np.arange(6):
                                v_row[pb] = 1;
                            pick_b = True

                
                
#                 picks_bans.update({"radiantWin": radiantWin,
#                                  "radiant": radiant,
#                                  "dire": dire})

#                 row = pd.Series(picks_bans)

#                 dpc_df = dpc_df.append(row, ignore_index=True)
#     return dpc_df

(4, 115)

# Below is only for testing on small subset of data

In [127]:
patches = dpc["patch"].unique()
for patch in patches:
    print("in patch " + str(patch) + ": " + str(dpc.loc[dpc["patch"] == patch].shape[0]) + " matches")
    print("    first instance of patch: " + str(dpc.loc[dpc["patch"] == patch].head(1).index[0]))
    print("     last instance of patch: " + str(dpc.loc[dpc["patch"] == patch].tail(1).index[0]))
# print(dpc.columns)
# print(dpc.at[3541962187, "picks_bans"][0])
# print(dpc.at[3541962187, "picks_bans"][1])
# print(dpc.at[3541962187, "radiant_team"]["name"])
# print(dpc.at[3541962187, "dire_team"]["name"])
# print(dpc.at[3625490002, "radiant_team"]["team_id"])


in patch 25: 91 matches
    first instance of patch: 3497210298
     last instance of patch: 3530486844
in patch 26: 301 matches
    first instance of patch: 3537526907
     last instance of patch: 3704280890
in patch 27: 33 matches
    first instance of patch: 3710610589
     last instance of patch: 3716818051
in patch 28: 64 matches
    first instance of patch: 3743740299
     last instance of patch: 3752758474
in patch 29: 51 matches
    first instance of patch: 3763492629
     last instance of patch: 3775933971
in patch 30: 63 matches
    first instance of patch: 3781456589
     last instance of patch: 3800120344
in patch 31: 107 matches
    first instance of patch: 3805102807
     last instance of patch: 3820686389
in patch 32: 35 matches
    first instance of patch: 3828770018
     last instance of patch: 3834963072
in patch 33: 110 matches
    first instance of patch: 3853663055
     last instance of patch: 3876799364
in patch 34: 156 matches
    first instance of patch: 3878216

In [43]:
# testing on smaller subset of data

dpc_s = pd.read_json("data/DPC_small.json").transpose()
dpc_s.drop(["all_word_counts", "barracks_status_dire", "barracks_status_radiant", "chat", "cluster", 
          "cosmetics", "dire_score", "series_type", "skill", "teamfights", "throw", "tower_status_dire", 
          "tower_status_radiant", "loss", "leagueid", "duration", "engine", "first_blood_time", "radiant_xp_adv", 
          "replay_url", "radiant_gold_adv", "positive_votes", "radiant_score", "negative_votes", 
          "objectives", "dire_team_id", "radiant_team_id", "human_players", "league", "lobby_type", 
          "draft_timings", "my_word_counts", "region", "replay_salt", "series_id"], axis=1, inplace=True)

In [135]:
dpc_s_df = pd.DataFrame(columns=["r_pick_1", "r_pick_2", "r_pick_3", "r_pick_4", "r_pick_5",
                                 "r_ban_1", "r_ban_2", "r_ban_3", "r_ban_4", "r_ban_5", "r_ban_6",
                                 "d_pick_1", "d_pick_2", "d_pick_3", "d_pick_4", "d_pick_5",
                                 "d_ban_1", "d_ban_2", "d_ban_3", "d_ban_4", "d_ban_5", "d_ban_6",
                                 "radiantWin", "radiant", "dire"])

dpc_ss_df = pd.DataFrame(columns=["pick_1", "pick_2", "pick_3", "pick_4", "pick_5",
                                 "ban_1", "ban_2", "ban_3", "ban_4", "ban_5", "ban_6",
                                 "win", "isRadiant", "team"])

for i in dpc_s.index: 
    print(dpc_s.at[i, "match_id"])
    ban1 = dpc_s.at[i, "picks_bans"][0]["hero_id"]
    ban6 = dpc_s.at[i, "picks_bans"][1]["hero_id"]
    ban2 = dpc_s.at[i, "picks_bans"][2]["hero_id"]
    ban7 = dpc_s.at[i, "picks_bans"][3]["hero_id"]
    ban3 = dpc_s.at[i, "picks_bans"][4]["hero_id"]
    ban8 = dpc_s.at[i, "picks_bans"][5]["hero_id"]

    pick1 = dpc_s.at[i, "picks_bans"][6]["hero_id"]
    pick6 = dpc_s.at[i, "picks_bans"][7]["hero_id"]
    pick7 = dpc_s.at[i, "picks_bans"][8]["hero_id"]
    pick2 = dpc_s.at[i, "picks_bans"][9]["hero_id"]

    ban9 = dpc_s.at[i, "picks_bans"][10]["hero_id"]
    ban4 = dpc_s.at[i, "picks_bans"][11]["hero_id"]
    ban10 = dpc_s.at[i, "picks_bans"][12]["hero_id"]
    ban5 = dpc_s.at[i, "picks_bans"][13]["hero_id"]

    pick8 = dpc_s.at[i, "picks_bans"][14]["hero_id"]
    pick3 = dpc_s.at[i, "picks_bans"][15]["hero_id"]
    pick9 = dpc_s.at[i, "picks_bans"][16]["hero_id"]
    pick4 = dpc_s.at[i, "picks_bans"][17]["hero_id"]  

    ban11 = dpc_s.at[i, "picks_bans"][18]["hero_id"]
    ban6 = dpc_s.at[i, "picks_bans"][19]["hero_id"]

    pick5 = dpc_s.at[i, "picks_bans"][20]["hero_id"]
    pick10 = dpc_s.at[i, "picks_bans"][21]["hero_id"] 

    radiantWin = dpc_s.at[i, "radiant_win"]
    
    radiant = dpc_s.at[i, "radiant_team"]["team_id"]
    dire = dpc_s.at[i, "dire_team"]["team_id"]
    
    team1 = pd.Series({"pick_1": pick1, "pick_2": pick2, "pick_3": pick3, "pick_4": pick4, "pick_5": pick5})
    player1_hero = dpc_s.at[i, "players"][0]["hero_id"]
    player1_first_pick = False
    player1_radiant = dpc_s.at[i, "players"][0]["isRadiant"]
    picks_bans = {}
    
    firstWin = True
    secondWin = False
    firstRadiant = True
    secondRadiant = False
    
    for hero in team1:
        if hero == player1_hero:
            player1_first_pick = True
            # this is ran when player1's team has first pick and is radiant
            if player1_radiant:
                print(str(dpc_s.at[i, "players"][0]["name"]) + " has 1st pick and is radiant")
                picks_bans = {"r_pick_1": pick1, "r_pick_2": pick2, "r_pick_3": pick3, 
                                "r_pick_4": pick4, "r_pick_5": pick5,
                                "r_ban_1": ban1, "r_ban_2": ban2, "r_ban_3": ban3, 
                                "r_ban_4": ban4, "r_ban_5": ban5, "r_ban_6": ban6,
                                "d_pick_1": pick6, "d_pick_2": pick7, "d_pick_3": pick8, 
                                "d_pick_4": pick9, "d_pick_5": pick10,
                                "d_ban_1": ban6, "d_ban_2": ban7, "d_ban_3": ban8, 
                                "d_ban_4": ban9, "d_ban_5": ban10, "d_ban_6": ban11}
                firstWin = dpc_s.at[i, 'radiant_win']
                secondWin = not dpc_s.at[i, 'radiant_win']
                
    if not player1_first_pick:
        print(str(dpc_s.at[i, "players"][0]["name"]) + " has 2nd pick and is radiant")
        picks_bans = {"r_pick_1": pick6, "r_pick_2": pick7, "r_pick_3": pick8, 
                        "r_pick_4": pick9, "r_pick_5": pick10,
                        "r_ban_1": ban6, "r_ban_2": ban7, "r_ban_3": ban8, 
                        "r_ban_4": ban9, "r_ban_5": ban10, "r_ban_6": ban11,
                        "d_pick_1": pick1, "d_pick_2": pick2, "d_pick_3": pick3, 
                        "d_pick_4": pick4, "d_pick_5": pick5,
                        "d_ban_1": ban1, "d_ban_2": ban2, "d_ban_3": ban3, 
                        "d_ban_4": ban4, "d_ban_5": ban5, "d_ban_6": ban6}
        secondWin = dpc_s.at[i, 'radiant_win']
        firstWin = not dpc_s.at[i, 'radiant_win']
        firstRadiant = False
        secondRadiant = True
        radiant = dpc_s.at[i, "dire_team"]["team_id"]
        dire = dpc_s.at[i, "radiant_team"]["team_id"]

    picks_bans.update({"radiantWin": radiantWin,
                     "radiant": radiant,
                     "dire": dire})

    row = pd.Series(picks_bans)
    
    dpc_s_df = dpc_s_df.append(row, ignore_index=True)

    dpc_ss_df = dpc_ss_df.append({"pick_1": pick1, 
                             "pick_2": pick2, 
                             "pick_3": pick3, 
                             "pick_4": pick4, 
                             "pick_5": pick5,
                             "ban_1": ban1, 
                             "ban_2": ban2, 
                             "ban_3": ban3, 
                             "ban_4": ban4, 
                             "ban_5": ban5,
                             "ban_6": ban6,
                             "win": firstWin,
                             "isRadiant": firstRadiant,
                             "team": radiant}, ignore_index=True)
    dpc_ss_df = dpc_ss_df.append({"pick_1": pick6, 
                             "pick_2": pick7, 
                             "pick_3": pick8, 
                             "pick_4": pick9, 
                             "pick_5": pick10,
                             "ban_1": ban6, 
                             "ban_2": ban7, 
                             "ban_3": ban8, 
                             "ban_4": ban9, 
                             "ban_5": ban10,
                             "ban_6": ban11,
                             "win": secondWin,
                             "isRadiant": secondRadiant,
                             "team": dire}, ignore_index=True)
    
print(dpc_s_df.head(5))
print(dpc_ss_df.head(10))

3544999229
Miracle- has 2nd pick and is radiant
3545125310
Miracle- has 1st pick and is radiant
3545253155
Paparazi灬 has 1st pick and is radiant
3545330410
Paparazi灬 has 1st pick and is radiant
3545411013
Miracle- has 1st pick and is radiant
  r_pick_1 r_pick_2 r_pick_3 r_pick_4 r_pick_5 r_ban_1 r_ban_2 r_ban_3  \
0        3       60      103       11       70     109      38     112   
1        5       28       53       44       39     112      85      43   
2       86       51       42       43       65     107      80      47   
3       38       86       28       63       17      60     107      68   
4      107       78       83       74       54     112      43      85   

  r_ban_4 r_ban_5   ...   d_pick_5 d_ban_1 d_ban_2 d_ban_3 d_ban_4 d_ban_5  \
0      41      97   ...         29      80      47     107      54      53   
1      51      18   ...         81     109      86     107      80      54   
2      53      54   ...         93      61     112      38      97      81   
3

In [185]:
# Does bans matter
# Does Team ID matter
# Only Picks
# based off patch

print(np.arange(5))

[0 1 2 3 4]
