In [1]:
#import necessary libraries
import pandas as pd
import requests as req
from pandas import json_normalize
import numpy as np
import openpyxl
import plotly.graph_objects as go

In [2]:
def getMatchDetails(*matchids, timeout=30):
    """Fetch match details from the OpenDota API for any number of match ids.

    Parameters
    ----------
    *matchids
        Match ids to request. Each id is interpolated into the
        ``https://api.opendota.com/api/matches/{matchid}`` URL.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that
        match (default 30). Without a timeout a stalled connection would
        block the whole loop indefinitely.

    Returns
    -------
    dict
        Maps ``str(matchid)`` to a one-row DataFrame built from the JSON
        payload of that match. Matches whose request failed (network error
        or non-200 status) are reported with ``print`` and left out.
    """
    #dictionary that stores the dataframe created for each api call
    match_dataframes = {}

    for matchid in matchids:
        url = f"https://api.opendota.com/api/matches/{matchid}"

        #a network failure on one match must not discard the matches already fetched
        try:
            response = req.get(url, timeout=timeout)
        except req.exceptions.RequestException as error:
            print(f"Failed to retrieve data for Match ID {matchid}. Error: {error}")
            continue

        #if response code is good, wrap the json payload in a one-row dataframe keyed by the match id
        if response.status_code == 200:
            json_data = response.json()
            match_dataframes[str(matchid)] = pd.DataFrame([json_data])
            print(f"DataFrame for Match ID {matchid} created successfully.")
        else:
            print(f"Failed to retrieve data for Match ID {matchid}. Status Code: {response.status_code}")

    #function returns the dictionary of dataframes
    return match_dataframes


In [3]:
#request hero data and make a dataframe from results called hero_data
url = "https://api.opendota.com/api/heroes"
#timeout so a stalled connection cannot hang the notebook
get_heroes = req.get(url, timeout=30)
print(get_heroes.status_code)
#stop here on an error response instead of building hero_data from an error payload
get_heroes.raise_for_status()
get_heroes = get_heroes.json()
hero_data = pd.DataFrame(get_heroes)

200


In [4]:
#using the function created above, build a dictionary of dataframes keyed by match id,
#one dataframe per match, for the 151 games of the International 2023 tournament
#(one api request is made per id, so this cell takes a while to run)
international2023matches = getMatchDetails(7406531302,7406482053,7406424070,7406249246,7406129687,7404938247,7404889653,7404828649,7404763579,7404713057,7404668056,7404577536,7404488494,7404381764,7404249421,
                                           7403117150,7403057517,7402993316,7402943509,7402900929,7402800418,7402717428,7402626373,7402531427,7395420827,7395366323,7395312177,7395260041,7395201088,7395136494,
                                           7395048377,7394936686,7394832000,7393504676,7393464447,7393394521,7393331922,7393258586,7393133836,7393035065,7392908789,7391726311,7391681832,7391636050,7391585748,
                                           7391525205,7391463411,7391367511,7391247517,7391149823,7384188114,7384142800,7384108524,7384067209,7384021535,7383951137,7383855952,7383785611,7383689600,7382383379,
                                           7382316646,7382271393,7382234341,7382176499,7382126763,7382010427,7381903769,7381789226,7380503658,7380456695,7380454553,7380419098,7380411342,7380407733,7380401748,
                                           7380403482,7380377685,7380359415,7380353553,7380345561,7380339950,7380311197,7380292188,7380278774,7380279835,7380271573,7380204297,7380199457,7380191534,7380195136,
                                           7380184614,7380112773,7380101603,7380091953,7380089990,7379995208,7379995137,7379995224,7379995166,7379995104,7379050474,7379021143,7379016573,7379009384,7379001957,
                                           7378995240,7378986342,7378981947,7378973550,7378970387,7378962241,7378947046,7378947083,7378939289,7378932814,7378930433,7378917656,7378912368,7378905651,7378903471,
                                           7378901250,7378884672,7378880402,7378874061,7378876465,7378853458,7378851814,7378829327,7378828888,7378824942,7378805797,7378785114,7378776322,7378772656,7378760136,
                                           7378751246,7378707167,7378710682,7378693411,7378693331,7378676993,7378630668,7378617754,7378605383,7378609139,7378603271,7378530488,7378530439,7378530406,7378530507,
                                           7378530387
)

DataFrame for Match ID 7406531302 created successfully.
DataFrame for Match ID 7406482053 created successfully.
DataFrame for Match ID 7406424070 created successfully.
DataFrame for Match ID 7406249246 created successfully.
DataFrame for Match ID 7406129687 created successfully.
DataFrame for Match ID 7404938247 created successfully.
DataFrame for Match ID 7404889653 created successfully.
DataFrame for Match ID 7404828649 created successfully.
DataFrame for Match ID 7404763579 created successfully.
DataFrame for Match ID 7404713057 created successfully.
DataFrame for Match ID 7404668056 created successfully.
DataFrame for Match ID 7404577536 created successfully.
DataFrame for Match ID 7404488494 created successfully.
DataFrame for Match ID 7404381764 created successfully.
DataFrame for Match ID 7404249421 created successfully.
DataFrame for Match ID 7403117150 created successfully.
DataFrame for Match ID 7403057517 created successfully.
DataFrame for Match ID 7402993316 created succes

In [5]:
def mergedrafttimings(dataframes_dict):
    """Combine the drafting-phase data of every match into one dataframe.

    Parameters
    ----------
    dataframes_dict : dict
        Maps a match id to a one-row dataframe that has a 'draft_timings'
        column (a list of per-action dicts) and a 'radiant_win' column.

    Returns
    -------
    pandas.DataFrame
        One row per draft action across all matches, with the draft fields
        as columns plus 'match' (the dictionary key) and 'winning_team'
        (that match's 'radiant_win' value, repeated on every row of the
        match). Matches without any draft entries are skipped; an empty
        dataframe is returned when no match has draft data.
    """
    #collect one dataframe per match and concatenate once at the end
    per_match_frames = []

    for df_name, dataframe in dataframes_dict.items():
        #each match dataframe has a single row, so the draft list sits in its first cell
        draft_entries = dataframe['draft_timings'].iloc[0]

        #matches with missing or empty draft data contribute no rows
        if not isinstance(draft_entries, (list, tuple)) or len(draft_entries) == 0:
            continue

        #normalizing the list directly also handles a draft with a single entry
        match_draft = pd.json_normalize(list(draft_entries))
        #adding a 'match' column containing the match id
        match_draft['match'] = df_name
        #assign the scalar result so every row of the match gets it
        #(assigning the one-row Series would align on index and fill only row 0)
        match_draft['winning_team'] = dataframe['radiant_win'].iloc[0]

        per_match_frames.append(match_draft)

    if not per_match_frames:
        return pd.DataFrame()

    #returns the master dataframe
    return pd.concat(per_match_frames, ignore_index=True)


In [6]:
#run mergedrafttimings on the dictionary of match dataframes and store the
#combined draft table (one row per draft action per match) as draftdata
draftdata = mergedrafttimings(international2023matches)

In [7]:
#final column layout of the cleaned draft table
column_order = ['match','winning_team','order','action','active_team','hero','player_slot','extra_time','total_time_taken','win']

draftdata = (
    draftdata
    #look up each hero's display name by joining hero_id against the hero table's id
    .merge(hero_data[['id', 'localized_name']], left_on='hero_id', right_on='id', how='left')
    #the numeric keys are no longer needed once the name is attached
    .drop(columns=['hero_id', 'id'])
    .rename(columns={'localized_name': 'hero', 'pick': 'action'})
    #replace the coded values with readable labels
    .assign(
        action=lambda frame: frame['action'].map({True: 'pick', False: 'ban'}),
        active_team=lambda frame: frame['active_team'].map({3: 'dire', 2: 'radiant'}),
        winning_team=lambda frame: frame['winning_team'].map({True: 'radiant', False: 'dire'}),
    )
    #fill any missing winning_team values within a match from the rows of that match that have one
    .assign(
        winning_team=lambda frame: frame.groupby('match')['winning_team'].transform(
            lambda team: team.ffill().bfill()
        )
    )
    #win is True when the team making the pick/ban is the team that won the match
    .assign(win=lambda frame: frame['winning_team'] == frame['active_team'])
    .loc[:, column_order]
)



In [8]:
#show the cleaned draft table: one row per pick/ban action per match
draftdata

Unnamed: 0,match,winning_team,order,action,active_team,hero,player_slot,extra_time,total_time_taken,win
0,7406531302,dire,1,ban,dire,Chen,,130,0,True
1,7406531302,dire,2,ban,radiant,Weaver,,130,11,False
2,7406531302,dire,3,ban,radiant,Bristleback,,130,10,False
3,7406531302,dire,4,ban,dire,Necrophos,,130,45,True
4,7406531302,dire,5,ban,radiant,Magnus,,130,17,False
...,...,...,...,...,...,...,...,...,...,...
3581,7378530387,radiant,20,ban,radiant,Invoker,,80,29,True
3582,7378530387,radiant,21,ban,radiant,Monkey King,,28,93,True
3583,7378530387,radiant,22,ban,dire,Tidehunter,,15,42,False
3584,7378530387,radiant,23,pick,dire,Zeus,5.0,18,3,False


In [9]:
#creating a list of all heroes
heroes_x = hero_data['localized_name'].to_list()


def _count_draft_actions(draft, heroes, action, wins_only=False):
    """Count, for each hero in `heroes`, the rows of `draft` with the given action.

    Parameters
    ----------
    draft : pandas.DataFrame
        Draft table with 'hero', 'action' and 'win' columns.
    heroes : list
        Hero names; the result follows this order.
    action : str
        Value of the 'action' column to count ('pick' or 'ban').
    wins_only : bool, optional
        When True, only rows whose 'win' value is True are counted.

    Returns
    -------
    list of int
        One count per hero, 0 for heroes with no matching rows.
    """
    selected = draft['action'] == action
    if wins_only:
        selected = selected & draft['win'].eq(True)
    #one pass over the table per metric instead of one filter per hero
    per_hero = draft.loc[selected, 'hero'].value_counts()
    return [int(per_hero.get(hero, 0)) for hero in heroes]


#number of times each hero was picked
picks_y = _count_draft_actions(draftdata, heroes_x, 'pick')

#number of wins for the picking team when the hero was picked
wins_if_picked_y = _count_draft_actions(draftdata, heroes_x, 'pick', wins_only=True)

#number of times each hero was banned
bans_y = _count_draft_actions(draftdata, heroes_x, 'ban')

#number of wins for the banning team when the hero was banned
wins_if_banned_y = _count_draft_actions(draftdata, heroes_x, 'ban', wins_only=True)


In [10]:
#assemble the per-hero summary table: one row per hero, one column per count list
data = {
    'hero': heroes_x,
    'Times Picked': picks_y,
    'Wins if Picked': wins_if_picked_y,
    'Times Banned': bans_y,
    'Wins if Banned': wins_if_banned_y,
}
cleaned_draftdata = pd.DataFrame.from_dict(data)
cleaned_draftdata

Unnamed: 0,hero,Times Picked,Wins if Picked,Times Banned,Wins if Banned
0,Anti-Mage,0,0,2,0
1,Axe,5,3,5,4
2,Bane,2,2,3,2
3,Bloodseeker,3,1,7,3
4,Crystal Maiden,3,2,0,0
...,...,...,...,...,...
119,Mars,0,0,9,4
120,Dawnbreaker,30,16,34,22
121,Marci,2,0,0,0
122,Primal Beast,28,17,90,47


In [11]:
#writing the per-hero summary (cleaned_draftdata) to draft_data.xlsx, without the row index
cleaned_draftdata.to_excel('draft_data.xlsx', index=False)

In [12]:
#sort the pick data from largest to smallest
#the values are read from cleaned_draftdata (heroes and counts aligned row by row)
#instead of permuting the plotting lists in place, so this cell gives the same
#result no matter how often, or in which order, the sorting cells are run
pick_heroes = cleaned_draftdata['hero'].to_list()
pick_counts = cleaned_draftdata['Times Picked'].to_list()
pick_wins = cleaned_draftdata['Wins if Picked'].to_list()

sorted_pick_indices = sorted(range(len(pick_counts)), key=lambda i: pick_counts[i], reverse=True)
heroes_x = [pick_heroes[i] for i in sorted_pick_indices]
picks_y = [pick_counts[i] for i in sorted_pick_indices]
wins_if_picked_y = [pick_wins[i] for i in sorted_pick_indices]


In [13]:
#create bar graph: total picks per hero with the wins drawn over the same bar
picks_bargraph = go.Figure()
picks_bargraph.add_trace(go.Bar(name='Times Picked', x=heroes_x, y=picks_y))
picks_bargraph.add_trace(go.Bar(name='Wins If Picked', x=heroes_x, y=wins_if_picked_y))

# Customize layout
picks_bargraph.update_layout(
    title='TI23 Hero Pick Rate and Wins',
    xaxis_title='Hero',
    yaxis_title='Number of Games',
    template='plotly_dark',
    barmode='overlay',
)

# Show the plot
picks_bargraph.show()


In [14]:
#sort the ban data from largest to smallest
#heroes_x has already been reordered by pick count for the picks graph, while
#bans_y is still in the original hero order, so indexing heroes_x with the
#ban-sorted indices would put the wrong hero name under each bar.
#cleaned_draftdata keeps heroes and counts aligned row by row, so read from it.
ban_heroes = cleaned_draftdata['hero'].to_list()
ban_counts = cleaned_draftdata['Times Banned'].to_list()
ban_wins = cleaned_draftdata['Wins if Banned'].to_list()

sorted_ban_indices = sorted(range(len(ban_counts)), key=lambda i: ban_counts[i], reverse=True)
heroes_x = [ban_heroes[i] for i in sorted_ban_indices]
bans_y = [ban_counts[i] for i in sorted_ban_indices]
wins_if_banned_y = [ban_wins[i] for i in sorted_ban_indices]

In [15]:
#create bar graph: total bans per hero with the wins drawn over the same bar
bans_bargraph = go.Figure()
bans_bargraph.add_trace(go.Bar(name='Times Banned', x=heroes_x, y=bans_y))
bans_bargraph.add_trace(go.Bar(name='Wins if Banned', x=heroes_x, y=wins_if_banned_y))

# Customize layout
bans_bargraph.update_layout(
    title='TI23 Hero Ban Rate and Wins if Banned',
    xaxis_title='Hero',
    yaxis_title='Number of Games',
    template='plotly_dark',
    barmode='overlay',
)

# Show the plot
bans_bargraph.show()