In [48]:
# Initialize
import requests
import json
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm

In [76]:
# Extract randomized match IDs
match_list = []

# Initial request - pull 100 initial matches
r = requests.get('https://api.opendota.com/api/publicMatches')
data = r.json()
for i in range(len(data)):
    match_list.append(data[i]['match_id'])
last_match = match_list[-1] # save the last match ID as the starting point for next batch of pull

# Make additional requests starting with the last match ID
for i in tqdm(range(0,99)):
    r = requests.get('https://api.opendota.com/api/publicMatches?less_than_match_id=' + str(last_match))
    data = r.json()
    for j in range(len(data)):
        match_list.append(data[j]['match_id'])
    last_match = match_list[-1] # save the last match ID as the starting point for next batch of request

# Save down to csv
pd.DataFrame(set(match_list)).to_csv('random_match.csv')
print('Length: '+ str(len(match_list)) )
print('Unique: '+ str(len(set(match_list))) )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  if sys.path[0] == '':


  0%|          | 0/99 [00:00<?, ?it/s]

KeyError: 0

In [78]:
# Create a formula to download match ID stat from Open Dota
import requests, json, time, os
def get_match_by_id(match_id):
    mid = str(match_id)
    # fetch match data
    r = requests.get('https://api.opendota.com/api/matches/' + mid)
    if r.ok:
        data = r.json()
        # save data
        file = open(os.getcwd() + os.sep + 'matches' + os.sep + mid + '_data.json', 'w')
        json.dump(data, file)
        file.close()

In [79]:
# Collect match data
match_list = list(pd.read_csv('random_match.csv')['0'])
for i in tqdm(range(0,len(match_list))):
    get_match_by_id(match_list[i])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


  0%|          | 0/10000 [00:00<?, ?it/s]

In [81]:
# Retrieve and save latest hero information
def get_hero_stats():
    data = requests.get('https://api.opendota.com/api/heroStats').json()
    pd.DataFrame(data).to_csv('hero_stats.csv', index = False)
get_hero_stats()

ValueError: If using all scalar values, you must pass an index

In [86]:
# Consolidate key data in a dataframe
df1 = []
error_count = 0
bot_match = 0
no_pick = 0
not_ten_picks = 0

for f in tqdm(os.listdir(os.getcwd() + os.sep + 'matches')):
    if os.path.isdir(f):  # skip subdirectories (folders)
        continue
    file = open(os.getcwd() + os.sep + 'matches' + os.sep + f)
    data = json.load(file)
    file.close()
    
    # filter for game mode: all pick (value = 2)
    if data['game_mode'] not in [22]:
        continue
    
    # skip errors
    if 'error' in data.keys():
        error_count = error_count + 1
        continue

    # skip matches with bots
    if data['human_players'] != 10:
        bot_match = bot_match + 1
        continue
    
    # skip matches with no pick data
    if not data['picks_bans']:
        no_pick = no_pick + 1
        continue
        
    # skip matches without 10 picks
    if pd.DataFrame(data['picks_bans']).is_pick.sum() != 10:
        not_ten_picks = not_ten_picks + 1
        continue
    
    # assign team (radiant = team 0)
    win_team = 0
    if data['radiant_win'] == False:
        win_team = 1
    
    # get hero pick data
    picks = pd.DataFrame(data['picks_bans'])
    picks = picks[picks.is_pick == True] # drop bans
    picks = picks.sort_values('order')
    
    # append all match information
    df_temp = [data['match_id'],      # match ID
               data['duration'],      # game duration
               win_team,              # winning team (0=radiant, 1=dire)
               data['radiant_score'], # radiant final score
               data['dire_score'],    # dire final score
               data['skill']          # Valve's assigned skill level of the game
              ]
    # skip matches without a 5v5 pick
    if (picks[picks.team==win_team].shape[0] !=5) or (picks[picks.team!=win_team].shape[0] !=5) : 
        continue
    # winning team's hero IDs
    for j in range(0,5):
        df_temp.append(picks[picks.team==win_team].iloc[j,1])
    # losing team's hero IDs
    for k in range(0,5):
        df_temp.append(picks[picks.team!=win_team].iloc[k,1])
    df1.append(df_temp)
df = pd.DataFrame(df1, columns = ['match_id','duration','win_team','score_r','score_d','skill','w1','w2','w3','w4','w5','l1','l2','l3','l4','l5'])

# Print results
print('Error: '        + str(error_count))
print('Bot: '          + str(bot_match))
print('No pick data: ' + str(no_pick))
print('W/o 10 picks: ' + str(not_ten_picks))
print('Dataset: '      + str(df.shape[0]))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


  0%|          | 0/2416 [00:00<?, ?it/s]

Error: 0
Bot: 0
No pick data: 0
W/o 10 picks: 629
Dataset: 1614


In [87]:
# Save output
df.to_csv('data.csv',index=False)
df.head()

Unnamed: 0,match_id,duration,win_team,score_r,score_d,skill,w1,w2,w3,w4,w5,l1,l2,l3,l4,l5
0,6374093605,3205,0,44,35,,9,71,50,22,46,119,83,21,99,1
1,6374093612,2671,0,47,26,,40,75,22,99,42,105,52,44,11,35
2,6374093615,2172,0,66,40,,84,85,70,10,136,121,120,29,48,101
3,6374093618,1567,0,35,16,,34,64,48,99,35,25,26,78,94,126
4,6374093619,2235,0,41,30,,87,84,78,10,41,14,50,29,67,34


In [89]:
df.shape

(1614, 16)

In [90]:
# Create formula to lookup a list of hero names from their hero IDs
df_heroes = pd.read_csv('hero_stats.csv')[['localized_name','hero_id']]
lookup = {k:v for k,v in zip(df_heroes.hero_id, df_heroes.localized_name)}
def id2name(ids):
    return [lookup[i] for i in ids]

# Map hero names into the main Dataframe
df_named = df.copy()
for i in df_named.columns[6:]:
    df_named[i] = id2name(df_named[i])

In [91]:
# Save output
df_named.to_csv('data_named.csv',index=False)
df_named.head()

Unnamed: 0,match_id,duration,win_team,score_r,score_d,skill,w1,w2,w3,w4,w5,l1,l2,l3,l4,l5
0,6374093605,3205,0,44,35,,Mirana,Spirit Breaker,Dazzle,Zeus,Templar Assassin,Dark Willow,Treant Protector,Windranger,Bristleback,Anti-Mage
1,6374093612,2671,0,47,26,,Venomancer,Silencer,Zeus,Bristleback,Wraith King,Techies,Leshrac,Phantom Assassin,Shadow Fiend,Sniper
2,6374093615,2172,0,66,40,,Ogre Magi,Undying,Ursa,Morphling,Marci,Grimstroke,Pangolier,Tidehunter,Luna,Skywrath Mage
3,6374093618,1567,0,35,16,,Tinker,Jakiro,Luna,Bristleback,Sniper,Lina,Lion,Brewmaster,Medusa,Void Spirit
4,6374093619,2235,0,41,30,,Disruptor,Ogre Magi,Brewmaster,Morphling,Faceless Void,Pudge,Dazzle,Tidehunter,Spectre,Tinker


In [92]:
df_named.shape

(1614, 16)