In [8]:
import requests, json, time, os
import numpy as np
import pandas as pd

# https://docs.opendota.com/#tag/matches

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.9f' % x)



### Mostly leveraging Functional Programming Concepts. In the Future, I might Consider Using OOP if I need to Reproduce or Scale.

In [22]:
def get_matches(account_id):
    """This is a function to retrieve all Dota 2 match IDs for a given account id
    """
    acc_id = str(account_id)
    request = requests.get("https://api.opendota.com/api/players/" + acc_id + "/matches")
    if request.ok:
        print("GET: Success")
        data = request.json()
    return pd.DataFrame(data)

In [18]:
def get_heroes_info():
    """This function returns hero data and stats. The information changes over time
    """
    data = requests.get("https://api.opendota.com/api/heroStats").json()
    return pd.DataFrame(data)

In [20]:
df_heroes = get_heroes_info()

In [24]:
df = get_matches(account_id)

GET: 190040093


In [27]:
print(f'The total number of games played is {df.shape[0]}')

The total number of games played is 5826


In [28]:
print(f'The total number of heroes in Dota is {df_heroes.shape[0]}')

The total number of heroes in Dota is 124


In [33]:
# Mapping the heroe played in each match to their localized name
df['hero'] = df['hero_id'].map(df_heroes.set_index('id')['localized_name'])

In [34]:
df.head()

Unnamed: 0,match_id,player_slot,radiant_win,duration,game_mode,lobby_type,hero_id,start_time,version,kills,deaths,assists,skill,average_rank,leaver_status,party_size,hero
0,7423960106,0,False,2633,22,7,71,1699329495,,7,4,14,,34.0,0,,Spirit Breaker
1,7423812519,130,False,3433,22,7,71,1699318810,,6,9,23,,34.0,0,,Spirit Breaker
2,7421880778,3,True,2163,22,7,71,1699244568,,5,3,9,,34.0,0,,Spirit Breaker
3,7421787804,0,True,2145,22,7,5,1699238474,,0,7,11,,34.0,0,,Crystal Maiden
4,7421743038,128,False,1874,22,7,50,1699235368,,2,5,13,,33.0,0,,Dazzle


In [31]:
def df_to_csv(df, filename):
    return df.to_csv(filename, index=False, encoding='utf-8')

In [32]:
def df_to_pickle(df, filename):
    return df.to_pickle(filename)

In [42]:
def get_matches_info(match_id):
    m_id = str(match_id)
    request = requests.get("https://api.opendota.com/api/matches/" + m_id)
    if request.ok:
        print("GET:", m_id)
        n_data = request.json()
    return n_data

In [43]:
df.shape

(5826, 17)

In [123]:
result = df.match_id
try:
    old_df = pd.read_csv('match.csv')
    old_id = old_df.match_id
    result = pd.Series(list(set(result) - set(old_id)))
except FileNotFoundError as f:
    pass

In [124]:
result

Series([], dtype: object)

In [38]:
match_list = []

In [91]:
# Trying to catch unboundlocalerrors and TypeErrors
counter = 0
for i in result:
    try:
        match_data = get_matches_info(i)
        match_list.append(match_data)
        counter+=1
        print(counter)
    except TypeError:
        continue
    except UnboundLocalError:
        match_data = get_matches_info(i)
        match_list.append(match_data)
        continue
    time.sleep(3)

GET: 4858777889
3703
GET: 4858696765
3704
GET: 4858647894
3705
GET: 4847389811
3706
GET: 4847347458
3707
GET: 4847317683
3708
GET: 4845016267
3709
GET: 4844976368
3710
GET: 4844725009
3711
GET: 4844636700
3712
GET: 4844550180
3713
GET: 4844475474
3714
GET: 4844381611
3715
GET: 4844291216
3716
GET: 4842001700
3717
GET: 4841933431
3718
GET: 4841873977
3719
GET: 4839310427
3720
GET: 4836897869
3721
GET: 4836806830
3722
GET: 4836768616
3723
GET: 4836731322
3724
GET: 4831901669
3725
GET: 4831858401
3726
GET: 4829516638
3727
GET: 4829456727
3728
GET: 4829407244
3729
GET: 4826986921
3730
GET: 4826955994
3731
GET: 4826899057
3732
GET: 4826843505
3733
GET: 4826770190
3734
GET: 4826686289
3735
GET: 4824182741
3736
GET: 4824106491
3737
GET: 4824049200
3738
GET: 4823987228
3739
GET: 4823938912
3740
GET: 4823335520
3741
GET: 4823186551
3742
GET: 4823022409
3743
GET: 4822570417
3744
GET: 4820459000
3745
GET: 4820403219
3746
GET: 4820278775
3747
GET: 4820190639
3748
GET: 4819999443
3749
GET: 48197715

In [100]:
print(f'The total number of matches parsed is {len([i for i in match_list])}')


The total number of matches parsed is 5826


In [129]:
try:
    # change filename to match_info
    df_match = pd.read_csv('test.csv')
    df_match = data.append(match_list, ignore_index=True)
except FileNotFoundError as f:
    df_match = pd.DataFrame(match_list)

In [125]:
df_match.shape

(5826, 45)

In [96]:
df_to_csv(df_match, 'match_info.csv')
df_to_csv(df_heroes, 'heroes.csv')
df_to_csv(df, 'match.csv')

In [97]:
df_to_pickle(df_match, 'match_info.pkl')
df_to_pickle(df_heroes, 'heroes.pkl')
df_to_pickle(df, 'match.pkl')