In [1]:
import time
import json
import requests
import numpy as np
import pandas as pd
from datetime import datetime

## OpenDota API

In [65]:
class API_dota_data:
    """Small client for the public OpenDota REST API.

    Every public method performs one GET request through ``_call`` and returns
    the decoded JSON payload (or a value extracted from it).

    Attributes:
        verbose: When True, progress messages are printed for every request.
        last_match_id: Smallest ``match_id`` seen in the most recent non-empty
            page returned by ``get_recent_pro_matches``; ``None`` until then.
    """

    BASE_URL = "https://api.opendota.com/api"

    def __init__(self, verbose=False):
        self.verbose = verbose
        self.last_match_id = None

    def _call(self, url, parameters, tries=3):
        """GET ``url`` and return the decoded JSON body, retrying on failure.

        Args:
            url: Full endpoint URL.
            parameters: Query-string parameters (dict) or None.
            tries: Maximum number of attempts.

        Returns:
            The decoded JSON payload of the first successful response.

        Raises:
            ValueError: If no attempt produced a successful, decodable response.
        """
        last_error = None
        for attempt in range(1, tries + 1):
            try:
                if self.verbose:
                    print("Sending API request... ", end="", flush=True)
                response = requests.get(url, params=parameters, timeout=20)
                # Treat HTTP error statuses as failures so they are retried
                # instead of being parsed as if they were data.
                response.raise_for_status()
                payload = response.json()
            except (requests.RequestException, ValueError) as exp:
                last_error = exp
                if attempt < tries:
                    print("Failed. Trying again in 3s")
                    print(exp)
                    time.sleep(3)
                else:
                    # No point in sleeping after the final attempt.
                    print("Failed.")
                    print(exp)
            else:
                if self.verbose:
                    print("DONE!!!")
                return payload
        # The original built this exception without raising it, so callers
        # silently received None after exhausting every retry.
        raise ValueError("Unable to connect to OpenDota API") from last_error

    def get_recent_pro_matches(self, use_last_match=False, last_match_id=0):
        """Return one page of recent pro matches.

        Args:
            use_last_match: When True, only request matches whose id is lower
                than ``last_match_id`` (used to page backwards in time).
            last_match_id: Upper bound sent as ``less_than_match_id``.

        Returns:
            The list of match dictionaries returned by the API. The smallest
            ``match_id`` of a non-empty page is stored in ``self.last_match_id``.
        """
        parameters = {}
        if use_last_match:
            parameters["less_than_match_id"] = last_match_id
        matches = self._call(f"{self.BASE_URL}/proMatches", parameters)
        # An empty page has no minimum; keep the previous value in that case.
        if matches:
            self.last_match_id = min(entry["match_id"] for entry in matches)
        return matches

    def get_match_info(self, match_id):
        """Return the dictionary with the details of ``match_id``."""
        return self._call(f"{self.BASE_URL}/matches/{match_id}", None)

    def get_player_matches_history(self, account_id):
        """Return the list with the match history of ``account_id``."""
        return self._call(f"{self.BASE_URL}/players/{account_id}/matches", None)

    def get_player_totals(self, account_id, hero_id=None):
        """Return the totals payload (kills, deaths, gpm...) of ``account_id``.

        Args:
            account_id: Player account id.
            hero_id: Optional hero id used to restrict the totals.
        """
        parameters = {"sort": 1}
        if hero_id:
            parameters["hero_id"] = hero_id
        return self._call(f"{self.BASE_URL}/players/{account_id}/totals", parameters)

    def get_player_win_loss(self, account_id, hero_id=None):
        """Return ``(wins, losses)`` for ``account_id``.

        Args:
            account_id: Player account id.
            hero_id: Optional hero id used to restrict the count.
        """
        parameters = {"hero_id": hero_id} if hero_id else None
        # The original string lacked the ``f`` prefix, so the literal text
        # "{account_id}" was sent instead of the actual id.
        url = f"{self.BASE_URL}/players/{account_id}/wl"
        response = self._call(url, parameters)
        return response["win"], response["lose"]

In [4]:
# Scratch client with default settings (verbose=False, so requests are not logged).
a = API_dota_data()

In [6]:
# Download the details of one hard-coded match id to experiment with.
match = a.get_match_info(5980687608)

In [44]:
# Extract the wards of the sample match and display the resulting table.
# NOTE(review): DataPreprocessing is defined in a later cell of this notebook, so this
# cell raises NameError on a fresh kernel unless that cell has been run first.
c = DataPreprocessing()
c.get_wards(match)
c.wards

Unnamed: 0,match_id,account_id,player_slot,hero_id,time,x,y,type
0,5980687608,186837494,0,51,-61,104,154,0
1,5980687608,186837494,0,51,381,106,156,0
2,5980687608,186837494,0,51,817,102,170,0
3,5980687608,186837494,0,51,917,138,110,0
4,5980687608,186837494,0,51,1058,96,140,0
...,...,...,...,...,...,...,...,...
77,5980687608,155332459,132,19,398,128,114,1
78,5980687608,155332459,132,19,435,82,164,1
79,5980687608,155332459,132,19,546,128,104,1
80,5980687608,155332459,132,19,1064,94,138,1


## Data preprocessing

In [84]:
class DataPreprocessing:
    """Flatten OpenDota match dictionaries into a set of pandas tables.

    Each ``get_*`` method reads one aspect of a match dictionary and appends the
    resulting rows to the DataFrame attribute of the same theme (``matches``,
    ``players``, ``chat``, ``objectives``, ``advantages``, ``events``,
    ``abilities``, ``wards``, ``previous_matches``).

    Keep every *instance* attribute a DataFrame: the export cell of this
    notebook iterates over ``vars(instance)`` and calls ``.empty`` / ``.to_csv``
    on each value. Constants therefore live on the class, not the instance.
    """

    # Top-level match fields copied verbatim (``match_id`` is handled separately).
    MATCH_FIELDS = (
        "match_seq_num", "patch", "start_time", "duration",
        "game_mode", "first_blood_time", "barracks_status_dire",
        "barracks_status_radiant", "tower_status_dire", "tower_status_radiant",
        "dire_score", "radiant_score", "radiant_win",
    )
    # (parent key, nested key, output column) for values stored one level down.
    NESTED_MATCH_FIELDS = (
        ("league", "leagueid", "league_id"),
        ("league", "name", "league_name"),
        ("radiant_team", "team_id", "radiant_team_id"),
        ("radiant_team", "name", "radiant_team_name"),
        ("dire_team", "team_id", "dire_team_id"),
        ("dire_team", "name", "dire_team_name"),
    )
    CHAT_FIELDS = ("time", "type", "key", "slot", "player_slot")
    OBJECTIVE_FIELDS = ("time", "type", "unit", "key", "slot", "player_slot")
    PLAYER_FIELDS = (
        'player_slot', 'account_id', 'hero_id', 'kills', 'deaths',
        'assists', 'last_hits', 'denies', 'gold_per_min', 'xp_per_min',
        'gold_spent', 'hero_damage', 'hero_healing', 'tower_damage',
        'level', 'party_size', 'item_0', 'item_1', 'item_2', 'item_3',
        'item_4', 'item_5', 'camps_stacked', 'creeps_stacked', 'obs_placed', 'sen_placed',
        'purchase_tpscroll', 'rune_pickups', 'roshans_killed', 'towers_killed', 'win',
    )
    PREVIOUS_MATCH_FIELDS = (
        'match_id', 'player_slot', 'radiant_win', 'duration', 'game_mode',
        'lobby_type', 'start_time', 'version', 'hero_id', 'kills', 'deaths',
        'assists', 'skill', 'leaver_status', 'party_size',
    )
    # (player log key, value stored in the "event" column)
    EVENT_LOGS = (
        ("buyback_log", "buyback"),
        ("kills_log", "kill"),
        ("runes_log", "rune"),
        ("purchase_log", "purchase"),
    )
    # (player log key, value stored in the "type" column)
    WARD_LOGS = (("obs_log", 0), ("sen_log", 1))
    MAX_SKILL_UPGRADES = 25

    def __init__(self):
        # Initialize tables as empty dataframes
        self.matches = pd.DataFrame()
        self.players = pd.DataFrame()
        self.chat = pd.DataFrame()
        self.objectives = pd.DataFrame()
        self.advantages = pd.DataFrame()
        self.events = pd.DataFrame()
        self.abilities = pd.DataFrame()
        self.wards = pd.DataFrame()
        self.previous_matches = pd.DataFrame()

    # ------------------------------------------------------------------ helpers

    def _extend_table(self, table_name, rows):
        """Append ``rows`` (list of dicts or DataFrame) to ``self.<table_name>``.

        Uses ``pd.concat`` because ``DataFrame.append`` no longer exists in
        pandas 2.x. Nothing happens when ``rows`` is empty.
        """
        new_rows = rows if isinstance(rows, pd.DataFrame) else pd.DataFrame(rows)
        if new_rows.empty:
            return
        current = getattr(self, table_name)
        if current.empty:
            # Skip concatenating with the empty placeholder frame.
            combined = new_rows.reset_index(drop=True)
        else:
            combined = pd.concat([current, new_rows], ignore_index=True)
        setattr(self, table_name, combined)

    @staticmethod
    def _pick_fields(match_id, item, fields):
        """Return ``{"match_id": ..., field: item[field] or NaN, ...}``."""
        row = {"match_id": match_id}
        for field in fields:
            row[field] = item.get(field, np.nan)
        return row

    @staticmethod
    def _player_ids(match, player):
        """Return the identifying columns shared by all per-player tables."""
        return {
            "match_id": match["match_id"],
            "account_id": player["account_id"],
            "player_slot": player["player_slot"],
            "hero_id": player["hero_id"],
        }

    # ------------------------------------------------------------------- tables

    def get_match(self, match):
        """Get general information from the match and append to matches.

        Missing top-level or nested values are stored as NaN instead of
        aborting the whole row.
        """
        row = self._pick_fields(match["match_id"], match, self.MATCH_FIELDS)
        for parent, child, column in self.NESTED_MATCH_FIELDS:
            nested = match.get(parent) or {}  # parent may be absent or None
            row[column] = nested.get(child, np.nan)
        self._extend_table("matches", [row])

    def get_match_chat(self, match):
        """Get match chat and save to chat."""
        rows = [
            self._pick_fields(match["match_id"], message, self.CHAT_FIELDS)
            for message in match.get("chat") or []
        ]
        self._extend_table("chat", rows)

    def get_match_objectives(self, match):
        """Get game objectives like Roshan and towers and append to objectives dataframe."""
        rows = [
            self._pick_fields(match["match_id"], objective, self.OBJECTIVE_FIELDS)
            for objective in match.get("objectives") or []
        ]
        self._extend_table("objectives", rows)

    def get_match_advantages(self, match):
        """Get radiant gold and xp advantage for each minute and append to advantages dataframe.

        ``gold_or_xp`` is 0 for gold rows and 1 for xp rows.
        """
        rows = []
        for gold_or_xp, source_key in enumerate(("radiant_gold_adv", "radiant_xp_adv")):
            for minute, value in enumerate(match.get(source_key) or []):
                rows.append({
                    "match_id": match["match_id"],
                    "minute": minute,
                    "gold_or_xp": gold_or_xp,
                    "value": int(value),
                })
        self._extend_table("advantages", rows)

    def get_players_events(self, match):
        """Get events for each player (kills, runes, bb and purchases) and append to events."""
        rows = []
        for player in match["players"]:
            for log_key, event_name in self.EVENT_LOGS:
                for entry in player.get(log_key) or []:
                    row = self._player_ids(match, player)
                    row["time"] = entry["time"]
                    # Buybacks carry no key of interest; the column stays NaN for them.
                    row["key"] = np.nan if event_name == "buyback" else entry["key"]
                    row["event"] = event_name
                    rows.append(row)
        self._extend_table("events", rows)

    def get_ability_upgrades(self, match):
        """Get skill upgrades for each player.

        Columns ``skill_upgrade_1`` .. ``skill_upgrade_25`` always exist (NaN
        when unused); longer upgrade lists add further columns.
        """
        rows = []
        for player in match["players"]:
            upgrades = player.get("ability_upgrades_arr")
            if not upgrades:
                continue
            row = self._player_ids(match, player)
            for slot in range(1, self.MAX_SKILL_UPGRADES + 1):
                row['skill_upgrade_' + str(slot)] = np.nan
            for slot, ability in enumerate(upgrades, start=1):
                row['skill_upgrade_' + str(slot)] = ability
            rows.append(row)
        self._extend_table("abilities", rows)

    def get_wards(self, match):
        """Get time, position, slot and hero for each ward placed and append to self.wards dataframe.

        ``type`` is 0 for observer wards and 1 for sentry wards.
        """
        rows = []
        for player in match['players']:
            for log_key, ward_type in self.WARD_LOGS:
                for placement in player.get(log_key) or []:
                    row = self._player_ids(match, player)
                    row.update({
                        'time': placement['time'],
                        'x': placement['x'],
                        'y': placement['y'],
                        'type': ward_type,
                    })
                    rows.append(row)
        self._extend_table("wards", rows)

    def get_players(self, match):
        """Get match information for each player and append to self.players dataframe."""
        rows = [
            self._pick_fields(match['match_id'], player, self.PLAYER_FIELDS)
            for player in match['players']
        ]
        self._extend_table("players", rows)

    def get_previous_matches(self, current_match_id, player_account_id, player_previous_matches,
                             current_match_start_time):
        """Append all previous matches before match_start_time from a given account id.

        Args:
            current_match_id: Id of the match the history is collected for.
            player_account_id: Account id the history belongs to.
            player_previous_matches: List of match dictionaries of that player.
            current_match_start_time: Only matches that started strictly before
                this value are kept.
        """
        if not player_previous_matches:
            # Nothing to filter; an empty frame would have no 'start_time' column.
            return
        rows = []
        for item in player_previous_matches:
            row = {'current_match_id': current_match_id, 'account_id': player_account_id}
            for field in self.PREVIOUS_MATCH_FIELDS:
                row[field] = item.get(field, np.nan)
            rows.append(row)
        history = pd.DataFrame(rows)
        # Avoid future games
        history = history[history['start_time'] < current_match_start_time]
        self._extend_table("previous_matches", history)

    def get_all_current_match_tables(self, match_details):
        """Get all tables from a current match, except the previous matches."""
        self.get_match(match_details)
        self.get_players(match_details)
        self.get_match_chat(match_details)
        self.get_match_objectives(match_details)
        self.get_match_advantages(match_details)
        self.get_ability_upgrades(match_details)
        self.get_players_events(match_details)
        self.get_wards(match_details)

In [62]:
def main(sleep_time = 2):
    """Download the most recent pro matches and the history of their players.

    Args:
        sleep_time: Seconds to wait before every API request.

    Returns:
        The DataPreprocessing instance holding all collected tables.
    """
    api = API_dota_data(verbose= True)
    data = DataPreprocessing()
    for recent_match in api.get_recent_pro_matches():
        time.sleep(sleep_time)
        match_details = api.get_match_info(recent_match['match_id'])
        data.get_all_current_match_tables(match_details)
        # Get previous matches for all players with valid account ids.
        # Iterate over the players of *this* match only: data.players keeps
        # growing across matches, so looping over it re-downloaded the history
        # of every earlier player and filed it under the current match id.
        # Reading the raw payload also keeps account ids as ints (iterrows()
        # can upcast them to floats, which corrupts the request URL).
        for player in match_details['players']:
            account_id = player.get('account_id')
            if not account_id or account_id <= 0:
                continue
            time.sleep(sleep_time)
            full_match_history = api.get_player_matches_history(account_id)
            if full_match_history:
                data.get_previous_matches(match_details['match_id'], account_id,
                                          full_match_history, match_details['start_time'])
    return data

In [80]:
# Collect several pages of recent pro matches by paging backwards from the
# oldest match id seen so far.
EXTRA_PAGES = 9         # pages requested after the first one
PAGE_DELAY_SECONDS = 3  # pause between page requests

# Create the client here: no earlier cell defines `api`, so the cell used to
# fail with NameError on a fresh kernel.
api = API_dota_data(verbose=True)
recent_matches = api.get_recent_pro_matches()

for _ in range(EXTRA_PAGES):
    time.sleep(PAGE_DELAY_SECONDS)
    oldest_seen_id = min(entry["match_id"] for entry in recent_matches)
    older_page = api.get_recent_pro_matches(use_last_match=True, last_match_id=oldest_seen_id)
    if not older_page:
        # Nothing older is available; further requests would return nothing new.
        break
    recent_matches.extend(older_page)

Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!


In [86]:
# Download the details of every collected match, plus the match history of
# each of its players, into `dota_data`.
api = API_dota_data(verbose=True)
dota_data = DataPreprocessing()
sleep_time = 2  # seconds to wait before every API request
for recent_match in recent_matches:
    time.sleep(sleep_time)
    match_details = api.get_match_info(recent_match['match_id'])
    dota_data.get_all_current_match_tables(match_details)
    # Get previous matches for all players with valid account ids.
    # Only the players of the current match are visited; dota_data.players
    # accumulates the players of every match processed so far.
    for player in match_details['players']:
        account_id = player.get('account_id')
        if not account_id or account_id <= 0:
            continue
        time.sleep(sleep_time)
        full_match_history = api.get_player_matches_history(account_id)
        if full_match_history:
            dota_data.get_previous_matches(match_details['match_id'], account_id,
                                           full_match_history, match_details['start_time'])

Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending API request... DONE!!!
Sending 

In [111]:
# Write every non-empty table of `dota_data` to ../data/<attribute name>.tsv.
# vars() yields the instance attributes, so each value is expected to be a
# DataFrame (it must support .empty and .to_csv).
for key, value in vars(dota_data).items():
    if not value.empty:
        # Tab-separated output; the DataFrame index is written as the first column.
        value.to_csv(f"../data/{key}.tsv", sep="\t")

In [6]:
# Load the hero catalogue from disk; the next cell reads its "heroes" list.
with open("../data/heroes.json") as file:
    heroes_id = json.load(file)

In [8]:
# Split the hero entries into two parallel lists: hero names and hero ids.
names = [hero["name"] for hero in heroes_id["heroes"]]
ids = [hero["id"] for hero in heroes_id["heroes"]]

In [11]:
# Save the hero name -> id lookup as a tab-separated file (index column included).
pd.DataFrame({"name": names, "id": ids}).to_csv("../data/names_to_ids.tsv", sep="\t")