对线期分路（position）
1. off：劣势路
2. mid：中路
3. safe：优势路

定位（role）
1. core：每个阵营在优势路/中路/劣势路各取一名，即该分路上前6分钟正补数最高的英雄
2. hard_support：购买真/假眼数量最多的英雄
3. soft_support：另一个非core英雄

In [17]:
import requests

def get_api_json(url, loop=True, proxy=None):
    """Fetch *url* with a GET request and return the decoded JSON body.

    Args:
        url: Address to request.
        loop: When exactly ``True``, keep retrying until a request succeeds;
            any other value makes a failed request return ``None``.
        proxy: Optional ``proxies`` mapping handed to ``requests.get``.

    Returns:
        The parsed JSON payload, or ``None`` when the request failed and
        retrying is disabled.
    """
    # Retry with a loop instead of recursion: a long outage would otherwise
    # add one stack frame per failure and end in RecursionError.
    while True:
        try:
            response = requests.get(
                url, headers={'User-Agent': 'Chrome'}, timeout=3, proxies=proxy
            )
            return response.json()
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(e)
            if loop is not True:
                return None

In [15]:
# Quick check of get_api_json: request one player's win/loss totals from the
# OpenDota API through the second entry of `proxies`.
# NOTE: `proxies` is assigned by the proxy-pool cell further down this file,
# so that cell has to be run before this one.
get_api_json("http://api.opendota.com/api/players/148351321/wl", loop=True, proxy=proxies[1])

{'win': 761, 'lose': 810}

In [4]:
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool
import pandas as pd

# Load the whole `hero` table into a DataFrame. NullPool is passed as the pool
# class for the engine.
# NOTE(review): the connection string embeds a user name and password -
# confirm this account is meant to be public before sharing the notebook.
engine = create_engine('postgresql://dota2_readonly:dota2@dota2.heqiuzhi.xyz:5432/dota2', poolclass=NullPool)
dota2_hero_df = pd.read_sql("select * from hero", con=engine)

# Two lookup tables between the stringified hero id and the hero name,
# built by walking the two columns in lockstep.
dota2_hero_id_to_name = {
    str(hero_id): hero_name
    for hero_id, hero_name in zip(dota2_hero_df['hero_id'], dota2_hero_df['hero_name'])
}
dota2_hero_name_to_id = {
    hero_name: str(hero_id)
    for hero_id, hero_name in zip(dota2_hero_df['hero_id'], dota2_hero_df['hero_name'])
}
dota2_hero_df

Unnamed: 0,hero_id,ingame_name,en_official_name,primary_attr,attack_type,roles,legs,en_name,hero_name
0,1,npc_dota_hero_antimage,Anti-Mage,agi,Melee,"{Carry,Escape,Nuker}",2,antimage,敌法师
1,2,npc_dota_hero_axe,Axe,str,Melee,"{Initiator,Durable,Disabler,Jungler,Carry}",2,axe,斧王
2,3,npc_dota_hero_bane,Bane,int,Ranged,"{Support,Disabler,Nuker,Durable}",4,bane,祸乱之源
3,4,npc_dota_hero_bloodseeker,Bloodseeker,agi,Melee,"{Carry,Disabler,Jungler,Nuker,Initiator}",2,bloodseeker,血魔
4,5,npc_dota_hero_crystal_maiden,Crystal Maiden,int,Ranged,"{Support,Disabler,Nuker,Jungler}",2,crystal_maiden,水晶室女
...,...,...,...,...,...,...,...,...,...
117,126,npc_dota_hero_void_spirit,Void Spirit,int,Melee,"{Carry,Escape,Nuker,Disabler}",2,void_spirit,虚无之灵
118,128,npc_dota_hero_snapfire,Snapfire,str,Ranged,"{Support,Nuker,Disabler,Escape}",2,snapfire,电炎绝手
119,129,npc_dota_hero_mars,Mars,str,Melee,"{Carry,Initiator,Disabler,Durable}",2,mars,玛尔斯
120,135,npc_dota_hero_dawnbreaker,Dawnbreaker,str,Melee,"{Carry,Durable}",2,dawnbreaker,破晓辰星


In [5]:
# Download one match from the OpenDota API as the sample input for the role
# analysis below; retries until it succeeds because loop=True.
# NOTE: `proxies` is assigned by the proxy-pool cell further down this file.
match_json = get_api_json("https://api.opendota.com/api/matches/6227419633", loop=True, proxy=proxies[1])

In [13]:
def get_hero_position_and_role(match_json, hero_id_to_name=None):
    """Build a per-player table with each hero's lane and inferred role.

    Roles are assigned independently for each camp:

    * ``core`` - within every (camp, lane) group, the hero with the highest
      ``6_min_last_hits`` value;
    * ``hard_support`` - among the camp's remaining (non-core) heroes, the one
      that bought the most observer + sentry wards;
    * ``soft_support`` - every other hero.

    Ties are resolved in favour of the player listed first.

    Args:
        match_json: Match payload holding a ``players`` list. Every player
            must provide ``isRadiant``, ``hero_id``, ``lane_role`` (1, 2 or 3)
            and ``lh_t`` with at least seven entries; the ward purchase
            counters are optional.
        hero_id_to_name: Optional mapping from ``str(hero_id)`` to the hero's
            display name. Defaults to the module-level
            ``dota2_hero_id_to_name``.

    Returns:
        A DataFrame with one row per player and the columns ``camp``,
        ``hero_id``, ``hero_name``, ``lane_role``, ``6_min_last_hits``,
        ``purchase_ward_count`` and ``role``. It is empty when the match has
        no players.

    Raises:
        KeyError: If a required key is missing, ``lane_role`` is not 1, 2 or
            3, or the hero id is absent from the name mapping.
        IndexError: If ``lh_t`` has fewer than seven entries.
    """
    if hero_id_to_name is None:
        hero_id_to_name = dota2_hero_id_to_name
    lane_names = {1: "safe", 2: "mid", 3: "off"}
    columns = [
        "camp",
        "hero_id",
        "hero_name",
        "lane_role",
        "6_min_last_hits",
        "purchase_ward_count",
    ]

    rows = []
    for p in match_json["players"]:
        # The ward counters may be absent; treat missing (or null) as zero.
        observer_count = p.get("purchase_ward_observer") or 0
        sentry_count = p.get("purchase_ward_sentry") or 0
        rows.append({
            "camp": "radiant" if p["isRadiant"] is True else "dire",
            "hero_id": p["hero_id"],
            "hero_name": hero_id_to_name[str(p["hero_id"])],
            "lane_role": lane_names[p["lane_role"]],
            "6_min_last_hits": p["lh_t"][6],
            "purchase_ward_count": observer_count + sentry_count,
        })

    w_df = pd.DataFrame(rows, columns=columns)
    if w_df.empty:
        w_df["role"] = pd.Series(dtype="object")
        return w_df

    # A stable sort keeps the original player order among equal values, so
    # tie-breaking is deterministic.
    core_index = (
        w_df.sort_values("6_min_last_hits", ascending=False, kind="mergesort")
        .groupby(["camp", "lane_role"])
        .head(1)
        .index
    )
    is_core = w_df.index.isin(core_index)

    # Choose the hard support among non-core heroes only; otherwise a core
    # that tops the ward count would leave its camp without a hard support.
    hard_support_index = (
        w_df[~is_core]
        .sort_values("purchase_ward_count", ascending=False, kind="mergesort")
        .groupby("camp")
        .head(1)
        .index
    )
    is_hard_support = w_df.index.isin(hard_support_index)

    # Roles are matched by row position rather than by hero_id, so a hero id
    # appearing in both camps cannot leak a role across camps.
    w_df["role"] = "soft_support"
    w_df.loc[is_hard_support, "role"] = "hard_support"
    w_df.loc[is_core, "role"] = "core"
    return w_df

# Classify the sample match downloaded above and display the resulting table.
r_df = get_hero_position_and_role(match_json)
r_df

Unnamed: 0,camp,hero_id,hero_name,lane_role,6_min_last_hits,purchase_ward_count,role
0,radiant,48,露娜,safe,25,0,core
1,radiant,23,昆卡,mid,42,0,core
2,radiant,97,马格纳斯,off,30,0,core
3,radiant,85,不朽尸王,mid,10,6,soft_support
4,radiant,3,祸乱之源,safe,7,29,hard_support
5,dire,112,寒冬飞龙,safe,4,23,hard_support
6,dire,67,幽鬼,safe,24,0,core
7,dire,46,圣堂刺客,mid,40,1,core
8,dire,26,莱恩,off,1,10,soft_support
9,dire,2,斧王,off,28,0,core


## 如果获取1千、1万局比赛的数据呢？

1. 速度慢（每个请求约1~3秒），解决方案：多线程 + IP代理池
2. 录像解析问题（有些对局的录像opendota没有解析，api只返回基础赛后结算数据），解决方案：调用接口请求opendota解析后再获取：https://www.opendota.com/request#6266137212

In [None]:
import requests
import random
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import time

def get_proxies():
    """Return requests-style proxy mappings for the OpenDota proxy pool.

    The pool listing is fetched with ``get_api_json``; only entries whose
    ``goal_web`` equals ``"opendota"`` are kept. Each kept entry becomes a
    dict that routes both ``http`` and ``https`` traffic through the entry's
    ``proxy`` address.
    """
    pool_entries = get_api_json("https://dota2.heqiuzhi.xyz/get_all")
    return [
        {'http': f'http://{entry["proxy"]}/', 'https': f'http://{entry["proxy"]}/'}
        for entry in pool_entries
        if entry["goal_web"] == "opendota"
    ]
        
# Fetch the proxy list once at module level and display it; other cells in
# this file index into this `proxies` list.
proxies = get_proxies()
proxies

def parallel_run_api(func, match_ids, hero_ids=None, complete_rate=1,
                     min_proxies=6, max_workers=16):
    """Run *func* over many matches in a thread pool, retrying failures.

    Every round refreshes the proxy list, dispatches all matches that have
    not succeeded yet (each with a randomly chosen proxy) and collects the
    results. Rounds repeat until every match succeeded, the requested
    completion ratio is reached, or a full round yields nothing new.

    Args:
        func: Callable invoked as ``func(match_id, proxy)``, or as
            ``func(match_id, hero_id, proxy)`` when *hero_ids* is given. It
            must return a falsy value on failure, or a pair
            ``(results, match_id)`` where ``results`` is an iterable of
            records.
        match_ids: Sequence of match ids to process; ids must be hashable.
        hero_ids: Optional sequence aligned with *match_ids*.
        complete_rate: Stop once ``handled / len(match_ids)`` reaches this
            value.
        min_proxies: Minimum number of proxies required to start a round;
            with fewer, the function sleeps 10 seconds and checks again.
        max_workers: Size of the thread pool used in each round.

    Returns:
        A list with the records of every successful call, concatenated.
    """
    # The set gives O(1) membership tests; the separate counter keeps
    # progress accounting right even if match_ids contains duplicates.
    handled_match_ids = set()
    handled_total = 0
    previous_total = 0
    all_match_results = []

    while handled_total < len(match_ids):
        proxies = get_proxies()
        if len(proxies) < min_proxies:  # too few usable proxies to make progress
            print('only have {} valid proxies!'.format(len(proxies)))
            time.sleep(10)
            continue
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '当前进度:{}/{}'.format(handled_total, len(match_ids)))

        pending = [i for i, mi in enumerate(match_ids) if mi not in handled_match_ids]
        working_match_ids = [match_ids[i] for i in pending]
        run_proxies = [random.choice(proxies) for _ in pending]
        if hero_ids:
            working_hero_ids = [hero_ids[i] for i in pending]
            parameters = (working_match_ids, working_hero_ids, run_proxies)
        else:
            parameters = (working_match_ids, run_proxies)

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            for match_result in executor.map(func, *parameters):
                if match_result:
                    all_match_results.extend(match_result[0])
                    handled_match_ids.add(match_result[1])
                    handled_total += 1

        # Give up when a whole round did not fetch a single new match.
        if handled_total == previous_total:
            print('无法继续获取比赛，当前进度:{}/{}'.format(handled_total, len(match_ids)))
            break
        if handled_total / len(match_ids) >= complete_rate:
            break
        previous_total = handled_total
    return all_match_results