In [1]:
import json
import os
import time
from datetime import datetime

import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
# Fetch hero information from the OpenDota API
r = requests.get('https://api.opendota.com/api/heroes', timeout=30)
# Fail loudly on an HTTP error instead of parsing an error payload as hero data
r.raise_for_status()
heroes_data = json.loads(r.text)
# Show the first record as the cell output
heroes_data[0]

{'id': 1,
 'name': 'npc_dota_hero_antimage',
 'localized_name': 'Anti-Mage',
 'primary_attr': 'agi',
 'attack_type': 'Melee',
 'roles': ['Carry', 'Escape', 'Nuker'],
 'legs': 2}

In [3]:
path ='data/'
# Create the output directory up front so the writes below (and in later
# cells that reuse `path`) do not fail on a fresh checkout
os.makedirs(path, exist_ok=True)

# Store the id and the localized name of every hero
dota_heroes_info ={}
for d in heroes_data:
    hero_id = d['id']
    name = d['localized_name']
    dota_heroes_info[hero_id] = name

# JSON object keys are always strings, so the integer ids are written as text
with open(path+'dota_hero_info.json', 'w') as f:
    json.dump(dota_heroes_info, f)

In [4]:
# Download match data page by page and collect it into a pd.DataFrame
def get_matches_info(requests_count=60*60*3,
                     df=None,
                     link='https://api.opendota.com/api/publicMatches',
                     min_rank='80',
                     less_than_match_id='7322684193',
                     path = 'raw_match_info.csv'):
    """Fetch pages of matches from ``link`` and accumulate them in a DataFrame.

    One request is issued per iteration. After every page that yields rows,
    the ``less_than_match_id`` cursor is lowered to the smallest ``match_id``
    of that page, so the following request asks for older matches.

    Parameters
    ----------
    requests_count : int
        Number of requests to issue; the loop sleeps one second after each.
    df : pd.DataFrame or None
        Previously collected matches to extend. Anything that is not a
        DataFrame is replaced by an empty frame with the expected columns.
    link : str
        Endpoint that is queried.
    min_rank : str
        Sent unchanged as the ``min_rank`` query parameter.
    less_than_match_id : str or int
        Initial value of the ``less_than_match_id`` query parameter.
    path : str
        CSV file that receives a snapshot whenever ``i % 300 == 0``
        (which includes the very first iteration).

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` followed by every fetched row. ``df`` itself is
        returned unchanged when no request produced any rows.
    """
    hero_columns = ['{}_hero_{}'.format(side, slot)
                    for side in ('radiant', 'dire')
                    for slot in range(1, 6)]
    if not isinstance(df, pd.DataFrame):
        # Same column names as the rows built by create_df_from_data
        df = pd.DataFrame(columns=['match_id'] + hero_columns + ['radiant_win'])

    # Pages are kept in a list and merged on demand instead of re-appending
    # (and re-casting) the whole frame on every iteration
    pages = []

    def collected():
        """Return the seed frame merged with every page fetched so far."""
        if not pages:
            return df
        # An empty seed frame is left out so it cannot drag dtypes to object
        parts = pages if df.empty else [df] + pages
        merged = pd.concat(parts, ignore_index=True)
        # Cast hero ids to int; columns that are missing or contain gaps are
        # left untouched rather than aborting the whole collection
        castable = {column: int for column in hero_columns
                    if column in merged.columns and merged[column].notna().all()}
        return merged.astype(castable)

    error_count = 0
    last_error = None
    for i in tqdm(range(requests_count)):
        try:
            # Request one page of matches
            r = requests.get(link,
                             params={'min_rank': min_rank,
                                     'less_than_match_id': less_than_match_id},
                             timeout=30)
            r.raise_for_status()
            data = json.loads(r.text)

            additional_df = create_df_from_data(data)
            if additional_df.empty:
                raise ValueError('response contained no usable matches')
            # Move the cursor only once the page is known to be usable
            less_than_match_id = additional_df['match_id'].min()
            pages.append(additional_df)
        except (requests.RequestException, ValueError, KeyError,
                TypeError, AttributeError) as exc:
            # Best effort: a failed page is counted and skipped, but
            # KeyboardInterrupt is no longer trapped, so the loop can be stopped
            error_count += 1
            last_error = exc

        # Stay below the limit of 60 requests per minute
        time.sleep(1)

        # Periodically save what has been collected so far
        if i % 300 == 0:
            collected().to_csv(path)

    if error_count:
        print('{} of {} requests failed; last error: {!r}'.format(
            error_count, requests_count, last_error))

    return collected()

# Turn a team field into a list of integer hero ids
def _parse_team_ids(team):
    """Return the hero ids of ``team`` as a list of ints.

    ``team`` may be a comma-separated string, an iterable of ids, or None.
    Empty tokens are ignored, so ``''`` and ``None`` both give ``[]``.
    """
    if team is None:
        return []
    if isinstance(team, str):
        team = [token for token in team.split(',') if token.strip()]
    return [int(hero_id) for hero_id in team]


# Extract the required fields from a response
def create_df_from_data(data):
    """Build a DataFrame with one row per complete 5-vs-5 match in ``data``.

    Parameters
    ----------
    data : iterable of dict
        Records providing the keys ``match_id``, ``radiant_team``,
        ``dire_team`` and ``radiant_win``.

    Returns
    -------
    pd.DataFrame
        Columns ``match_id``, ``radiant_hero_1..5``, ``dire_hero_1..5`` and
        ``radiant_win``. Records whose teams do not both contain exactly five
        heroes are skipped. The columns are present even when no record
        qualifies, so callers can always select ``match_id``.
    """
    radiant_columns = ['radiant_hero_{}'.format(slot) for slot in range(1, 6)]
    dire_columns = ['dire_hero_{}'.format(slot) for slot in range(1, 6)]
    columns = ['match_id'] + radiant_columns + dire_columns + ['radiant_win']

    final_data = []
    for d in data:
        radiant_heroes = _parse_team_ids(d['radiant_team'])
        dire_heroes = _parse_team_ids(d['dire_team'])

        # Keep only matches where both sides have a full roster
        if len(radiant_heroes) == 5 and len(dire_heroes) == 5:
            row = {'match_id': d['match_id'], 'radiant_win': d['radiant_win']}
            row.update(zip(radiant_columns, radiant_heroes))
            row.update(zip(dire_columns, dire_heroes))
            final_data.append(row)

    # Explicit columns fix both the column order and the schema of an empty result
    return pd.DataFrame(final_data, columns=columns)

# Save the match data under a timestamped file name
def save_matches_info(df,name = 'matches_info.csv'):
    """Write ``df`` as CSV to ``data/<timestamp>_<name>``.

    The timestamp is the current local time formatted as day, month, year,
    hour, minute and second, with every separator turned into an underscore.
    """
    stamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    # Map each of the characters "_/: " to an underscore in a single pass
    stamp = stamp.translate(str.maketrans("_/: ", "____"))
    df.to_csv('data/' + stamp + '_' + name)

In [5]:
# Create an empty DataFrame that will hold the match information
columns = (['match_id']
           + ['radiant_hero_{}'.format(slot) for slot in range(1, 6)]
           + ['dire_hero_{}'.format(slot) for slot in range(1, 6)]
           + ['radiant_win'])
df = pd.DataFrame(columns=columns)

# Fetch the matches (10 requests), then store a timestamped copy
df = get_matches_info(
    requests_count=(10),
    df=df,
    path=path+'raw_match_info.csv',
)
save_matches_info(df)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:15<00:00,  1.57s/it]


In [7]:
# Split the data into train / val / test in the ratio 0.8 : 0.04 : 0.16
# test_size=0.2 keeps 80% for training; the default of 0.25 would keep only
# 75% and give 0.75 : 0.05 : 0.20 instead. A fixed random_state makes the
# split reproducible across runs.
train_data,val_test_data = train_test_split(df, test_size=0.2, random_state=42)
# The held-out 20% is divided 20/80, i.e. 4% validation and 16% test overall
val_data, test_data = train_test_split(val_test_data,test_size=0.8, random_state=42)

train_data.to_csv(path+ 'train_data'+'.csv')
val_data.to_csv(path+ 'val_data'+'.csv')
test_data.to_csv(path+ 'test_data'+'.csv')