In [13]:
import copy
import json
import typing as ty

import pandas as pd
import requests

SELECT DISTINCT team_match.team_id, team_match.match_id
FROM team_match
JOIN teams ON team_match.team_id = team_match.team_id
JOIN matches ON matches.match_id = team_match.match_id
WHERE matches.start_time >= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') 
AND matches.start_time <= extract(epoch from timestamp '2021-10-01T21:00:00.000Z')
AND teams.team_id = 4 
LIMIT 10

<h2>Select certain number of professional teams

SELECT *
FROM teams
LIMIT 10

In [3]:
with open('parsed_teams.json', 'r', encoding='utf-8') as file:
    teams = json.loads(file.read())

In [7]:
teams_ids = [team['team_id'] for team in teams]
teams_ids

[1292768,
 7059618,
 6211207,
 1148270,
 1292509,
 887889,
 1521285,
 1061163,
 6209269,
 4949825]

<h2>Select certain number of matches for each professional team

In [15]:
attrs = 'matches.match_id, matches.chat, matches.radiant_win, matches.duration, matches.first_blood_time, matches.radiant_score, matches.dire_score, matches.objectives, matches.teamfights, matches.radiant_gold_adv, matches.radiant_xp_adv'
attrs

'matches.match_id, matches.chat, matches.radiant_win, matches.duration, matches.first_blood_time, matches.radiant_score, matches.dire_score, matches.objectives, matches.teamfights, matches.radiant_gold_adv, matches.radiant_xp_adv'

SELECT *
FROM matches
WHERE matches.start_time >= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') AND matches.start_time <= extract(epoch from timestamp '2021-10-01T21:00:00.000Z')
AND (matches.radiant_team_id = 4 OR matches.dire_team_id = 4)
LIMIT 10

In [39]:
def get_teams_matches(teams_ids: ty.List[int]):
    with open('parsed_matches.json', 'w', encoding='utf-8') as writer: 
        matches_dicts = []
        responses_status = []
        for team_id in teams_ids:
            response = requests.get(
                f"https://api.opendota.com/api/explorer?sql=SELECT {attrs} FROM matches WHERE matches.start_time >= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') AND matches.start_time <= extract(epoch from timestamp '2021-10-01T21:00:00.000Z') AND (matches.radiant_team_id = {team_id} OR matches.dire_team_id = {team_id}) AND matches.match_id IS NOT NULL LIMIT 2"
            )
            print(response.status_code, team_id)
            matches_dicts.append(response.json())
            responses_status.append(response.status_code)
        shared_dict = []
        for dict_ in matches_dicts:
            shared_dict.append(dict_)
        if all([status_code == 200 for status_code in responses_status]):
            dict_to_save = json.dumps(shared_dict)
            writer.write(dict_to_save)
        else:
            print('Something went wrong, status code is: ', response.status_code)

In [40]:
get_teams_matches(teams_ids)

400 1292768
400 7059618
429 6211207
429 1148270
429 1292509
429 887889
429 1521285
429 1061163
429 6209269
429 4949825
Something went wrong, status code is:  429


In [37]:
with open('parsed_matches.json', 'r', encoding='utf-8') as file:
    data = json.loads(file.read())
all_list_data = []
for data_ in data:
    all_list_data.append(data_['rows'])
# all_list_data

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [34]:
flat_list = [item for sublist in all_list_data for item in sublist]
len(flat_list)

0

SELECT match_id, radiant_team_id, dire_team_id, rows_count
FROM (
    SELECT ROW_NUMBER() OVER (PARTITION BY matches.radiant_team_id) as rows_count, *
    FROM matches
) all_matches
WHERE (
    all_matches.rows_count <= 6 AND
    start_time >= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') AND 
    start_time <= extract(epoch from timestamp '2021-10-01T21:00:00.000Z')
)
ORDER BY RADIANT_TEAM_ID
LIMIT 10


In [42]:
import pandas as pd
import numpy as np

In [44]:
data = pd.read_csv('../dota2-4k-matches.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,0,6022659410,0,0,3,3,3,2,3,3,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,1,6022659410,60,0,3,3,3,2,3,3,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,2,6022659410,120,0,3,3,3,2,3,3,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,3,6022659410,180,0,3,3,3,2,3,3,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,4,6022659410,240,0,3,3,3,2,3,3,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0


<h2>Статистика для каждого игрока по каждому матчу (суммарная статистика за предыдуще матчи)

In [75]:
sql_attrs = 'player_matches.match_id, player_matches.account_id, player_matches.kills, player_matches.deaths, player_matches.assists, player_matches.gold, player_matches.last_hits, player_matches.denies, player_matches.gold_per_min, player_matches.hero_damage'
sql_attrs

'player_matches.match_id, player_matches.account_id, player_matches.kills, player_matches.deaths, player_matches.assists, player_matches.gold, player_matches.last_hits, player_matches.denies, player_matches.gold_per_min, player_matches.hero_damage'

In [76]:
limit = 10000
with open('players-previous-games.json', 'w', encoding='utf-8') as writer:
    players_dicts = []
    responses_status = []
    for offset in range(0, 40000, 10000):
        response = requests.get(
            f"https://api.opendota.com/api/explorer?sql=SELECT {sql_attrs} FROM player_matches JOIN matches using(match_id) WHERE TRUE AND matches.start_time >= extract(epoch from timestamp '2021-02-01T21:00:00.000Z') AND matches.start_time <= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') ORDER BY player_matches.match_id NULLS LAST LIMIT {limit} OFFSET {offset}"
        )
        players_dicts.append(response.json())
        responses_status.append(response.status_code)
    shared_dict = []
    for dict_ in players_dicts:
        shared_dict.append(dict_)
    if all([status_code == 200 for status_code in responses_status]):
        dict_to_save = json.dumps(shared_dict)
        writer.write(dict_to_save)
    else:
        print('Something went wrong, status code is: ', response.status_code)

In [81]:
limit = 1000
with open('matches.json', 'w', encoding='utf-8') as writer:
    matches_dicts = []
    responses_status = []
    for offset in range(0, 4000, 1000):
        print(offset)
        response = requests.get(
            f"https://api.opendota.com/api/explorer?sql=SELECT {attrs} FROM matches JOIN match_patch using(match_id) WHERE TRUE AND matches.start_time >= extract(epoch from timestamp '2021-02-01T21:00:00.000Z') AND matches.start_time <= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') ORDER BY matches.match_id NULLS LAST LIMIT {limit} OFFSET {offset}"
        )
        matches_dicts.append(response.json())
        responses_status.append(response.status_code)
    shared_dict = []
    for dict_ in matches_dicts:
        shared_dict.append(dict_)
    if all([status_code == 200 for status_code in responses_status]):
        dict_to_save = json.dumps(shared_dict)
        writer.write(dict_to_save)
    else:
        print('Something went wrong, status code is: ', response.status_code)

0
1000
2000
3000


In [77]:
with open('players-previous-games.json', 'r', encoding='utf-8') as file:
    data = json.loads(file.read())

In [31]:
def process_players(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        players = json.loads(file.read())
    all_list_data = []
    for data_ in players:
        all_list_data.append(data_['rows'])
    flat_list = [item for sublist in all_list_data for item in sublist]
    return flat_list

In [60]:
players = process_players('../players/players.json')

In [78]:
data = process_players('players-previous-games.json')

In [79]:
len(data)

40000

In [55]:
data_test = data[:10]

In [70]:
def process_matches(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        matches = json.loads(file.read())
    all_list_data = []
    for data_ in matches:
        all_list_data.append(data_['rows'])
    flat_list = [item for sublist in all_list_data for item in sublist]
    return flat_list

In [82]:
matches = process_matches('matches.json')

In [65]:
players_for_match = players[0:10]

1) Скачать всех игроков в период с февраль по начало июня (их матчи)
2) распарсить каждого игрока, создать словарь с его айдишников и соответсвтующими данными
3) пройтись по датафрейму из 4к матчей по каждому матчу и добавить инфу об соответствующих игроках

In [83]:
players = data

In [85]:
players[0]['match_id']

5814050985

In [86]:
matches[0]['match_id']

5814050985

In [90]:
players[0]

{'match_id': 5814050985,
 'account_id': 112545824,
 'kills': 1,
 'deaths': 5,
 'assists': 2,
 'gold': 345,
 'last_hits': 124,
 'denies': 1,
 'gold_per_min': 319,
 'hero_damage': 6196}

In [88]:
test_matches = matches[:5]

In [149]:
players_data = {}

lower = 0
upper = 10
# player_data = {key: 0 for key in list(players[0].keys()) if key not in ['match_id', 'account_id']}
for match in test_matches:
    players_ = players[lower:upper]
    lower += 10
    upper += 10
    
    for player in players_:
        if player['account_id'] not in players_data:
            players_data[player['account_id']] = \
                {key: [value] for key, value in player.items() if key not in ['match_id', 'account_id']}
            players_data[player['account_id']].update({'matches_count': 1})
            
            continue
        elif player['account_id'] in players_data:
            player_data_current = players_data[player['account_id']]
            for key, value in player.items():
                if key not in ['match_id', 'account_id']:
                    players_data[player['account_id']][key].append(value)
            matches_count = players_data[player['account_id']]['matches_count']   
            matches_count += 1
            players_data[player['account_id']].update({'matches_count': matches_count})
            
# определяем средние значение для каждой из компонент
for player_id, player_stats in players_data.items():
    for key, value in player_stats.items():
        if key != matches_count:
            players_data[player_id][key] = np.mean(value)

In [150]:
len(list(players_data.keys()))

30

In [152]:
id_ = 50828662
for pl in players[0:60]:
    if pl['account_id'] == id_:
        print(pl)

{'match_id': 5814127265, 'account_id': 50828662, 'kills': 3, 'deaths': 6, 'assists': 23, 'gold': 4785, 'last_hits': 144, 'denies': 4, 'gold_per_min': 421, 'hero_damage': 29359}
{'match_id': 5814176857, 'account_id': 50828662, 'kills': 0, 'deaths': 2, 'assists': 17, 'gold': 497, 'last_hits': 19, 'denies': 0, 'gold_per_min': 246, 'hero_damage': 6668}


In [153]:
players_data[id_]

{'kills': 1.5,
 'deaths': 4.0,
 'assists': 20.0,
 'gold': 2641.0,
 'last_hits': 81.5,
 'denies': 2.0,
 'gold_per_min': 333.5,
 'hero_damage': 18013.5,
 'matches_count': 2.0}

<h3>Тест

In [301]:
def get_mean_stats_for_players(matches):
    players_data = {}

    lower = 0
    upper = 10
    # player_data = {key: 0 for key in list(players[0].keys()) if key not in ['match_id', 'account_id']}
    for match in matches:
        players_ = players[lower:upper]
        lower += 10
        upper += 10

        for player in players_:
            if player['account_id'] not in players_data:
                players_data[player['account_id']] = \
                    {key: [value] for key, value in player.items() if key not in ['match_id', 'account_id']}
                players_data[player['account_id']].update({'matches_count': 1})

                continue
            elif player['account_id'] in players_data:
                player_data_current = players_data[player['account_id']]
                for key, value in player.items():
                    if key not in ['match_id', 'account_id']:
                        players_data[player['account_id']][key].append(value)
                matches_count = players_data[player['account_id']]['matches_count']   
                matches_count += 1
                players_data[player['account_id']].update({'matches_count': matches_count})

    # определяем средние значение для каждой из компонент
    for player_id, player_stats in players_data.items():
        for key, value in player_stats.items():
            if key != matches_count:
                players_data[player_id][key] = np.mean(value)
                
    return players_data

In [302]:
len(list(get_mean_stats_for_players(matches).keys()))

2582

In [303]:
len(matches)

4000

<h2>Матчи из датасета dota4k-matches</h2> <br>

In [317]:
dota4k_matches = pd.read_csv('../dota2-4k-matches.csv', index_col=0)
dota4k_matches.head()

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,6022659410,0,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0


In [171]:
def process_matches(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        matches = json.loads(file.read())
    all_list_data = []
    for data_ in matches:
        all_list_data.append(data_['rows'])
    flat_list = [item for sublist in all_list_data for item in sublist]
    return flat_list

In [261]:
matches_4k = process_matches('../matches/matches.json')
players_4k = process_players('players.json')

In [176]:
matches_4k[0]['match_id']

6022659410

In [185]:
players_4k[0:2]

[{'match_id': 6022659410,
  'account_id': 119898344,
  'kills': 6,
  'deaths': 3,
  'assists': 1,
  'gold': 879,
  'last_hits': 289,
  'denies': 14,
  'gold_per_min': 582,
  'hero_damage': 12572},
 {'match_id': 6022659410,
  'account_id': 101642221,
  'kills': 6,
  'deaths': 6,
  'assists': 9,
  'gold': 245,
  'last_hits': 50,
  'denies': 0,
  'gold_per_min': 266,
  'hero_damage': 15971}]

<h3>Создание словаря с матчами и игроками (4к датасет)

In [191]:
def create_matches_players_dict():
    low, upp = 0, 10
    matches_players_dict = {}
    for match in matches_4k:
        players__ = players_4k[low:upp]
        low += 1
        upp += 1
        
        list_of_players = [pl['account_id'] for pl in players__]
        for pl in players__:
            matches_players_dict[match['match_id']] = {'players_ids': list_of_players}
    
    return matches_players_dict

In [192]:
list_of_matches_players = create_matches_players_dict()

In [196]:
list_of_matches_players[6022659410]

{'players_ids': [119898344,
  101642221,
  86940305,
  130271765,
  162641196,
  337575662,
  104512126,
  100616105,
  105610776,
  187619311]}

In [197]:
def get_match_players(match_id: int):
    list_of_matches_players = create_matches_players_dict()
    return list_of_matches_players[match_id]

In [198]:
sql_attrs

'player_matches.match_id, player_matches.account_id, player_matches.kills, player_matches.deaths, player_matches.assists, player_matches.gold, player_matches.last_hits, player_matches.denies, player_matches.gold_per_min, player_matches.hero_damage'

In [210]:
new_df_columns = [attr.split('.')[1] for attr in sql_attrs.split(', ') if attr.split('.')[1] not in ['match_id']]
new_df_columns

['account_id',
 'kills',
 'deaths',
 'assists',
 'gold',
 'last_hits',
 'denies',
 'gold_per_min',
 'hero_damage']

In [226]:
prefixes = [f'r{i}' for i in range(1, 6)] + [f'd{i}' for i in range(1, 6)]
prefixes

['r1', 'r2', 'r3', 'r4', 'r5', 'd1', 'd2', 'd3', 'd4', 'd5']

In [229]:
cols = []
for col in new_df_columns:
    for prefix in prefixes:
        if col == 'account_id':
            cols.append(prefix + '_' + col)
        else:
            cols.append(prefix + '_mean_' + col)

In [231]:
len(cols)

90

In [233]:
cols[:10]

['r1_account_id',
 'r2_account_id',
 'r3_account_id',
 'r4_account_id',
 'r5_account_id',
 'd1_account_id',
 'd2_account_id',
 'd3_account_id',
 'd4_account_id',
 'd5_account_id']

In [263]:
new_df = dota4k_matches
new_df.head()

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,6022659410,0,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0


In [264]:
for col_ in cols:
    new_df[col_] = 0

In [266]:
new_df

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,r1_mean_hero_damage,r2_mean_hero_damage,r3_mean_hero_damage,r4_mean_hero_damage,r5_mean_hero_damage,d1_mean_hero_damage,d2_mean_hero_damage,d3_mean_hero_damage,d4_mean_hero_damage,d5_mean_hero_damage
0,6022659410,0,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145523,6174722110,2220,0,0,1,1,2,1,1,1,...,0,0,0,0,0,0,0,0,0,0
145524,6174722110,2280,0,0,1,1,2,1,1,1,...,0,0,0,0,0,0,0,0,0,0
145525,6174722110,2340,0,0,1,1,2,1,1,1,...,0,0,0,0,0,0,0,0,0,0
145526,6174722110,2400,0,0,0,1,2,1,1,1,...,0,0,0,0,0,0,0,0,0,0


In [268]:
test_df = new_df[:100]
test_df.tail()

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,r1_mean_hero_damage,r2_mean_hero_damage,r3_mean_hero_damage,r4_mean_hero_damage,r5_mean_hero_damage,d1_mean_hero_damage,d2_mean_hero_damage,d3_mean_hero_damage,d4_mean_hero_damage,d5_mean_hero_damage
95,6022743793,360,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
96,6022743793,420,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
97,6022743793,480,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
98,6022743793,540,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
99,6022743793,600,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0


In [271]:
test_df

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,r1_mean_hero_damage,r2_mean_hero_damage,r3_mean_hero_damage,r4_mean_hero_damage,r5_mean_hero_damage,d1_mean_hero_damage,d2_mean_hero_damage,d3_mean_hero_damage,d4_mean_hero_damage,d5_mean_hero_damage
0,6022659410,0,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,6022743793,360,0,3,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
96,6022743793,420,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
97,6022743793,480,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0
98,6022743793,540,0,2,3,3,2,3,3,1,...,0,0,0,0,0,0,0,0,0,0


In [290]:
test_df.loc[[2]].match_id.values[0]

6022659410

In [272]:
test_df.shape

(100, 369)

<h2>Вставляем статистики

In [273]:
get_match_players(6022659410)

{'players_ids': [119898344,
  101642221,
  86940305,
  130271765,
  162641196,
  337575662,
  104512126,
  100616105,
  105610776,
  187619311]}

In [292]:
players_4k[0]

{'match_id': 6022659410,
 'account_id': 119898344,
 'kills': 6,
 'deaths': 3,
 'assists': 1,
 'gold': 879,
 'last_hits': 289,
 'denies': 14,
 'gold_per_min': 582,
 'hero_damage': 12572}

In [305]:
mean_stats = get_mean_stats_for_players(matches_4k)

In [313]:
matches_4k[0]['match_id']

6022659410

In [314]:
get_match_players(match_id)

{'players_ids': [130271765,
  162641196,
  337575662,
  104512126,
  100616105,
  105610776,
  187619311,
  337575662,
  104512126,
  105610776]}

In [316]:
for player_id in get_match_players(match_id)['players_ids']:
    try:
        print(mean_stats[player_id])
    except KeyError:
        print(0)

0
{'kills': 4.869565217391305, 'deaths': 5.586956521739131, 'assists': 10.978260869565217, 'gold': 1328.5434782608695, 'last_hits': 171.0, 'denies': 10.76086956521739, 'gold_per_min': 431.19565217391306, 'hero_damage': 16544.0, 'matches_count': 46.0}
{'kills': 6.773333333333333, 'deaths': 3.44, 'assists': 8.493333333333334, 'gold': 2642.9866666666667, 'last_hits': 356.84, 'denies': 10.36, 'gold_per_min': 642.6666666666666, 'hero_damage': 26158.133333333335, 'matches_count': 75.0}
{'kills': 8.527027027027026, 'deaths': 4.418918918918919, 'assists': 10.108108108108109, 'gold': 2097.4054054054054, 'last_hits': 293.7972972972973, 'denies': 7.9324324324324325, 'gold_per_min': 561.7162162162163, 'hero_damage': 28308.932432432433, 'matches_count': 74.0}
{'kills': 2.546666666666667, 'deaths': 7.84, 'assists': 13.386666666666667, 'gold': 1658.08, 'last_hits': 44.49333333333333, 'denies': 2.56, 'gold_per_min': 283.2133333333333, 'hero_damage': 11182.733333333334, 'matches_count': 75.0}
{'kills':

In [318]:
dota4k_matches

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,6022659410,0,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145523,6174722110,2220,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,7650.0,0.0,0.0,0.0
145524,6174722110,2280,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145525,6174722110,2340,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145526,6174722110,2400,0,0,0,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0


In [319]:
def insert_mean_stats_rows(df):
    for i in range(df.shape[0]):
        match_id = df.loc[[i]].match_id.values[0]
        match_players = get_match_players(match_id)
        for col__ in cols:
            col_type = col__.split('_')
            current_player_id = match_players['players_ids'][int(col_type[0][1]) - 1]

            # account_id
            try:
                if col_type[-1] == 'id':
                    df.at[i, col__] = current_player_id
                    continue

                # mean_kills
                elif col_type[-1] == 'kills':
                    kills = mean_stats[current_player_id]['kills']
                    df.at[i, col__] = kills
                    continue

                # mean_deaths
                elif col_type[-1] == 'deaths':
                    deaths = mean_stats[current_player_id]['deaths']
                    df.at[i, col__] = deaths
                    continue

                # mean_assists
                elif col_type[-1] == 'assists':
                    assists = mean_stats[current_player_id]['assists']
                    df.at[i, col__] = assists
                    continue

                # mean_gold
                elif col_type[-1] == 'gold':
                    gold = mean_stats[current_player_id]['gold']
                    df.at[i, col__] = gold
                    continue

                # mean_last_hits
                elif col_type[-1] == 'hits':
                    last_hits = mean_stats[current_player_id]['last_hits']
                    df.at[i, col__] = last_hits
                    continue

                # mean_denies
                elif col_type[-1] == 'denies':
                    denies = mean_stats[current_player_id]['denies']
                    df.at[i, col__] = denies
                    continue

                # mean_gold_per_min
                elif col_type[-1] == 'min':
                    gold_per_min = mean_stats[current_player_id]['gold_per_min']
                    df.at[i, col__] = gold_per_min
                    continue

                # mean_hero_damage
                elif col_type[-1] == 'damage':
                    hero_damage = mean_stats[current_player_id]['hero_damage']
                    df.at[i, col__] = hero_damage
                    continue
            except KeyError:
                df.at[i, col__] = 0
    return df

In [320]:
dota4k_matches

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,6022659410,0,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145523,6174722110,2220,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,7650.0,0.0,0.0,0.0
145524,6174722110,2280,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145525,6174722110,2340,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145526,6174722110,2400,0,0,0,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0


In [321]:
import time

In [326]:
begin = time.time()
dota4k_matches = insert_mean_stats_rows(dota4k_matches)
print(f"Ended in {(time.time() - begin) / 60}")

Ended in 65.96398067474365


In [327]:
dota4k_matches

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,r1_mean_hero_damage,r2_mean_hero_damage,r3_mean_hero_damage,r4_mean_hero_damage,r5_mean_hero_damage,d1_mean_hero_damage,d2_mean_hero_damage,d3_mean_hero_damage,d4_mean_hero_damage,d5_mean_hero_damage
0,6022659410,0,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.00000,0.000000,16544.000000,24087.730159,9593.425532,13090.00000,0.000000,16544.000000
1,6022659410,60,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.00000,0.000000,16544.000000,24087.730159,9593.425532,13090.00000,0.000000,16544.000000
2,6022659410,120,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.00000,0.000000,16544.000000,24087.730159,9593.425532,13090.00000,0.000000,16544.000000
3,6022659410,180,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.00000,0.000000,16544.000000,24087.730159,9593.425532,13090.00000,0.000000,16544.000000
4,6022659410,240,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.00000,0.000000,16544.000000,24087.730159,9593.425532,13090.00000,0.000000,16544.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145523,6174722110,2220,0,0,1,1,2,1,1,1,...,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848
145524,6174722110,2280,0,0,1,1,2,1,1,1,...,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848
145525,6174722110,2340,0,0,1,1,2,1,1,1,...,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848
145526,6174722110,2400,0,0,0,1,2,1,1,1,...,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848,31946.555556,9823.848485,13364.30303,26692.090909,18148.484848


In [329]:
dota4k_matches.to_csv('dota4k-matches-with-previous-data.csv')

In [1]:
import pandas as pd

In [4]:
data = pd.read_csv('dota4k-matches-with-previous-data.csv', index_col=0)

In [15]:
data.shape

(145528, 369)

<h3>TODO: Добавить для каждого игрока column с его номером героя

In [16]:
sql_attrs = 'player_matches.match_id, player_matches.hero_id, player_matches.account_id, player_matches.player_slot'
limit = 10000
with open('players_heros.json', 'w', encoding='utf-8') as writer:
    players_dicts = []
    responses_status = []
    for offset in range(0, 40000, 10000):
        response = requests.get(
            f"https://api.opendota.com/api/explorer?sql=SELECT {sql_attrs} FROM player_matches JOIN matches using(match_id) WHERE TRUE AND matches.start_time >= extract(epoch from timestamp '2021-06-01T21:00:00.000Z') AND matches.start_time <= extract(epoch from timestamp '2021-10-01T21:00:00.000Z') ORDER BY player_matches.match_id NULLS LAST LIMIT {limit} OFFSET {offset}"
        )
        players_dicts.append(response.json())
        responses_status.append(response.status_code)
    shared_dict = []
    for dict_ in players_dicts:
        shared_dict.append(dict_)
    if all([status_code == 200 for status_code in responses_status]):
        dict_to_save = json.dumps(shared_dict)
        writer.write(dict_to_save)
    else:
        print('Something went wrong, status code is: ', response.status_code)

In [90]:
players_heros = process_players('players_heros.json')

In [91]:
players_heros[:10]

[{'match_id': 6022659410,
  'hero_id': 8,
  'account_id': 119898344,
  'player_slot': 0},
 {'match_id': 6022659410,
  'hero_id': 19,
  'account_id': 101642221,
  'player_slot': 1},
 {'match_id': 6022659410,
  'hero_id': 121,
  'account_id': 86940305,
  'player_slot': 2},
 {'match_id': 6022659410,
  'hero_id': 107,
  'account_id': 130271765,
  'player_slot': 3},
 {'match_id': 6022659410,
  'hero_id': 45,
  'account_id': 162641196,
  'player_slot': 4},
 {'match_id': 6022659410,
  'hero_id': 41,
  'account_id': 337575662,
  'player_slot': 128},
 {'match_id': 6022659410,
  'hero_id': 23,
  'account_id': 104512126,
  'player_slot': 129},
 {'match_id': 6022659410,
  'hero_id': 123,
  'account_id': 100616105,
  'player_slot': 130},
 {'match_id': 6022659410,
  'hero_id': 58,
  'account_id': 105610776,
  'player_slot': 131},
 {'match_id': 6022659410,
  'hero_id': 69,
  'account_id': 187619311,
  'player_slot': 132}]

In [35]:
def convert_player_prefix(player_prefix: ty.Union[int, str]):
    convert_radiant = {key: key + 1 for key in range(5)}
    convert_dire = {key + 127: key for key in range(1, 6)}
    
    if player_prefix in range(0, 127):
        return f'r{convert_radiant[player_prefix]}_'
    return f'd{convert_dire[player_prefix]}_'

In [36]:
convert_player_prefix(1)

'r2_'

<h3>Adding columns with player_hero_id

In [58]:
data = pd.read_csv('../dota2-4k-matches.csv', index_col=0)

In [59]:
data_test = copy.deepcopy(data)

In [60]:
data_test.head()

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,6022659410,0,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0


In [61]:
data_test.shape

(145528, 279)

In [62]:
data_test[data_test['match_id'] == 6046319155]

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost


In [63]:
data_test = data_test.drop(list(range(14389, 14427))).reset_index(drop=True)

In [64]:
data_test[14387:]

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
14387,6046275234,2040,1,1,2,3,2,3,3,1,...,1.0,0.0,5030.0,1000.0,1065.0,450.0,3680.0,0.0,2700.0,0.0
14388,6046275234,2100,1,1,2,3,2,3,3,1,...,1.0,0.0,5030.0,1000.0,1065.0,450.0,3680.0,0.0,2700.0,0.0
14389,6046399362,0,0,3,3,3,2,3,3,1,...,0.0,0.0,140.0,0.0,260.0,0.0,0.0,0.0,0.0,0.0
14390,6046399362,60,0,3,3,3,2,3,3,1,...,0.0,0.0,140.0,0.0,260.0,0.0,0.0,0.0,0.0,0.0
14391,6046399362,120,0,3,3,3,2,3,3,1,...,0.0,0.0,140.0,0.0,260.0,675.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145485,6174722110,2220,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,7650.0,0.0,0.0,0.0
145486,6174722110,2280,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145487,6174722110,2340,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145488,6174722110,2400,0,0,0,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0


In [65]:
data_test[data_test['match_id'] == 6046319155]

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost


In [66]:
def convert_player_prefix(player_prefix: ty.Union[int, str]):
    convert_radiant = {key: key + 1 for key in range(5)}
    convert_dire = {key + 127: key for key in range(1, 6)}
    
    if player_prefix in range(0, 127):
        return f'r{convert_radiant[player_prefix]}_'
    return f'd{convert_dire[player_prefix]}_'

In [67]:
convert_player_prefix(1) + 'hero_id'

'r2_hero_id'

In [94]:
def add_players_heros_ids(df: pd.DataFrame):
    lower = 0
    upper = 10
    match_count = 0
    matches_list = []
    new = False
    for i in range(df.shape[0]):
        match_id = df.loc[[i]].match_id.values[0]
        if match_id not in matches_list:
            if len(matches_list) == 0:
                matches_list.append(match_id)
            else:
                matches_list.append(match_id)
                new = True
        if new:
            upper += 10
            lower += 10
            
        players_heroes_ = new_players_heroes[lower:upper]
        for player_hero in players_heroes_:
            df.at[i, convert_player_prefix(player_hero['player_slot']) + 'hero_id'] = int(player_hero['hero_id'])
        new = False
    print(upper, lower)
    return df

In [95]:
data = add_players_heros_ids(data_test)

39870 39860


In [96]:
data.to_csv('dota4k-ids-heroes.csv')

In [97]:
data_new = pd.read_csv('dota4k-ids-heroes.csv')

In [104]:
data_loc = data.loc[[130000], ['match_id', 'r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id']]
data_loc

Unnamed: 0,match_id,r1_hero_id,r2_hero_id,r3_hero_id,r4_hero_id,r5_hero_id,d1_hero_id,d2_hero_id,d3_hero_id,d4_hero_id,d5_hero_id
130000,6163421404,25.0,79.0,72.0,29.0,85.0,18.0,38.0,126.0,112.0,86.0


In [102]:
data_new[data_new['match_id'] == 6111964016]

Unnamed: 0.1,Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,...,r1_hero_id,r2_hero_id,r3_hero_id,r4_hero_id,r5_hero_id,d1_hero_id,d2_hero_id,d3_hero_id,d4_hero_id,d5_hero_id
78463,78463,6111964016,0,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78464,78464,6111964016,60,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78465,78465,6111964016,120,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78466,78466,6111964016,180,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78467,78467,6111964016,240,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78468,78468,6111964016,300,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78469,78469,6111964016,360,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78470,78470,6111964016,420,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78471,78471,6111964016,480,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0
78472,78472,6111964016,540,0,3,3,3,2,3,3,...,64.0,51.0,72.0,121.0,74.0,100.0,12.0,69.0,9.0,39.0


In [99]:
convert_hero_id_to_name(68)

NameError: name 'convert_hero_id_to_name' is not defined

In [77]:
set(list(data_new['match_id']))

3987

In [86]:
len(set([i['match_id']for i in players_heros])), len(set(list(data_new['match_id'])))

(4000, 3987)

In [85]:
no_matches = set([i['match_id']for i in players_heros]) - set(list(data_new['match_id']))
no_matches

{6046319155,
 6046389369,
 6056286110,
 6056844308,
 6057040170,
 6078908851,
 6108242257,
 6108341989,
 6108454608,
 6111217969,
 6119901330,
 6131172066,
 6132511769}

In [81]:
data_new[data_new['match_id'] == 6046319155]

Unnamed: 0.1,Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,...,r1_hero_id,r2_hero_id,r3_hero_id,r4_hero_id,r5_hero_id,d1_hero_id,d2_hero_id,d3_hero_id,d4_hero_id,d5_hero_id


In [92]:
new_players_heroes = []
for player_hero_info in players_heros:
    if player_hero_info['match_id'] not in no_matches:
        new_players_heroes.append(player_hero_info)

In [93]:
len(set([i['match_id']for i in new_players_heroes])), len(set(list(data_new['match_id'])))

(3987, 3987)

<h3>Проблемные игроки: players_heros[3980:3990]

In [105]:
players_heros = process_players('players_heros.json')

In [106]:
len(players_heros)

40000

In [250]:
# copy_ = players_heros[3980:3990]

In [256]:
# players_heros[:3981]

In [262]:
# players_heros[3990:]

In [275]:
players_heros = players_heros[:3980] + players_heros[3990:]

In [55]:
6046319155 in [i['match_id']for i in players_heros]

True

<h3>Матча нету в БД: 6046319155

In [56]:
data[data['match_id'] == 6046319155]

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost


39880 39870

In [57]:
data[14390:]

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
14390,6046389369,60,0,3,3,3,2,3,3,1,...,0.0,0.0,0.0,0.0,310.0,0.0,0.0,0.0,0.0,0.0
14391,6046389369,120,0,3,3,3,2,3,3,1,...,0.0,0.0,0.0,0.0,310.0,675.0,0.0,0.0,0.0,0.0
14392,6046389369,180,0,3,3,3,2,3,3,1,...,0.0,0.0,295.0,0.0,310.0,675.0,0.0,0.0,0.0,0.0
14393,6046389369,240,0,3,3,3,2,3,3,1,...,0.0,0.0,295.0,0.0,310.0,1180.0,0.0,0.0,0.0,0.0
14394,6046389369,300,0,3,3,3,2,3,3,1,...,0.0,0.0,1015.0,0.0,310.0,1685.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145523,6174722110,2220,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,7650.0,0.0,0.0,0.0
145524,6174722110,2280,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145525,6174722110,2340,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145526,6174722110,2400,0,0,0,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0


In [288]:
data.to_csv('dota2-4k-matches-heroes-id.csv')

<h2>Heroes data

In [16]:
heroes_data = requests.get('https://api.opendota.com/api/heroes')
heroes_data.status_code

200

In [17]:
heroes_data = heroes_data.json()
heroes_data[:2]

[{'id': 1,
  'name': 'npc_dota_hero_antimage',
  'localized_name': 'Anti-Mage',
  'primary_attr': 'agi',
  'attack_type': 'Melee',
  'roles': ['Carry', 'Escape', 'Nuker'],
  'legs': 2},
 {'id': 2,
  'name': 'npc_dota_hero_axe',
  'localized_name': 'Axe',
  'primary_attr': 'str',
  'attack_type': 'Melee',
  'roles': ['Initiator', 'Durable', 'Disabler', 'Jungler', 'Carry'],
  'legs': 2}]

<h3>Add data to dict with key as hero_id for fast searching

In [18]:
heroes_data_dict = {}
for hero_data in heroes_data:
    heroes_data_dict[hero_data['id']] = hero_data

In [19]:
heroes_data_dict[4]

{'id': 4,
 'name': 'npc_dota_hero_bloodseeker',
 'localized_name': 'Bloodseeker',
 'primary_attr': 'agi',
 'attack_type': 'Melee',
 'roles': ['Carry', 'Disabler', 'Jungler', 'Nuker', 'Initiator'],
 'legs': 2}

In [37]:
data_loc = data.loc[[20000], ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id']]
data_loc

Unnamed: 0,r1_hero_id,r2_hero_id,r3_hero_id,r4_hero_id,r5_hero_id,d1_hero_id,d2_hero_id,d3_hero_id,d4_hero_id,d5_hero_id
20000,129.0,107.0,47.0,121.0,1.0,112.0,58.0,16.0,114.0,46.0


In [21]:
list(data_loc.items())[0][1].values[0]

8.0

In [22]:
def convert_hero_id_to_name(hero_id: ty.Union[int, float]):
    return heroes_data_dict[hero_id]['name']

In [23]:
for hero_data in data_loc.items():
    hero_id = hero_data[1].values[0]
    print(convert_hero_id_to_name(hero_id))

npc_dota_hero_juggernaut
npc_dota_hero_tiny
npc_dota_hero_grimstroke
npc_dota_hero_earth_spirit
npc_dota_hero_pugna
npc_dota_hero_faceless_void
npc_dota_hero_kunkka
npc_dota_hero_hoodwink
npc_dota_hero_enchantress
npc_dota_hero_doom_bringer


In [282]:
data_loc = data.loc[[14390], ['r1_hero_id', 'r2_hero_id', 'r3_hero_id', 'r4_hero_id', 'r5_hero_id', 'd1_hero_id', 'd2_hero_id', 'd3_hero_id', 'd4_hero_id', 'd5_hero_id']]
data_loc

Unnamed: 0,r1_hero_id,r2_hero_id,r3_hero_id,r4_hero_id,r5_hero_id,d1_hero_id,d2_hero_id,d3_hero_id,d4_hero_id,d5_hero_id
14390,26.0,52.0,15.0,28.0,91.0,51.0,46.0,96.0,110.0,13.0


In [283]:
for hero_data in data_loc.items():
    hero_id = hero_data[1].values[0]
    print(convert_hero_id_to_name(hero_id))

npc_dota_hero_lion
npc_dota_hero_leshrac
npc_dota_hero_razor
npc_dota_hero_slardar
npc_dota_hero_wisp
npc_dota_hero_rattletrap
npc_dota_hero_templar_assassin
npc_dota_hero_centaur
npc_dota_hero_phoenix
npc_dota_hero_puck


<h2>Проблемный матч 14390

In [93]:
import requests

In [105]:
resp = requests.get('https://api.opendota.com/api/matches/6046275234')
resp.status_code

200

In [109]:
convert_hero_id_to_name(129)

'npc_dota_hero_mars'

In [107]:
resp.json()['draft_timings']

[{'order': 1,
  'pick': False,
  'active_team': 3,
  'hero_id': 88,
  'player_slot': None,
  'extra_time': 130,
  'total_time_taken': 19},
 {'order': 2,
  'pick': False,
  'active_team': 3,
  'hero_id': 29,
  'player_slot': None,
  'extra_time': 130,
  'total_time_taken': 22},
 {'order': 3,
  'pick': False,
  'active_team': 2,
  'hero_id': 46,
  'player_slot': None,
  'extra_time': 130,
  'total_time_taken': 6},
 {'order': 4,
  'pick': False,
  'active_team': 3,
  'hero_id': 49,
  'player_slot': None,
  'extra_time': 130,
  'total_time_taken': 20},
 {'order': 5,
  'pick': True,
  'active_team': 2,
  'hero_id': 13,
  'player_slot': 4,
  'extra_time': 130,
  'total_time_taken': 10},
 {'order': 6,
  'pick': True,
  'active_team': 3,
  'hero_id': 111,
  'player_slot': 5,
  'extra_time': 129,
  'total_time_taken': 31},
 {'order': 7,
  'pick': True,
  'active_team': 3,
  'hero_id': 61,
  'player_slot': 6,
  'extra_time': 130,
  'total_time_taken': 1},
 {'order': 8,
  'pick': True,
  'active_

In [2]:
import pandas as pd

In [8]:
data = pd.read_csv('dota4k-matches-with-previous-data.csv', index_col=0)

In [9]:
data.head(25)

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,r1_mean_hero_damage,r2_mean_hero_damage,r3_mean_hero_damage,r4_mean_hero_damage,r5_mean_hero_damage,d1_mean_hero_damage,d2_mean_hero_damage,d3_mean_hero_damage,d4_mean_hero_damage,d5_mean_hero_damage
0,6022659410,0,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
5,6022659410,300,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
6,6022659410,360,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
7,6022659410,420,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
8,6022659410,480,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0
9,6022659410,540,0,3,3,3,2,3,3,1,...,24087.730159,9593.425532,13090.0,0.0,16544.0,24087.730159,9593.425532,13090.0,0.0,16544.0


In [10]:
data.shape

(145528, 369)