<h1>Данные для анализа: профессиональные игры с 1 июня по 1 октября 2021 года

In [1]:
import json
from typing import Union

import requests
import pandas as pd
import numpy as np

from IPython.display import display, HTML

In [2]:
def process_matches(file_name: str):
    """Load the match export and return its rows as a list.

    Parameters
    ----------
    file_name : str
        Path to a UTF-8 encoded JSON file whose top-level object has a
        ``rows`` entry.

    Returns
    -------
    list
        A new list holding every element of ``rows``, in file order.
    """
    with open(file_name, encoding='utf-8') as source:
        payload = json.load(source)
    return list(payload['rows'])

In [3]:
def process_players(file_name: str):
    """Load the player export and return its rows as a list.

    Parameters
    ----------
    file_name : str
        Path to a UTF-8 encoded JSON file whose top-level object has a
        ``rows`` entry.

    Returns
    -------
    list
        A new list with the elements of ``rows``, order preserved.
    """
    with open(file_name, mode='r', encoding='utf-8') as handle:
        document = json.load(handle)
    return [*document['rows']]

In [4]:
def get_items(file_name: str):
    """Read a UTF-8 encoded JSON file and return the decoded document as is.

    Parameters
    ----------
    file_name : str
        Path to the JSON file.

    Returns
    -------
    object
        Whatever the JSON document decodes to; no post-processing is done.
    """
    with open(file_name, encoding='utf-8') as handle:
        return json.load(handle)

In [5]:
# Load the raw match and player rows. Both paths are relative to the
# notebook's working directory.
matches = process_matches('matches/matches.json')
players = process_players('players/players.json')

In [6]:
# Spot-check one arbitrary record (index 25): show its match_id.
matches[25]['match_id']

6023817373

In [7]:
# Match attributes written as "<table>.<column>" qualified names.
matches_sql_attrs = [
    'matches.match_id',
    'match_patch.patch',
    'matches.start_time',
    'matches.radiant_win',
    'matches.duration',
    'matches.first_blood_time',
    'matches.radiant_team_complete',
    'matches.dire_team_complete',
    'matches.radiant_score',
    'matches.dire_score',
    'matches.objectives',
    'matches.teamfights',
    'matches.radiant_gold_adv',
    'matches.radiant_xp_adv',
]

# Drop the table qualifier, keeping only the bare column name.
matches_available_attrs = [qualified.split('.')[1] for qualified in matches_sql_attrs]
matches_available_attrs

['match_id',
 'patch',
 'start_time',
 'radiant_win',
 'duration',
 'first_blood_time',
 'radiant_team_complete',
 'dire_team_complete',
 'radiant_score',
 'dire_score',
 'objectives',
 'teamfights',
 'radiant_gold_adv',
 'radiant_xp_adv']

In [8]:
# Check that all players were downloaded for every match.
# A match is complete only when it has exactly ten player rows; merely being
# non-empty (what a bare truthiness test checks) would hide partial downloads.
PLAYERS_PER_MATCH = 10

# match_id -> list of hero_id. Every match starts with an empty list so that a
# match with no player rows at all is still represented (and fails the check).
players_by_match = {match['match_id']: [] for match in matches}

# Single pass over the players instead of rescanning them once per match.
for player in players:
    match_heroes = players_by_match.get(player['match_id'])
    if match_heroes is not None:  # ignore players of matches we did not load
        match_heroes.append(player['hero_id'])

all(len(match_heroes) == PLAYERS_PER_MATCH for match_heroes in players_by_match.values())

True

<h2>Working with the first match

In [9]:
# Use the first match in the list as the worked example for this section.
match_id = matches[0]['match_id']
match_id

6022659410

In [10]:
# Collect the player rows that belong to the example match and count them.
match_players = [player for player in players if player['match_id'] == match_id]
len(match_players)

10

In [11]:
# Take the first player of the example match and list the fields its row carries.
match_player_1 = match_players[0]
match_player_1.keys()

dict_keys(['match_id', 'player_slot', 'hero_id', 'firstblood_claimed', 'xp_t', 'gold_t', 'times', 'dn_t', 'lh_t', 'purchase_log', 'buyback_log', 'kills_log', 'runes_log'])

In [12]:
# Tabulate the example match's players, keeping only the first four columns;
# every later column is dropped.
dataframe = pd.DataFrame(match_players).iloc[:, :4].copy()
dataframe

Unnamed: 0,match_id,player_slot,hero_id,firstblood_claimed
0,6022659410,0,8,1
1,6022659410,1,19,0
2,6022659410,2,121,0
3,6022659410,3,107,0
4,6022659410,4,45,0
5,6022659410,128,41,0
6,6022659410,129,23,0
7,6022659410,130,123,0
8,6022659410,131,58,0
9,6022659410,132,69,0


<h2>Finding the longest match (in seconds)

In [13]:
# Largest `duration` value found across all matches.
longest_match_duration = max(match['duration'] for match in matches)
longest_match_duration

3775

In [14]:
# Id of the first match whose duration equals the maximum found above.
longest_match_id = next(
    match['match_id']
    for match in matches
    if match['duration'] == longest_match_duration
)
longest_match_id

6023556932

<h2>Configure data for each player

In [15]:
# Take the first match as the example and list the fields a match row carries.
match_1 = matches[0]
match_1.keys()

dict_keys(['match_id', 'patch', 'chat', 'radiant_win', 'duration', 'first_blood_time', 'radiant_score', 'dire_score', 'objectives', 'teamfights', 'radiant_gold_adv', 'radiant_xp_adv'])

In [46]:
# Number of chat entries in each match, followed by the total over all matches.
chat_length = [len(match['chat']) for match in matches]
sum(chat_length)

6577

In [17]:
import collections

In [18]:
# Tally how many player rows occupy each player_slot value
# (falsy player entries are skipped).
collection = collections.Counter(
    player['player_slot'] for player in players if player
)
collection

Counter({0: 100,
         1: 100,
         2: 100,
         3: 100,
         4: 100,
         128: 100,
         129: 100,
         130: 100,
         131: 100,
         132: 100})

In [19]:
def convert_player_prefix(player_prefix: Union[int, str]):
    """Translate a raw ``player_slot`` value into a column prefix.

    Slots 0-4 map to ``'r1_'`` .. ``'r5_'`` and slots 128-132 map to
    ``'d1_'`` .. ``'d5_'`` -- the same mapping ``get_player_prefix`` applies
    to a player row.

    Parameters
    ----------
    player_prefix : int or str
        The raw slot number. A string holding an integer (for example
        ``'129'``) is converted first, as the ``Union[int, str]`` annotation
        promises; previously every string raised ``KeyError``.

    Returns
    -------
    str
        The prefix, including its trailing underscore.

    Raises
    ------
    KeyError
        If the value is not one of the ten known slots.
    """
    if isinstance(player_prefix, str):
        try:
            slot = int(player_prefix)
        except ValueError:
            # Keep the exception type callers already see for bad slots.
            raise KeyError(player_prefix) from None
    else:
        slot = player_prefix

    # Explicit lookup tables: anything outside them is an invalid slot,
    # so no separate (and previously inconsistent) range check is needed.
    radiant_positions = {raw: raw + 1 for raw in range(5)}
    dire_positions = {raw + 128: raw + 1 for raw in range(5)}

    if slot in radiant_positions:
        return f'r{radiant_positions[slot]}_'
    return f'd{dire_positions[slot]}_'

In [20]:
def get_player_prefix(player: dict):
    """Return the column prefix for a player row, based on its ``player_slot``.

    Slots 0-4 give ``'r1_'`` .. ``'r5_'``; slots 128-132 give ``'d1_'`` ..
    ``'d5_'``. Any other slot raises ``KeyError``.

    Parameters
    ----------
    player : dict
        A player row; only its ``'player_slot'`` entry is read.

    Returns
    -------
    str
        The prefix, including its trailing underscore.
    """
    slot = player['player_slot']

    if slot in range(0, 128):
        side = 'r'
        position_by_slot = {raw: raw + 1 for raw in range(5)}
    else:
        side = 'd'
        position_by_slot = {raw + 127: raw for raw in range(1, 6)}

    return f'{side}{position_by_slot[slot]}_'

In [21]:
# data by time (by minute for every match)
def make_by_minute_data_for_player(player: dict):
    """Build a time-indexed frame of one player's ``*_t`` series.

    The ``xp_t``, ``gold_t``, ``dn_t`` and ``lh_t`` lists of the player row
    become columns named ``<prefix><field>`` (prefix from
    ``get_player_prefix``); the row's ``times`` list becomes the index,
    named ``time``.

    Parameters
    ----------
    player : dict
        A player row carrying the four series, ``times`` and ``player_slot``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``times``, one column per series.
    """
    prefix = get_player_prefix(player)
    series_by_column = {
        prefix + field: player[field]
        for field in ('xp_t', 'gold_t', 'dn_t', 'lh_t')
    }
    return (
        pd.DataFrame(series_by_column)
        .assign(time=player['times'])
        .set_index('time')
    )

In [22]:
# Preview the time-indexed frame for the example player.
make_by_minute_data_for_player(match_player_1)

Unnamed: 0_level_0,r1_xp_t,r1_gold_t,r1_dn_t,r1_lh_t
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0,0,0
60,143,647,1,4
120,339,951,3,9
180,531,1239,5,14
240,880,1559,5,19
300,1339,1850,6,24
360,1742,2027,6,26
420,2293,2661,7,34
480,2510,2923,9,38
540,3139,3453,10,42


<h3>Extract data from the player's purchase log

In [23]:
# Names of the per-player log fields to be processed.
data_to_log = ['purchase_log', 'buyback_log', 'kills_log', 'runes_log']
# Placeholder: the Ellipsis literal stands in for a list that is not defined yet.
additional_data_to_log = ...

In [24]:
# Raw purchase log of the example player: a list of {'time', 'key'} dicts.
match_player_1['purchase_log']

[{'time': -89, 'key': 'slippers'},
 {'time': -89, 'key': 'tango'},
 {'time': -89, 'key': 'circlet'},
 {'time': -89, 'key': 'branches'},
 {'time': -89, 'key': 'quelling_blade'},
 {'time': 12, 'key': 'magic_stick'},
 {'time': 53, 'key': 'wraith_band'},
 {'time': 86, 'key': 'tango'},
 {'time': 87, 'key': 'branches'},
 {'time': 87, 'key': 'branches'},
 {'time': 153, 'key': 'magic_wand'},
 {'time': 169, 'key': 'boots'},
 {'time': 290, 'key': 'blades_of_attack'},
 {'time': 291, 'key': 'tango'},
 {'time': 390, 'key': 'chainmail'},
 {'time': 396, 'key': 'phase_boots'},
 {'time': 397, 'key': 'tpscroll'},
 {'time': 463, 'key': 'boots_of_elves'},
 {'time': 599, 'key': 'blade_of_alacrity'},
 {'time': 603, 'key': 'tpscroll'},
 {'time': 717, 'key': 'yasha'},
 {'time': 842, 'key': 'ultimate_orb'},
 {'time': 858, 'key': 'clarity'},
 {'time': 868, 'key': 'tpscroll'},
 {'time': 907, 'key': 'manta'},
 {'time': 962, 'key': 'blade_of_alacrity'},
 {'time': 965, 'key': 'clarity'},
 {'time': 966, 'key': 'clar

In [25]:
# Item reference data, looked up below by the purchase log's 'key' value.
# The path is relative to the notebook's working directory.
items = get_items('items/items.json')

In [26]:
def get_quality_collection(items_):
    """Return the distinct ``qual`` values found in ``items_``.

    Parameters
    ----------
    items_ : dict
        Mapping of item name to that item's data dict. Entries without a
        ``'qual'`` key are ignored.

    Returns
    -------
    list
        Each distinct ``qual`` value once, in the order it was first seen.
    """
    # A dict is used as an insertion-ordered set of the qualities seen so far.
    seen_qualities = {}
    for item_data in items_.values():
        if 'qual' in item_data.keys():
            seen_qualities.setdefault(item_data['qual'], None)
    return list(seen_qualities)

In [27]:
def encode_item(bought_item: dict, player_prefix: str):
    """Encode one purchased item as per-quality count and cost columns.

    Every quality known to the module-level ``items`` data gets a
    ``<prefix><quality>_count`` and a ``<prefix><quality>_cost`` entry set to
    0. The entries for the bought item's own quality are then set to 1 and
    to the item's cost.

    Parameters
    ----------
    bought_item : dict
        Item data. A missing ``'qual'`` is treated as the quality ``'null'``
        and a missing ``'cost'`` as 0.
    player_prefix : str
        Prefix put in front of every column name.

    Returns
    -------
    dict
        Column name -> value; all count keys first, then all cost keys.
    """
    known_qualities = get_quality_collection(items)

    # Counts first, then costs, so the column order stays stable.
    encoded = {f'{player_prefix}{quality}_count': 0 for quality in known_qualities}
    encoded.update({f'{player_prefix}{quality}_cost': 0 for quality in known_qualities})

    item_quality = bought_item['qual'] if 'qual' in bought_item else 'null'
    item_cost = bought_item['cost'] if 'cost' in bought_item else 0

    encoded[player_prefix + item_quality + '_count'] = 1
    encoded[player_prefix + item_quality + '_cost'] = item_cost

    return encoded

In [28]:
# Encode every purchase of the example player as one row of per-quality
# count/cost columns and remember when each purchase happened.
purchase_log = match_player_1['purchase_log']
player_prefix = get_player_prefix(match_player_1)

purchased_items_time = [purchase['time'] for purchase in purchase_log]
purchased_items = [
    encode_item(items[purchase['key']], player_prefix)
    for purchase in purchase_log
]

# all purchases during match
all_purchases = pd.DataFrame(data=purchased_items)
all_purchases['time'] = purchased_items_time

# Minute marks covered by the table: 0 holds everything bought at or before
# time 0, each later mark T holds purchases with T - 60 < time <= T. The marks
# run up to the longest match so every match's table has the same rows.
minute_marks = list(range(0, longest_match_duration + 60, 60))

# Round each purchase time up to its minute mark (integer ceiling division);
# pre-game (negative) times are clipped into the 0 mark.
purchase_minute = ((all_purchases['time'].clip(lower=0) + 59) // 60) * 60

# Sum per minute in one pass. This replaces growing the frame with
# DataFrame.append inside a loop: that method no longer exists in pandas 2.x
# and was quadratic. Minutes without purchases are filled with zeros, and
# purchases past the last mark are dropped by the reindex.
purchases_per_minute = (
    all_purchases
    .drop(columns='time')
    .groupby(purchase_minute)
    .sum()
    .reindex(minute_marks, fill_value=0)
)

# make dataframe for all purchasing during the match in cumulative sum
purchases_total = purchases_per_minute.cumsum().rename_axis('time')
purchases_total

Unnamed: 0_level_0,r1_component_count,r1_secret_shop_count,r1_consumable_count,r1_common_count,r1_rare_count,r1_epic_count,r1_artifact_count,r1_component_cost,r1_secret_shop_cost,r1_consumable_cost,r1_common_cost,r1_rare_cost,r1_epic_cost,r1_artifact_cost
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0.0,3.0,0.0,2.0,0.0,0.0,0.0,0.0,425.0,0.0,140.0,0.0,0.0,0.0,0.0
60.0,4.0,0.0,2.0,1.0,0.0,0.0,0.0,625.0,0.0,140.0,505.0,0.0,0.0,0.0
120.0,4.0,0.0,5.0,1.0,0.0,0.0,0.0,625.0,0.0,330.0,505.0,0.0,0.0,0.0
180.0,5.0,0.0,5.0,2.0,0.0,0.0,0.0,1125.0,0.0,330.0,955.0,0.0,0.0,0.0
240.0,5.0,0.0,5.0,2.0,0.0,0.0,0.0,1125.0,0.0,330.0,955.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3540.0,14.0,2.0,17.0,3.0,2.0,1.0,2.0,9275.0,3250.0,1370.0,2455.0,5600.0,4600.0,4750.0
3600.0,14.0,2.0,17.0,3.0,2.0,1.0,2.0,9275.0,3250.0,1370.0,2455.0,5600.0,4600.0,4750.0
3660.0,14.0,2.0,17.0,3.0,2.0,1.0,2.0,9275.0,3250.0,1370.0,2455.0,5600.0,4600.0,4750.0
3720.0,14.0,2.0,17.0,3.0,2.0,1.0,2.0,9275.0,3250.0,1370.0,2455.0,5600.0,4600.0,4750.0


<h2>Gather building information

In [29]:
# Keep only the example match's objectives of type 'building_kill', then show
# how many there are together with the first two as a sample.
buildings_events = [objective_ for objective_ in match_1['objectives'] if objective_['type'] == 'building_kill']
len(buildings_events), buildings_events[:2]

(9,
 [{'time': 406,
   'type': 'building_kill',
   'unit': 'npc_dota_hero_pugna',
   'key': 'npc_dota_badguys_tower1_top',
   'slot': 4,
   'player_slot': 4},
  {'time': 652,
   'type': 'building_kill',
   'unit': 'npc_dota_creep_badguys_ranged',
   'key': 'npc_dota_goodguys_tower1_bot'}])

In [30]:
# Tabulate the building-kill events for easier inspection.
buildings_objectives = pd.DataFrame(buildings_events)
buildings_objectives

Unnamed: 0,time,type,unit,key,slot,player_slot
0,406,building_kill,npc_dota_hero_pugna,npc_dota_badguys_tower1_top,4.0,4.0
1,652,building_kill,npc_dota_creep_badguys_ranged,npc_dota_goodguys_tower1_bot,,
2,738,building_kill,npc_dota_hero_pugna,npc_dota_badguys_tower1_mid,4.0,4.0
3,839,building_kill,npc_dota_hero_juggernaut,npc_dota_badguys_tower1_bot,0.0,0.0
4,899,building_kill,npc_dota_hero_kunkka,npc_dota_goodguys_tower1_mid,6.0,129.0
5,981,building_kill,npc_dota_hero_pugna,npc_dota_badguys_tower2_top,4.0,4.0
6,1609,building_kill,npc_dota_badguys_siege,npc_dota_goodguys_tower2_bot,,
7,1733,building_kill,npc_dota_hero_kunkka,npc_dota_goodguys_tower2_mid,6.0,129.0
8,1815,building_kill,dota_unknown,npc_dota_goodguys_fort,,


<h3>Some info about buildings in Dota 2

Both the Radiant and the Dire have three lanes which are guarded by **three towers** each. Additionally, each faction's Ancient has **two towers** as well, resulting in a total of **11 towers** per faction.
<br>
**Barracks** (commonly shortened to Rax or Racks) are buildings, defended by their tier 3 towers, that are responsible for keeping lane creeps as powerful as their counterparts. There are **two Barracks** for each lane per faction - one for **melee creeps** (called Melee Barracks or Melee Rax), and one for **ranged creeps** (called Ranged Barracks or Ranged Rax). The ranged barracks are always located to the left of the melee barracks on each lane and both factions.
<br>
<br>
**good** - radiant side, **bad** - dire side

In [31]:
# Number of buildings of each kind that one side starts the match with.
towers = {
        'tower1': 3,
        'tower2': 3,
        'tower3': 3,
        'tower4': 2,
        'melee': 3,
        'range': 3,
        'fort': 1
}

# Kept so existing references to these names still resolve. The function below
# no longer mutates them; it works on fresh copies for every call.
good, bad = towers.copy(), towers.copy()

def get_buildins_data(match: dict):
    """Build a per-minute table of the buildings each side still has.

    Parameters
    ----------
    match : dict
        A match row. ``match['objectives']`` is scanned for entries of type
        ``'building_kill'``; each such entry must carry ``'time'`` and a
        ``'key'`` such as ``'npc_dota_goodguys_tower1_bot'``.
        ``match['duration']`` bounds the time range.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (60, 120, ... covering the duration), with one
        ``radiant_<kind>`` and one ``dire_<kind>`` column per entry of
        ``towers`` holding the count remaining at the end of that minute.
    """
    # Fresh counters per call, so repeated calls do not accumulate losses.
    radiant_left, dire_left = towers.copy(), towers.copy()

    # Pair each kill's time with its building key. Reading the events directly
    # also copes with a match that has no building kills at all.
    building_kills = [
        (objective_['time'], objective_['key'])
        for objective_ in match['objectives']
        if objective_['type'] == 'building_kill'
    ]

    data_for_df = []
    for time_ in range(60, match['duration'] + 60, 60):
        for event_time, building_key in building_kills:
            if not time_ - 60 < event_time <= time_:
                continue
            # 'npc_dota_<side>_<kind>[_<lane>]' -> side at [2], kind at [3]
            key = building_key.split('_')
            if len(key) < 4:
                continue
            side, tower_name = key[2], key[3]
            remaining = radiant_left if side == 'goodguys' else dire_left
            # Building kinds that are not tracked in `towers` are ignored.
            if tower_name in remaining:
                remaining[tower_name] -= 1
        data_for_df.append([time_, *radiant_left.values(), *dire_left.values()])

    buildings_df_columns = [
        'time',
        *[f'radiant_{key}' for key in radiant_left],
        *[f'dire_{key}' for key in dire_left],
    ]
    buildings_dataframe = pd.DataFrame(data_for_df, columns=buildings_df_columns).set_index('time')

    return buildings_dataframe

<h2>Count Roshan kills by team

In [32]:
# First match that contains at least one Roshan kill objective.
# `any` is required here: `all` over the already-filtered objectives is always
# true (even for an empty list), which silently selected the first match.
match_with_roshan_kill = next(
    match
    for match in matches
    if any(objective_['type'] == 'CHAT_MESSAGE_ROSHAN_KILL' for objective_ in match['objectives'])
)

# First Roshan kill objective of that match, shown as a sample.
rosh_kill_objective = next(
    objective_
    for objective_ in match_with_roshan_kill['objectives']
    if objective_['type'] == 'CHAT_MESSAGE_ROSHAN_KILL'
)
rosh_kill_objective

{'time': 1471, 'type': 'CHAT_MESSAGE_ROSHAN_KILL', 'team': 3}

In [33]:
def get_roshan_kill_data(match: dict):
    """Count Roshan kills per team for every minute of a match.

    Parameters
    ----------
    match : dict
        A match row. ``match['objectives']`` entries of type
        ``'CHAT_MESSAGE_ROSHAN_KILL'`` are counted: ``team == 2`` goes to the
        Radiant column and ``team == 3`` to the Dire column.
        ``match['duration']`` bounds the time range.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (60, 120, ...), with columns
        ``radiant_roshan_kill`` and ``dire_roshan_kill``. Each row counts only
        the kills with ``time - 60 < kill time <= time``; the counts are not
        cumulative.
    """
    kills = [
        entry
        for entry in match['objectives']
        if entry['type'] == 'CHAT_MESSAGE_ROSHAN_KILL'
    ]

    def kills_in_window(team_code, window_end):
        # Kills by `team_code` inside the minute that ends at `window_end`.
        return sum(
            1
            for kill in kills
            if window_end - 60 < kill['time'] <= window_end and kill['team'] == team_code
        )

    rows = [
        [minute_end, kills_in_window(2, minute_end), kills_in_window(3, minute_end)]
        for minute_end in range(60, match['duration'] + 60, 60)
    ]

    return pd.DataFrame(
        rows, columns=['time', 'radiant_roshan_kill', 'dire_roshan_kill']
    ).set_index('time')
    

In [34]:
# Build the per-minute Roshan table for the example match and show only the
# minutes in which Dire scored exactly one kill.
df = get_roshan_kill_data(match_with_roshan_kill)
df[df['dire_roshan_kill'] == 1]

Unnamed: 0_level_0,radiant_roshan_kill,dire_roshan_kill
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1500,0,1


<h2>Count players who picked up the Aegis after Roshan was killed

In [35]:
# Aegis events of the same match.
# NOTE(review): despite its name, this list holds CHAT_MESSAGE_AEGIS
# objectives, not Roshan kill objectives.
rosh_kill_player_ = [objective_ for objective_ in match_with_roshan_kill['objectives'] if objective_['type'] == 'CHAT_MESSAGE_AEGIS']
rosh_kill_player_

[{'time': 1471, 'type': 'CHAT_MESSAGE_AEGIS', 'slot': 5, 'player_slot': 128}]

In [36]:
# Find the player row(s) of that match whose slot equals the slot recorded on
# the first Aegis event, and show the slot of the first hit.
player_killed_rosh = [player for player in players if player['match_id'] == match_with_roshan_kill['match_id'] and player['player_slot'] == rosh_kill_player_[0]['player_slot']]
player_killed_rosh[0]['player_slot']

128

In [37]:
# Column prefix that corresponds to that player's slot.
get_player_prefix(player_killed_rosh[0])

'd1_'

In [38]:
def get_roshan_killed_by_player_data(match: dict):
    """Count Aegis events per player for every minute of a match.

    Parameters
    ----------
    match : dict
        A match row. ``match['objectives']`` entries of type
        ``'CHAT_MESSAGE_AEGIS'`` are counted against the player named by
        their ``'player_slot'``. ``match['duration']`` bounds the time range.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (60, 120, ...), with one ``<prefix>aegis_count``
        column per player: ``d1_`` .. ``d5_`` first, then ``r1_`` .. ``r5_``.
        Each row counts only the events with
        ``time - 60 < event time <= time``; the counts are not cumulative.
    """
    aegis_events = [
        objective_
        for objective_ in match['objectives']
        if objective_['type'] == 'CHAT_MESSAGE_AEGIS'
    ]

    # Fix the column set up front so the frame has every column even when the
    # loop below produces no rows.
    count_columns = [
        f'{side}{position}_aegis_count'
        for side in ('d', 'r')
        for position in range(1, 6)
    ]

    result_list = []
    for time_ in range(60, match['duration'] + 60, 60):
        # Counters restart at zero for every minute.
        counts = dict.fromkeys(count_columns, 0)
        for aegis_event in aegis_events:
            if time_ - 60 < aegis_event['time'] <= time_:
                player_prefix = convert_player_prefix(aegis_event['player_slot'])
                counts[player_prefix + 'aegis_count'] += 1
        result_list.append([time_, *counts.values()])

    return pd.DataFrame(result_list, columns=['time', *count_columns]).set_index('time')

In [39]:
# Preview the per-minute Aegis table for the example match.
get_roshan_killed_by_player_data(match_with_roshan_kill)

Unnamed: 0_level_0,d1_aegis_count,d2_aegis_count,d3_aegis_count,d4_aegis_count,d5_aegis_count,r1_aegis_count,r2_aegis_count,r3_aegis_count,r4_aegis_count,r5_aegis_count
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
60,0,0,0,0,0,0,0,0,0,0
120,0,0,0,0,0,0,0,0,0,0
180,0,0,0,0,0,0,0,0,0,0
240,0,0,0,0,0,0,0,0,0,0
300,0,0,0,0,0,0,0,0,0,0
360,0,0,0,0,0,0,0,0,0,0
420,0,0,0,0,0,0,0,0,0,0
480,0,0,0,0,0,0,0,0,0,0
540,0,0,0,0,0,0,0,0,0,0
600,0,0,0,0,0,0,0,0,0,0


<h2>Make by minute statistics for kills/deaths per player