<h1>Данные для анализа: профессиональные игры с 1 июня по 1 октября 2021 года

In [2]:
import json
from typing import Union, List

import requests
import pandas as pd
import numpy as np

from IPython.display import display, HTML

In [3]:
def process_matches(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        matches = json.loads(file.read())
    all_list_data = []
    for data_ in matches:
        all_list_data.append(data_['rows'])
    flat_list = [item for sublist in all_list_data for item in sublist]
    return flat_list

In [4]:
def process_players(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        players = json.loads(file.read())
    all_list_data = []
    for data_ in players:
        all_list_data.append(data_['rows'])
    flat_list = [item for sublist in all_list_data for item in sublist]
    return flat_list

In [5]:
def get_items(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        items = json.loads(file.read())
    return items

In [6]:
matches = process_matches('matches/matches.json')
players = process_players('players/players.json')

In [7]:
matches[0]['match_id']

6022659410

In [8]:
test_chat_match = matches[0]

<h2>Workaround with match chat (chat wheel)

In [35]:
def get_all_chat_wheel_phrases(file_name: str):
    with open(file_name, 'r', encoding='utf-8') as file:
        chat_wheel_phrases = json.loads(file.read())
    return chat_wheel_phrases

In [36]:
chat_wheel = get_all_chat_wheel_phrases('chat_wheel/chat_wheel.json')

In [49]:
# example
# {'time': -76, 'type': 'chatwheel', 'key': '19001', 'slot': 1, 'player_slot': 1}

In [46]:
chat_phrase = [chat_message for chat_message in test_chat_match['chat']]
message_by_player = {message['player_slot']: {} for message in chat_phrase}

for chat_message in chat_phrase:
    # chat wheel message
    if chat_message['type'] == 'chatwheel':
        message = chat_wheel.get(chat_message['key'])
        if message:
            print(message['name'], message['message'])
        else:
            print("There is no such message in opendota db")
    elif chat_message['type'] == 'chat':
        print(chat_message['key'])

tiny_laugh Hehehehehuhhehehe.
grimstroke_laugh Huh huh ha ha ha ha ha ha ha haaa!
tiny_laugh Hehehehehuhhehehe.
grimstroke_laugh Huh huh ha ha ha ha ha ha ha haaa!
MissingHero %s1 is missing!
MissingHero %s1 is missing!
MissingHero %s1 is missing!
tiny_laugh Hehehehehuhhehehe.
earth_spirit_laugh Oh ho ho ha ha ha ha ha ha!
juggernaut_1 You will be forever alone.
enchantress_1 It's just not fair.
grimstroke_laugh Huh huh ha ha ha ha ha ha ha haaa!
ok kalang?
grimstroke_laugh Huh huh ha ha ha ha ha ha ha haaa!
HAHAHA
HAHAHAHAHA
tiny_laugh Hehehehehuhhehehe.
tiny_thank Are you ready to rubble?
tangina ka
juggernaut_deny Hm, looking good.
Care Careful!
GoodJob Well played!
juggernaut_1 You will be forever alone.
juggernaut_1 You will be forever alone.
Guild_A_Net_Net_Da А нет, нет, ДА!
tiny_laugh Hehehehehuhhehehe.
tiny_laugh Hehehehehuhhehehe.
pugna_thank Ah, this is going to be good.
pugna_thank Ah, this is going to be good.
juggernaut_laugh Ha ha ha ha ha!
earth_spirit_laugh Oh ho ho ha

In [48]:
chat_phrase[0]

{'time': -76, 'type': 'chatwheel', 'key': '19001', 'slot': 1, 'player_slot': 1}

In [47]:
message_by_player

{1: {}, 2: {}, 3: {}, 0: {}, 131: {}, 4: {}, 130: {}, 129: {}}

In [7]:
matches_sql_attrs = ['matches.match_id', 'match_patch.patch', 'matches.start_time', 'matches.radiant_win', 'matches.duration', 'matches.first_blood_time', 'matches.radiant_team_complete', 'matches.dire_team_complete', 'matches.radiant_score', 'matches.dire_score', 'matches.objectives', 'matches.teamfights', 'matches.radiant_gold_adv', 'matches.radiant_xp_adv']
matches_available_attrs = [attr.split('.')[1] for attr in matches_sql_attrs]
matches_available_attrs

['match_id',
 'patch',
 'start_time',
 'radiant_win',
 'duration',
 'first_blood_time',
 'radiant_team_complete',
 'dire_team_complete',
 'radiant_score',
 'dire_score',
 'objectives',
 'teamfights',
 'radiant_gold_adv',
 'radiant_xp_adv']

In [8]:
# check if we download all players for all matches
players_by_match = {}
for match in matches:
    match_id = match['match_id']
    match_players = [player['hero_id'] for player in players if player['match_id'] == match_id]
    players_by_match[match_id] = match_players
all([player for player in players_by_match.values()])

KeyboardInterrupt: 

<h2>Workaround with first match

In [1]:
match_id = matches[0]['match_id']
match_id

NameError: name 'matches' is not defined

In [50]:
match_players = [player for player in players if player['match_id'] == match_id]
len(match_players)

10

In [51]:
match_player_1 = match_players[0]
match_player_1.keys()

dict_keys(['match_id', 'player_slot', 'hero_id', 'firstblood_claimed', 'xp_t', 'gold_t', 'times', 'dn_t', 'lh_t', 'purchase_log', 'buyback_log', 'kills_log', 'runes_log', 'obs_log', 'sen_log'])

In [None]:
dataframe = pd.DataFrame(match_players)
for key in list(dataframe.columns):
    if key not in list(dataframe.columns)[:4]:   
        del dataframe[key]
dataframe

<h2>Finding the longest match (in seconds)

In [None]:
longest_match_duration = max([match['duration'] for match in matches])
longest_match_duration

In [None]:
longest_match_id = [match['match_id'] for match in matches if match['duration'] == longest_match_duration][0]
longest_match_id

<h2>Configure data for each player

In [None]:
match_1 = matches[0]
match_1.keys()

In [None]:
# chat length for each match
chat_length = []
for match in matches:
    chat_length.append(len(match['chat']))
sum(chat_length)

In [6]:
import collections

In [7]:
collection = collections.Counter()
for slot in [player['player_slot'] for player in players if player]:
    collection[slot] += 1
collection

Counter({0: 4000,
         1: 4000,
         2: 4000,
         3: 4000,
         4: 4000,
         128: 4000,
         129: 4000,
         130: 4000,
         131: 4000,
         132: 4000})

In [8]:
def convert_player_prefix(player_prefix: Union[int, str]):
    convert_radiant = {key: key + 1 for key in range(5)}
    convert_dire = {key + 127: key for key in range(1, 6)}
    
    if player_prefix in range(0, 127):
        return f'r{convert_radiant[player_prefix]}_'
    return f'd{convert_dire[player_prefix]}_'

In [9]:
def get_player_prefix(player: dict):
    convert_radiant = {key: key + 1 for key in range(5)}
    convert_dire = {key + 127: key for key in range(1, 6)}
    
    player_slot = player['player_slot']
    player_prefix = f'r{convert_radiant[player_slot]}_' if player_slot in range(0, 128) else \
        f'd{convert_dire[player_slot]}_'
    
    return player_prefix

In [10]:
# data by time (by minute for every match)
def make_by_minute_data_for_player(player: dict):
    player_prefix = get_player_prefix(player)
    
    fields_to_process = ['xp_t', 'gold_t', 'dn_t', 'lh_t']
    data_by_time = {}
    for field in fields_to_process:
        data_by_time[player_prefix + field] = player[field]
    # maybe need to pass [data_by_time] in order to prevent 
    # ValueError: If using all scalar values, you must pass an index (in case we accidentally pass a scalar)
    time_df = pd.DataFrame(data_by_time)
    time_df['time'] = player['times']
    df = time_df.set_index('time')
    
    return df

In [52]:
make_by_minute_data_for_player(match_player_1)

Unnamed: 0_level_0,r1_xp_t,r1_gold_t,r1_dn_t,r1_lh_t
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,0,0,0
60,143,647,1,4
120,339,951,3,9
180,531,1239,5,14
240,880,1559,5,19
300,1339,1850,6,24
360,1742,2027,6,26
420,2293,2661,7,34
480,2510,2923,9,38
540,3139,3453,10,42


<h3>Extract data from player purchasing data information

In [11]:
data_to_log = ['purchase_log', 'buyback_log', 'kills_log', 'runes_log']
additional_data_to_log = ...

In [None]:
match_player_1['purchase_log']

<h4>All items presents in Dota 2

In [12]:
items = get_items('items/items.json')

In [13]:
def get_quality_collection(items_):
    # qual, cost
    list_qual = []
    for item, data in items_.items():
        if 'qual' in data.keys():
            list_qual.append(data['qual'])
            
    qual_collection = collections.Counter()
    for qual in list_qual:
        qual_collection[qual] += 1
    
    qual_collection = list(qual_collection.keys())
    qual_collection.append('null')
    return qual_collection

In [None]:
print(get_quality_collection(items))

In [None]:
test_players_1[0]['match_id']

In [14]:
def process_player_purchases(player: dict, match_duration: int):
    
    def encode_item(bought_item: dict, player_prefix: str): 
        result = {}

        qual_collection = get_quality_collection(items)
        count = [f'{player_prefix}{item}_count' for item in qual_collection]
        costs = [f'{player_prefix}{item}_cost' for item in qual_collection]

        for i in (count + costs):
            result[i] = 0

        cost = 0 if (not 'cost' in bought_item) else bought_item['cost']
        qual = 'null' if(not 'qual' in bought_item) else bought_item['qual']

        result[player_prefix + qual + '_count'] = 1
        result[player_prefix + qual + '_cost'] = cost

        return result

    # longest_match_duration = longest_match_duration
    purchased_items = []
    purchased_items_time = [purchase_log['time'] for purchase_log in player['purchase_log']]
    
    for purchased_item in player['purchase_log']:
        item_ = items[purchased_item['key']]
        item_ = encode_item(item_, get_player_prefix(player))

        purchased_items.append(item_)

    # all purchases during match
    all_purchases = pd.DataFrame(data=purchased_items)
    all_purchases['time'] = purchased_items_time

    # only purchases before match begin
    purchases_before_begin = all_purchases[all_purchases['time'] <= 0].sum()
    purchases_before_begin['time'] = 0

    purchases_total = pd.DataFrame(purchases_before_begin).T

    # make dataframe for all purchasing during the match in cumulative sum
    for time_ in range(60, match_duration + 60, 60):
        purchases_time_slice = all_purchases[(time_ - 60 < all_purchases['time']) & (all_purchases['time'] <= time_)].sum()
        purchases_time_slice['time'] = time_
        purchases_total = purchases_total.append(purchases_time_slice, ignore_index=True)


    columns_ = list(purchases_total.columns)[:-1]      
    purchases_total = pd.concat([purchases_total[columns_].cumsum(), purchases_total['time']], axis=1)
    purchases_total = purchases_total.set_index('time')
    
    return purchases_total.fillna(0)

In [2]:
process_player_purchases(test_players_1[0])

NameError: name 'test_players_1' is not defined

<h2>Gain building information

In [None]:
buildings_events = [objective_ for objective_ in match_1['objectives'] if objective_['type'] == 'building_kill']
len(buildings_events), buildings_events[:2]

In [None]:
buildings_objectives = pd.DataFrame(buildings_events)
buildings_objectives

<h3> Some info about building in Dota 2

Both the Radiant and the Dire have three lanes which are guarded by **three towers** each. Additionally, each faction's Ancient has **two towers** as well, resulting in a total of **11 towers** per faction.
<br>
**Barracks** (commonly shortened to Rax or Racks) are buildings, defended by their tier 3 towers, that are responsible for keeping lane creeps as powerful as their counterparts. There are **two Barracks** for each lane per faction - one for **melee creeps** (called Melee Barracks or Melee Rax), and one for **ranged creeps** (called Ranged Barracks or Ranged Rax). The ranged barracks are always located to the left of the melee barracks on each lane and both factions.
<br>
<br>
**good** - radiant side, **bad** - dire side

In [15]:
def get_buildings_data(match: dict):
    towers = {
        'tower1': 3,
        'tower2': 3,
        'tower3': 3,
        'tower4': 2,
        'melee': 3,
        'range': 3,
        'fort': 1
    }

    good, bad = towers.copy(), towers.copy()
    
    buildings_events = [objective_ for objective_ in match['objectives'] if objective_['type'] == 'building_kill']
    buildings_objectives = pd.DataFrame(buildings_events)  
    
    data_for_df = []
    match_duration = match['duration']
    tower_location = buildings_objectives['key'].values
    event_time = buildings_objectives['time'].values
    result_ = []
    for time_ in range(0, match_duration + 60, 60):
        for location in tower_location:
            event_time = \
                buildings_objectives.loc[buildings_objectives['key'] == location]['time'].values[0]
            if time_ - 60 < event_time <= time_:
                key = location.split('_')
                tower_name = key[3]
                if(key[2] == 'goodguys'):
                    good[tower_name] -= 1
                else:
                    bad[tower_name] -= 1
        row_for_df = [time_, *[value for value in good.values()], *[value for value in bad.values()]]
        data_for_df.append(row_for_df)

    buildings_df_columns = ['time', *[f'radiant_{key}' for key in good.keys()], *[f'dire_{key}' for key in bad.keys()]]
    buildings_dataframe = pd.DataFrame(data_for_df, columns=buildings_df_columns).set_index('time')
    
    return buildings_dataframe

In [None]:
get_buildings_data(test_match_1)[:4]

<h2>Count Roshan kills by team

In [None]:
match_with_roshan_kill = [match 
                         for match in matches
                         if all([objective_ for objective_ in match['objectives'] if objective_['type'] == 'CHAT_MESSAGE_ROSHAN_KILL'])][0]
rosh_kill_objective = [objective_ for objective_ in match_with_roshan_kill['objectives'] if objective_['type'] == 'CHAT_MESSAGE_ROSHAN_KILL'][0]
rosh_kill_objective

In [16]:
def get_roshan_kill_data(match: dict):
    roshan_events = [objective_ for objective_ in match['objectives'] if objective_['type'] == 'CHAT_MESSAGE_ROSHAN_KILL']
    roshan_objectives = pd.DataFrame(roshan_events)  
    
    data_for_df = []
    match_duration = match['duration']
    
    roshan_radiant_count, roshan_dire_count = 0, 0
    row_for_df = []
    for time_ in range(0, match_duration + 60, 60):
        roshan_radiant_count, roshan_dire_count = 0, 0
        for event in roshan_events:
            if time_ - 60 < event['time'] <= time_:
                if (event['team'] == 2):
                    roshan_radiant_count += 1
                elif (event['team'] == 3):
                    roshan_dire_count += 1
        row_for_df.append([time_, roshan_radiant_count, roshan_dire_count])
    
    roshan_kills_df = pd.DataFrame(row_for_df, columns=['time', 'radiant_roshan_kill', 'dire_roshan_kill']).set_index('time')
    
    return roshan_kills_df
    

In [None]:
df = get_roshan_kill_data(match_with_roshan_kill)
df[df['dire_roshan_kill'] == 1]

<h2>Count players which got aegis after killed roshan

In [None]:
rosh_kill_player_ = [objective_ for objective_ in match_with_roshan_kill['objectives'] if objective_['type'] == 'CHAT_MESSAGE_AEGIS']
rosh_kill_player_

In [None]:
player_killed_rosh = [player for player in players if player['match_id'] == match_with_roshan_kill['match_id'] and player['player_slot'] == rosh_kill_player_[0]['player_slot']]
player_killed_rosh[0]['player_slot']

In [None]:
get_player_prefix(player_killed_rosh[0])

In [17]:
def get_roshan_killed_by_player_data(match: dict):
    aegis_events = [objective_ for objective_ in match['objectives'] if objective_['type'] == 'CHAT_MESSAGE_AEGIS']
    aegis_objectives = pd.DataFrame(aegis_events)  
    
    data_for_df = []
    match_duration = match['duration']
    
    prefix_dire = [f'd{prefix}_' for prefix in range(1, 6)]
    prefix_radiant = [f'r{prefix}_' for prefix in range(1, 6)]
    prefixes = prefix_dire + prefix_radiant
    
    result = {}
    result_list = []
    for time_ in range(0, match_duration + 60, 60):
        for prefix_ in prefixes:
            result[prefix_ + 'aegis_count'] = 0
        for aegis_took in aegis_events:
            if time_ - 60 < aegis_took['time'] <= time_:
                player_slot_ = convert_player_prefix(aegis_took['player_slot'])
                result[player_slot_ + 'aegis_count'] += 1
        
        result_list.append([time_, *[value for value in result.values()]])
    
    columns_ = ['time', *[key for key in result.keys()]]
    aegis_took = pd.DataFrame(result_list, columns=columns_).set_index('time')
    
    return aegis_took

In [3]:
get_roshan_killed_by_player_data(match_with_roshan_kill)

NameError: name 'get_roshan_killed_by_player_data' is not defined

<h2>Make by minute statistics for runes, items purchases, buybacks and kills/deaths per each player

In [18]:
data_to_log = ['purchase_log', 'buyback_log', 'kills_log', 'runes_log', 'obs_log', 'sen_log']

In [5]:
test_duration = match_1['duration']
test_duration

NameError: name 'match_1' is not defined

In [30]:
def make_by_minute_stats_log_data_for_player(player: dict, event, match_duration: Union[int, str]):
    player_prefix = get_player_prefix(player)
    times_event_occured = [event_['time'] for event_ in player[event]]
    
    count = 0
    to_remove = []
    end_list = []
    for time_ in range(0, match_duration + 60, 60):
        for event_time in times_event_occured:
            if time_ - 60 < event_time <= time_:
                count += 1
                to_remove.append(event_time)
        for time__ in to_remove:
            times_event_occured.remove(time__)
        to_remove = []
        end_list.append([time_, count])
    stats_dataframe = pd.DataFrame(end_list, columns=['time', player_prefix + event, ]).set_index('time')
    
    return stats_dataframe

In [6]:
shared_df = pd.DataFrame([time_ for time_ in range(0, test_duration + 60, 60)], columns=['time']).set_index('time')
for event_log in data_to_log[1:]:
    data = make_by_minute_stats_log_data_for_player(match_player_1, event_log)
    shared_df = shared_df.join(data)
shared_df.fillna(0)

NameError: name 'pd' is not defined

<h3>Creating full dataframe for list of players

In [7]:
test_player = match_player_1['match_id']
test_player, test_duration

NameError: name 'match_player_1' is not defined

In [23]:
def create_empty_date_dataframe(time_range) -> pd.DataFrame:
    return pd.DataFrame([time_ for time_ in range(0, time_range + 60, 60)], columns=['time']).set_index('time')

In [34]:
def create_players_dataframe_data(players_list: List[dict], match_duration: Union[int, str]):
    players_dataframe = create_empty_date_dataframe(match_duration)
    for player in players_list:
        player_by_minute_data = make_by_minute_data_for_player(player)
        
        # TODO: add function to create player_purchases dataframe at once
        player_purchases = process_player_purchases(player, match_duration)
        
        # player log data
        for event_log in data_to_log[1:]:
            data = make_by_minute_stats_log_data_for_player(player, event_log, match_duration)
            players_dataframe = players_dataframe.join(data)
        players_dataframe.fillna(0)
        
        # join separate dataframes into one shared
        players_dataframe = players_dataframe.join(player_by_minute_data).join(player_purchases)
    
    return players_dataframe.fillna(0)

<h3>Full dataframe for one match and 10 players

In [8]:
test_match = match_1
test_match['match_id'], test_match['duration']

NameError: name 'match_1' is not defined

In [32]:
def create_match_dataframe_data(match: dict, players_list: List[dict]):
    # match duration
    match_duration = match['duration']
    shared_df = create_empty_date_dataframe(match_duration)
    times_index = shared_df.index.values
    shared_df['match_id'] = np.array([int(match['match_id'])] * len(times_index))
    shared_df['radiant_win'] = np.array([int(match['radiant_win'])] * len(times_index))
    
    # data for buildings, roshan, aegis, players
    
    # get_buildings_data
    # get_roshan_kill_data
    # get_roshan_killed_by_player_data
    # create_players_dataframe_data
    
    buildings_data = get_buildings_data(match)
    roshan_kills_data = get_roshan_kill_data(match)
    roshan_kill_by_player_data = get_roshan_killed_by_player_data(match)
    player_dataframe_data = create_players_dataframe_data(players_list, match_duration)
    shared_df = \
        shared_df.join(buildings_data).join(roshan_kills_data).join(roshan_kill_by_player_data).join(player_dataframe_data)
    
    shared_df = shared_df.reset_index()
    shared_df = shared_df[['match_id', 'time', *list(shared_df.columns)[2:]]]

    return shared_df.fillna(0)

In [None]:
test_match_1 = matches[0]
test_match_1['match_id']

In [9]:
test_players_1 = players[:10]
len([player['match_id'] for player in test_players_1])

NameError: name 'players' is not defined

In [None]:
test_data = create_match_dataframe_data(test_match_1, test_players_1)

In [None]:
test_data.head()

<h3>Creating full dataframe for all matches

In [27]:
import time

In [37]:
time_start = time.time()
lower = 0
upper = 10
match_count = 0
shared_df_all = pd.DataFrame()
for match in matches:
    try:
        print(f"Count: {match_count}, match_id: {match['match_id']}")
        # TypeError: 'NoneType' object is not iterable - case if match['objectives'] doesnt even exist
        if match.get('objectives') is None:
            print(f"Count: {match_count}, match_id: {match['match_id']} - match with null objectives object")
            lower += 10
            upper += 10
            continue

        players_list = players[lower:upper]
        match_dataframe = create_match_dataframe_data(match, players_list)
        shared_df_all = shared_df_all.append(match_dataframe, ignore_index=True)
        lower += 10
        upper += 10
        match_dataframe = None
        match_count += 1
    except Exception as e:
        print(f"Exception occured! Message: {e}")
shared_df_all = shared_df_all.fillna(0)
time.time() - time_start

Count: 0, match_id: 6022659410
Count: 1, match_id: 6022701284
Count: 2, match_id: 6022711300
Count: 3, match_id: 6022743793
Count: 4, match_id: 6022758673
Count: 5, match_id: 6022762549
Count: 6, match_id: 6022789415
Count: 7, match_id: 6022802265
Count: 8, match_id: 6022884300
Count: 9, match_id: 6023083239
Count: 10, match_id: 6023084699
Count: 11, match_id: 6023110186
Count: 12, match_id: 6023183153
Count: 13, match_id: 6023208208
Count: 14, match_id: 6023211594
Count: 15, match_id: 6023330168
Count: 16, match_id: 6023332232
Count: 17, match_id: 6023437931
Count: 18, match_id: 6023444829
Count: 19, match_id: 6023556932
Count: 20, match_id: 6023574998
Count: 21, match_id: 6023662185
Count: 22, match_id: 6023673163
Count: 23, match_id: 6023752731
Count: 24, match_id: 6023755332
Count: 25, match_id: 6023817373
Count: 26, match_id: 6023821096
Count: 27, match_id: 6023828679
Count: 28, match_id: 6023865423
Count: 29, match_id: 6023949092
Count: 30, match_id: 6023990768
Count: 31, match_i

9780.110123634338

In [40]:
9780/60 / 60

2.716666666666667

<h3>Final dataset

In [16]:
shared_df_all

NameError: name 'shared_df_all' is not defined

In [54]:
len(shared_df_all['radiant_win'].values)

145528

<h4>Total number of matches in df

In [43]:
len(set(list(shared_df_all['match_id'].values)))

3988

In [42]:
shared_df_all.to_csv('dota2-4k-matches.csv')

In [45]:
shared_df_all.radiant_win.value_counts()

1    74398
0    71130
Name: radiant_win, dtype: int64

In [47]:
list(shared_df_all.columns)

['match_id',
 'time',
 'radiant_win',
 'radiant_tower1',
 'radiant_tower2',
 'radiant_tower3',
 'radiant_tower4',
 'radiant_melee',
 'radiant_range',
 'radiant_fort',
 'dire_tower1',
 'dire_tower2',
 'dire_tower3',
 'dire_tower4',
 'dire_melee',
 'dire_range',
 'dire_fort',
 'radiant_roshan_kill',
 'dire_roshan_kill',
 'd1_aegis_count',
 'd2_aegis_count',
 'd3_aegis_count',
 'd4_aegis_count',
 'd5_aegis_count',
 'r1_aegis_count',
 'r2_aegis_count',
 'r3_aegis_count',
 'r4_aegis_count',
 'r5_aegis_count',
 'r1_buyback_log',
 'r1_kills_log',
 'r1_runes_log',
 'r1_obs_log',
 'r1_sen_log',
 'r1_xp_t',
 'r1_gold_t',
 'r1_dn_t',
 'r1_lh_t',
 'r1_component_count',
 'r1_secret_shop_count',
 'r1_consumable_count',
 'r1_common_count',
 'r1_rare_count',
 'r1_epic_count',
 'r1_artifact_count',
 'r1_null_count',
 'r1_component_cost',
 'r1_secret_shop_cost',
 'r1_consumable_cost',
 'r1_common_cost',
 'r1_rare_cost',
 'r1_epic_cost',
 'r1_artifact_cost',
 'r1_null_cost',
 'r2_buyback_log',
 'r2_kills

In [14]:
# two debug matches
# match with null objectives object = 6046319155
# next match with cause a ValueError exception = 6046389369

In [15]:
before_null_match = [match for match in matches if match['match_id'] == 6046275234][0]
# before_null_match

NameError: name 'matches' is not defined

In [None]:
null_objectives = [match for match in matches if match['match_id'] == 6046319155][0]
null_objectives

In [None]:
exception_match = [match for match in matches if match['match_id'] == 6046389369][0]
# exception_match

In [16]:
two_matches = [before_null_match, null_objectives, exception_match]

NameError: name 'before_null_match' is not defined

In [None]:
before_match_players = [player for player in players if player['match_id'] == 6046275234]
first_match_players = [player for player in players if player['match_id'] == 6046319155]
second_match_players = [player for player in players if player['match_id'] == 6046389369]

In [None]:
two_players = before_match_players + first_match_players + second_match_players

In [17]:
null_objectives = [match for match in matches if match['match_id'] == 6046319155][0]
if null_objectives.get('objectives') and not null_objectives['objectives']:
    print("True")

NameError: name 'matches' is not defined

In [18]:
if null_objectives.get('objectives') is None:
    print("True")
else:
    print("False")

NameError: name 'null_objectives' is not defined

In [None]:
NaN_values = shared_df_all.loc[(shared_df_all['match_id'] == 6023083239) & (shared_df_all['time'] == 0)]
for field, nAn_value in NaN_values.items():
    if nan_value

In [None]:
null_element = shared_df_all.loc[(shared_df_all['match_id'] == 6023083239) & (shared_df_all['time'] == 0)]['d5_secret_shop_cost'].values[0]
pd.isna(null_element), null_element

In [19]:
player_nAn = [player for player in players if player['match_id'] == 6023083239][0]
player_nAn['purchase_log']

NameError: name 'players' is not defined

In [None]:
dataframe__ = shared_df_all.loc[(shared_df_all['match_id'] == 6023083239) & (shared_df_all['time'] == 0)]
dataframe__

In [None]:
type(dataframe__)

In [274]:
dataframe__.fillna(0)

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
281,6023083239,0,1,3,3,3,2,3,3,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<h2>Train models

In [3]:
from sklearn.model_selection import train_test_split
import pandas as pd

In [19]:
data = pd.read_csv('dota2-4k-matches.csv', index_col=0)
data

Unnamed: 0,match_id,time,radiant_win,radiant_tower1,radiant_tower2,radiant_tower3,radiant_tower4,radiant_melee,radiant_range,radiant_fort,...,d5_artifact_count,d5_null_count,d5_component_cost,d5_secret_shop_cost,d5_consumable_cost,d5_common_cost,d5_rare_cost,d5_epic_cost,d5_artifact_cost,d5_null_cost
0,6022659410,0,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,190.0,0.0,0.0,0.0,0.0,0.0
1,6022659410,60,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
2,6022659410,120,0,3,3,3,2,3,3,1,...,0.0,0.0,305.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
3,6022659410,180,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
4,6022659410,240,0,3,3,3,2,3,3,1,...,0.0,0.0,805.0,0.0,320.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145523,6174722110,2220,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,7650.0,0.0,0.0,0.0
145524,6174722110,2280,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145525,6174722110,2340,0,0,1,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0
145526,6174722110,2400,0,0,0,1,2,1,1,1,...,0.0,0.0,9675.0,2000.0,1055.0,550.0,11850.0,0.0,0.0,0.0


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.base import clone

X = data.drop(columns=['radiant_win'])
y = data['radiant_win']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=124)

In [11]:
from sklearn.preprocessing import StandardScaler
stdX = StandardScaler().fit_transform(X)
stdX_train, stdX_test, stdy_train, stdy_test = train_test_split(stdX, y, test_size=0.25, random_state=42)

In [14]:
from pytorch_tabnet.tab_model import TabNetClassifier

clf = TabNetClassifier()
clf.fit(
    stdX_train, stdy_train,
    eval_metric=['accuracy'],
    max_epochs=25
)

Device used : cpu
No early stopping will be performed, last training weights will be used.
epoch 0  | loss: 0.6859  |  0:00:22s
epoch 1  | loss: 0.61757 |  0:00:45s
epoch 2  | loss: 0.5647  |  0:01:07s
epoch 3  | loss: 0.52685 |  0:01:31s
epoch 4  | loss: 0.50601 |  0:01:53s
epoch 5  | loss: 0.48904 |  0:02:16s
epoch 6  | loss: 0.46993 |  0:02:39s
epoch 7  | loss: 0.45217 |  0:03:02s
epoch 8  | loss: 0.4316  |  0:03:25s
epoch 9  | loss: 0.41366 |  0:03:47s
epoch 10 | loss: 0.40002 |  0:04:12s
epoch 11 | loss: 0.38531 |  0:04:36s
epoch 12 | loss: 0.36476 |  0:04:59s
epoch 13 | loss: 0.35341 |  0:05:24s
epoch 14 | loss: 0.34278 |  0:05:48s
epoch 15 | loss: 0.33186 |  0:06:11s
epoch 16 | loss: 0.32668 |  0:06:34s
epoch 17 | loss: 0.32098 |  0:06:58s
epoch 18 | loss: 0.30765 |  0:07:22s
epoch 19 | loss: 0.29912 |  0:07:46s
epoch 20 | loss: 0.2956  |  0:08:09s
epoch 21 | loss: 0.28724 |  0:08:31s
epoch 22 | loss: 0.28195 |  0:08:53s
epoch 23 | loss: 0.28274 |  0:09:15s
epoch 24 | loss: 0.27

In [15]:
preds = clf.predict(stdX_test)
(preds == stdy_test).mean()

0.8589137485569788

In [58]:
from sklearn.linear_model import LogisticRegression

In [59]:
model = LogisticRegression()

In [None]:
model.fit()