In [9]:
import ast

import pandas as pd
import requests
from tqdm import tqdm

import config

In [2]:
# Load the raw match list; its match_id column drives the API downloads below.
data = pd.read_csv('raw_matches.csv')

In [4]:
# Summarize column dtypes and non-null counts of the raw match list.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5038 entries, 0 to 5037
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   match_id       5038 non-null   int64  
 1   player_slot    5038 non-null   int64  
 2   radiant_win    5038 non-null   bool   
 3   game_mode      5038 non-null   int64  
 4   patch          5038 non-null   int64  
 5   duration       5038 non-null   int64  
 6   lobby_type     5038 non-null   int64  
 7   hero_id        5038 non-null   int64  
 8   start_time     5038 non-null   int64  
 9   version        4960 non-null   float64
 10  kills          5038 non-null   int64  
 11  deaths         5038 non-null   int64  
 12  assists        5038 non-null   int64  
 13  skill          0 non-null      float64
 14  average_rank   5021 non-null   float64
 15  leaver_status  5038 non-null   int64  
 16  party_size     5034 non-null   float64
dtypes: bool(1), float64(4), int64(12)
memory usage: 634.

In [5]:
def get_match_data(matches, timeout=30):
    """Download match details from the OpenDota API, one request per match id.

    Parameters
    ----------
    matches : sized iterable
        Match ids to fetch; each id is interpolated into the request URL.
    timeout : float, optional
        Seconds to wait for each HTTP request before treating it as failed.

    Returns
    -------
    df : pandas.DataFrame
        One row per successfully fetched match whose 'players' entry is
        non-empty, built directly from the decoded JSON payloads.
    errors : list
        Match ids that could not be used: the request failed or timed out,
        the server answered with an HTTP error status, the body was not
        valid JSON, or the payload had a missing/empty 'players' entry.
    """
    match_data = []
    errors = []

    # A single session reuses the underlying connection across requests.
    with requests.Session() as session:
        for match in tqdm(matches, total=len(matches)):
            try:
                response = session.get(
                    f"https://api.opendota.com/api/matches/{match}",
                    params={'api_key': config.api_key},
                    timeout=timeout,
                )
                response.raise_for_status()
                payload = response.json()
                if payload['players']:
                    match_data.append(payload)
                else:
                    # Record matches without player data instead of dropping
                    # them silently, so the caller can inspect or retry them.
                    errors.append(match)
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Only expected failures are caught (network/HTTP errors,
                # undecodable JSON, unexpected payload shape); a
                # KeyboardInterrupt still stops the long-running loop.
                errors.append(match)

    df = pd.DataFrame(match_data)

    return df, errors

In [10]:
# Fetch every match listed in the raw match file. The recorded run issued 5038
# requests and took about 32 minutes.
# NOTE(review): if raw_matches.csv can list the same match_id more than once,
# those matches are downloaded repeatedly - confirm whether .unique() is wanted.
df, errors = get_match_data(data['match_id'].values)

100%|██████████████████████████████████████████████████████████████████████████████| 5038/5038 [32:27<00:00,  2.59it/s]


In [12]:
# Columns removed from the API match frame to build the compact match-level
# table. 'players' is among them; it is pulled out separately as its own Series.
MATCH_DROP = [
    'barracks_status_dire',
    'barracks_status_radiant',
    'dire_score',
    'radiant_score',
    'chat',
    'cluster',
    'cosmetics',
    'draft_timings',
    'first_blood_time',
    'human_players',
    'league',
    'match_seq_num',
    'negative_votes',
    'objectives',
    'picks_bans',
    'positive_votes',
    'radiant_gold_adv',
    'radiant_xp_adv',
    'skill',
    'start_time',
    'teamfights',
    'tower_status_dire',
    'tower_status_radiant',
    'radiant_team',
    'dire_team',
    'version',
    'players',
    'replay_salt',
    'all_word_counts',
    'my_word_counts',
    'throw',
    'loss',
    'replay_url',
    'comeback',
    'stomp',
]

In [13]:
# Match-level table: the API frame minus every column named in MATCH_DROP.
compact_match_df = df.drop(columns=MATCH_DROP)

In [14]:
# Display the trimmed match-level frame to check which columns remain.
compact_match_df

Unnamed: 0,match_id,dire_team_id,duration,engine,game_mode,leagueid,lobby_type,radiant_team_id,radiant_win,series_id,series_type,patch,region
0,7343550699,8291895.0,2448,1,2,15739,1,2163.0,False,808686,1,53,3
1,7343495938,2163.0,1688,1,2,15739,1,8291895.0,True,808686,1,53,3
2,7342658368,39.0,1639,1,2,15739,1,2163.0,False,808487,1,53,3
3,7342542795,2163.0,2362,1,2,15739,1,39.0,False,808487,1,53,3
4,7342153416,2163.0,2858,1,2,15739,1,8597976.0,False,808450,1,53,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5033,7192927167,8261648.0,2351,1,2,15352,1,7300277.0,True,783499,1,52,3
5034,7190522768,8390848.0,1921,1,2,15352,1,8261648.0,True,783062,1,52,3
5035,7190444131,8261648.0,2724,1,2,15352,1,8390848.0,False,783062,1,52,3
5036,7125457650,111474.0,2686,1,2,15088,1,8944337.0,False,772532,1,52,3


In [15]:
# Per-match 'players' column of the API frame, consumed by make_player_df.
# Presumably each cell holds the list of player records for one match - confirm.
players = df['players']

In [17]:
# Per-player columns that make_player_df keeps from each match's player
# records (identifiers, stat columns and match-outcome flags).
FANTASY_METRICS = [
    'match_id',
    'account_id',
    'assists',
    'camps_stacked',
    'deaths',
    'denies',
    'firstblood_claimed',
    'gold_per_min',
    'kills',
    'last_hits',
    'obs_placed',
    'roshans_killed',
    'rune_pickups',
    'stuns',
    'teamfight_participation',
    'towers_killed',
    'start_time',
    'radiant_win',
    'isRadiant',
    'win',
    'lose',
]

def make_player_df(players):
    """Flatten per-match player records into one player-per-row DataFrame.

    Parameters
    ----------
    players : sized iterable
        One entry per match. Each entry is either a collection of player
        records accepted by ``pd.DataFrame`` or the string repr of such a
        collection, which is parsed with ``ast.literal_eval``.

    Returns
    -------
    pandas.DataFrame
        All matches' players stacked with a fresh integer index, restricted
        to the FANTASY_METRICS columns. An empty input yields an empty frame
        with those columns.

    Raises
    ------
    KeyError
        If a match's records lack one of the FANTASY_METRICS columns.
    """
    all_matches = []
    for match_players in tqdm(players, total=len(players)):
        if isinstance(match_players, str):
            # Parse literals only; eval() would execute arbitrary code
            # embedded in the string.
            match_players = ast.literal_eval(match_players)
        # Already-decoded records go straight to pandas; round-tripping them
        # through str()/eval() only rebuilt the same objects at high cost.
        all_matches.append(pd.DataFrame(match_players)[FANTASY_METRICS])

    if not all_matches:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(columns=FANTASY_METRICS)

    player_match_df = pd.concat(all_matches, ignore_index=True)

    return player_match_df

In [18]:
# Build the player-level frame from every match's player records. The recorded
# run processed 5038 matches in about 8 minutes.
player_df = make_player_df(players)

100%|██████████████████████████████████████████████████████████████████████████████| 5038/5038 [07:56<00:00, 10.57it/s]


In [26]:
# Load the player list; its account_id column selects which players to keep.
play = pd.read_csv('raw_players.csv')

In [30]:
# Account ids from the player list, used as the membership filter below.
ids = play['account_id'].values

In [31]:
# Keep only the player rows whose account_id appears in the loaded id list.
ti_player_df = player_df.loc[lambda frame: frame['account_id'].isin(ids)]

In [34]:
# Non-null value count per column for each account, sorted ascending by the
# number of match rows. A column whose count is below match_id's is missing
# that value in some of the account's matches.
ti_player_df.groupby('account_id').count().sort_values('match_id')

Unnamed: 0_level_0,match_id,assists,camps_stacked,deaths,denies,firstblood_claimed,gold_per_min,kills,last_hits,obs_placed,roshans_killed,rune_pickups,stuns,teamfight_participation,towers_killed,start_time,radiant_win,isRadiant,win,lose
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
134711350.0,19,19,14,19,19,14,19,19,19,14,14,14,14,14,14,19,19,19,19,19
164685175.0,86,86,86,86,86,86,86,86,86,86,86,86,86,86,86,86,86,86,86,86
106573901.0,93,93,88,93,93,88,93,93,93,88,88,88,88,88,88,93,93,93,93,93
103735745.0,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96,96
148526973.0,120,120,115,120,120,115,120,120,120,115,115,115,115,115,115,120,120,120,120,120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97590558.0,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602
221666230.0,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602
116934015.0,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602
16497807.0,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602


In [35]:
# Persist the filtered player rows. index=False is not passed, so the frame's
# index is written as the first, unnamed CSV column.
# NOTE(review): consider index=False if downstream readers do not use that
# column - confirm before changing the file layout.
ti_player_df.to_csv('ti_players_match_data.csv')