In [1]:
import json
import os
import re
import time
from datetime import datetime

import firebase_admin
import openpyxl
import pandas as pd
import requests
from firebase_admin import credentials
from firebase_admin import firestore
from IPython import display

In [2]:
# Path to the Firebase service-account key file. Set the FIREBASE_PRIVATE_KEY
# environment variable to override the machine-specific default below.
FIREBASE_PRIVATE_KEY = os.environ.get(
    'FIREBASE_PRIVATE_KEY',
    r'C:\Users\johnp\Documents\ESportsAnalytics\esportsanalytics-3b04cf0c1d47.json',
)

# Root of the OpenDota REST API; the endpoint names below are appended to it.
BASE_URL = 'https://api.opendota.com/api/'

# Endpoint path segments used to build request URLs.
TEAMS = 'teams'
HEROES = 'heroes'
PROPLAYERS = 'proPlayers'
PROMATCHES = 'proMatches'
MATCHES = 'matches'
PLAYERS = 'players'
HEROES_STATS = 'heroStats'


In [3]:
# Connect to Firebase and return a database instance
def connecttofirebase(PATH):
    """Initialise the Firebase app (once) and return a Firestore client.

    Args:
        PATH: Location of the service-account key file handed to
            ``credentials.Certificate``.

    Returns:
        The client returned by ``firestore.client()``.

    Note:
        If a Firebase app is already initialised, ``PATH`` is not used and
        the existing app is kept.
    """
    # Check if the app is already initialised, if not then connect
    if not firebase_admin._apps:
        # Use the caller-supplied path rather than the module-level constant,
        # so the parameter actually controls which key file is loaded.
        cred = credentials.Certificate(PATH)
        firebase_admin.initialize_app(cred)

    # Get an instance of the project database
    return firestore.client()

In [4]:
# Module-level Firestore client; the *_firebase functions below read `db` as a global.
db = connecttofirebase(FIREBASE_PRIVATE_KEY)

In [5]:
db.project

'esportsanalytics'

# Data Extraction

In [6]:
def get_all_teams_from_firebase():
    """Read the cached team list from the ``dota/all_teams`` Firestore document.

    Prints how old the cached copy is (the number is in days) and returns
    the value stored under ``all_teams_info``.
    """
    snapshot = db.collection(u'dota').document(u'all_teams').get()
    stored = snapshot.to_dict()

    # The upload time is stored as text in "%m/%d/%Y, %H:%M:%S" format.
    uploaded_at = datetime.strptime(stored['date_time_upload'], "%m/%d/%Y, %H:%M:%S")

    # 86400 seconds per day turns the elapsed time into fractional days.
    age_in_days = (datetime.now() - uploaded_at).total_seconds() / 86400
    print('Data is:', age_in_days, 'old')

    return stored['all_teams_info']

In [7]:
def get_all_teams_from_opendota(timeout=30):
    """Download the list of all teams from the OpenDota API.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + TEAMS, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [8]:
def update_all_teams_in_firebase(timeout=30):
    """Download all teams from OpenDota and store them in Firestore.

    The payload is written to the ``dota/all_teams`` document under
    ``all_teams_info`` together with a ``date_time_upload`` timestamp.
    Nothing is written if the request times out.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Raises:
        SystemExit: On any non-timeout ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + TEAMS, timeout=timeout)

    except requests.exceptions.Timeout:
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return None

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    team_data_json = json.loads(response.content.decode('utf-8'))

    # upload the team_data_json in Firebase if there are no errors
    data = {
        u'date_time_upload' : datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
        u'all_teams_info' : team_data_json
    }

    db.collection(u'dota').document(u'all_teams').set(data)
    print("All Teams data updated in Firebase")

In [9]:
# Save the JSON as a file (by default in the working directory)
def save_all_teams_data_to_file(all_teams_data_json, file_path='all_teams_data.json'):
    """Write the teams data to a UTF-8 encoded, indented JSON file.

    Args:
        all_teams_data_json: JSON-serialisable data to store.
        file_path: Destination file; defaults to ``all_teams_data.json`` in
            the working directory, as before.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII names readable in the file.
        json.dump(all_teams_data_json, f, ensure_ascii=False, indent=4)

In [10]:
def get_all_heroes_from_firebase():
    """Read the cached hero list from the ``dota/all_heroes`` Firestore document.

    Prints how old the cached copy is (the number is in days) and returns
    the value stored under ``all_heroes_info``.
    """
    snapshot = db.collection(u'dota').document(u'all_heroes').get()
    stored = snapshot.to_dict()

    # The upload time is stored as text in "%m/%d/%Y, %H:%M:%S" format.
    uploaded_at = datetime.strptime(stored['date_time_upload'], "%m/%d/%Y, %H:%M:%S")

    # 86400 seconds per day turns the elapsed time into fractional days.
    age_in_days = (datetime.now() - uploaded_at).total_seconds() / 86400
    print('Data is:', age_in_days, 'old')

    return stored['all_heroes_info']

In [11]:
def get_all_heroes_from_opendota(timeout=30):
    """Download the list of all heroes from the OpenDota API.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + HEROES, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [12]:
def update_all_heroes_in_firebase(timeout=30):
    """Download all heroes from OpenDota and store them in Firestore.

    The payload is written to the ``dota/all_heroes`` document under
    ``all_heroes_info`` (the key ``get_all_heroes_from_firebase`` reads)
    together with a ``date_time_upload`` timestamp. Nothing is written if
    the request times out.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Raises:
        SystemExit: On any non-timeout ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + HEROES, timeout=timeout)

    except requests.exceptions.Timeout:
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return None

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    heroes_data_json = json.loads(response.content.decode('utf-8'))

    # upload the heroes_data_json in Firebase if there are no errors
    data = {
        u'date_time_upload' : datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
        # Was 'all_teams_info', which made the reader fail with KeyError.
        u'all_heroes_info' : heroes_data_json
    }

    db.collection(u'dota').document(u'all_heroes').set(data)
    print("All Heroes data updated in Firebase")

In [13]:
# Save the JSON as a file (by default in the working directory)
def save_all_heroes_data_to_file(all_heroes_data_json, file_path='all_heroes_data.json'):
    """Write the heroes data to a UTF-8 encoded, indented JSON file.

    Args:
        all_heroes_data_json: JSON-serialisable data to store.
        file_path: Destination file; defaults to ``all_heroes_data.json`` in
            the working directory, as before.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII names readable in the file.
        json.dump(all_heroes_data_json, f, ensure_ascii=False, indent=4)

In [14]:
def get_all_pro_players_in_opendota(timeout=30):
    """Download the list of all pro players from the OpenDota API.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + PROPLAYERS, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [15]:
def get_all_pro_players_in_firebase():
    """Rebuild the pro-player list from its three Firestore documents.

    Reads ``dota/all_pro_players_1`` .. ``_3`` in order and concatenates the
    lists stored under ``all_pro_players_info_1`` .. ``_3``.
    """
    combined_players = []

    for part_number in (1, 2, 3):
        suffix = str(part_number)
        snapshot = db.collection(u'dota').document(u'all_pro_players_' + suffix).get()

        # Each document keeps its slice under a key with the same numeric suffix.
        combined_players.extend(snapshot.to_dict()['all_pro_players_info_' + suffix])

    return combined_players

In [16]:
def update_all_pro_players_in_firebase(timeout=30):
    """Download all pro players from OpenDota and store them in Firestore.

    The list is split into three slices (``[0:1000]``, ``[1000:2000]``,
    ``[2000:]``) written to ``dota/all_pro_players_1`` .. ``_3`` under the
    keys ``all_pro_players_info_1`` .. ``_3``. Every slice carries the same
    ``date_time_upload`` timestamp. Nothing is written if the request times out.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Raises:
        SystemExit: On any non-timeout ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + PROPLAYERS, timeout=timeout)

    except requests.exceptions.Timeout:
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return None

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    pro_players_data_json = json.loads(response.content.decode('utf-8'))

    # One timestamp for the whole upload so the three documents agree.
    upload_time = datetime.now().strftime("%m/%d/%Y, %H:%M:%S")

    # NOTE(review): the last slice is unbounded; get_all_pro_players_in_firebase
    # reads exactly three documents, so the split is kept at three parts.
    player_slices = [
        pro_players_data_json[0:1000],
        pro_players_data_json[1000:2000],
        pro_players_data_json[2000:],
    ]

    for part_number, player_slice in enumerate(player_slices, start=1):
        data = {
            u'all_pro_players_info_' + str(part_number) : player_slice,
            u'date_time_upload' : upload_time,
        }
        db.collection(u'dota').document(u'all_pro_players_' + str(part_number)).set(data)

    print("All Pro Players data updated in Firebase")
    

In [17]:
# Save the JSON as a file (by default in the working directory)
def save_all_pro_players_data_to_file(all_pro_players_data_json, file_path='all_pro_players_data.json'):
    """Write the pro-player data to a UTF-8 encoded, indented JSON file.

    Args:
        all_pro_players_data_json: JSON-serialisable data to store.
        file_path: Destination file; defaults to ``all_pro_players_data.json``
            in the working directory, as before.
    """
    with open(file_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII names readable in the file.
        json.dump(all_pro_players_data_json, f, ensure_ascii=False, indent=4)

In [18]:
def get_opendota_api_info(timeout=30):
    """Fetch the document served at the OpenDota API root (``BASE_URL``).

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [19]:
def get_match_info(match_id, timeout=30):
    """Download the details of one match from the OpenDota API.

    Args:
        match_id: Match identifier; converted with ``str`` to build the URL.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + MATCHES + '/' + str(match_id), timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [20]:
def get_all_heroes_stats(timeout=30):
    """Download hero statistics from the OpenDota ``heroStats`` endpoint.

    Args:
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + HEROES_STATS, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

# Data Search

In [21]:
def search_regex(search_string, name_from_list):
    """Case-insensitively search ``name_from_list`` for ``search_string``.

    Args:
        search_string: Regular-expression pattern to look for.
        name_from_list: Text to search in.

    Returns:
        The matched text, or ``False`` when there is no match, the pattern
        is not a valid regular expression, or the text is not a string.
    """
    try:
        match = re.search(search_string, name_from_list, re.IGNORECASE)
    except (re.error, TypeError):
        # Invalid pattern, or a non-string value such as None for a team
        # without a name: treated as "no match", as before.
        return False

    if match is None:
        return False
    return match.group()

In [22]:
def search_item(item_to_search, item_key, data_source):
    """Return the records of ``data_source`` whose ``item_key`` value matches.

    A record is kept when ``search_regex(item_to_search, record[item_key])``
    returns a truthy value. The result is a new list in the original order.
    """
    matching_records = []
    for record in data_source:
        if search_regex(item_to_search, record[item_key]):
            matching_records.append(record)
    return matching_records

In [23]:
# Live team list from OpenDota; the function returns 0 instead of a list on timeout.
all_teams_json = get_all_teams_from_opendota()

In [24]:
# Case-insensitive search of the team names for 'MONKEY'.
x = search_item('MONKEY', 'name', all_teams_json)
x

[{'team_id': 2519319,
  'rating': 1389,
  'wins': 45,
  'losses': 16,
  'last_match_time': 1445985009,
  'name': '(monkey) Business',
  'tag': '(monkey)',
  'logo_url': 'https://steamusercontent-a.akamaihd.net/ugc/383162071968605685/B670804CDB13F184D122ACBB8F75E2DB2C959CAF/'},
 {'team_id': 7937261,
  'rating': 1079.93,
  'wins': 23,
  'losses': 27,
  'last_match_time': 1596792574,
  'name': 'Monkey2',
  'tag': '',
  'logo_url': 'https://steamusercontent-a.akamaihd.net/ugc/1679241899327735038/A629AA9E790DA12A10356D936D1C20D4AE146579/'}]

In [25]:
def get_team_matches(team_id, timeout=30):
    """Download the match list of one team from the OpenDota API.

    Args:
        team_id: Team identifier; converted with ``str`` to build the URL.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + TEAMS + '/' + str(team_id) + '/' + MATCHES, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [26]:
def get_player_info(account_id, timeout=30):
    """Download the profile data of one player from the OpenDota API.

    Args:
        account_id: Player account id; converted with ``str`` to build the URL.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + PLAYERS + '/' + str(account_id), timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    return json.loads(response.content.decode('utf-8'))

In [27]:
# Match list for team id 350190, kept under the name team_fnatic_matches.
team_fnatic_matches = get_team_matches(350190)

In [28]:
len(team_fnatic_matches)

2081

In [29]:
# Match list for team id 2586976, kept under the name team_og_matches.
team_og_matches = get_team_matches(2586976)

In [30]:
len(team_og_matches)

1335

In [31]:
team_og_matches[1330]

{'match_id': 1912135868,
 'radiant_win': True,
 'radiant': True,
 'duration': 1423,
 'start_time': 1446573057,
 'leagueid': 3865,
 'league_name': 'ASUS ROG DreamLeague Season 4',
 'cluster': 133,
 'opposing_team_id': 758797,
 'opposing_team_name': 'MONKEY FREEDOM FIGHTERS',
 'opposing_team_logo': 'https://steamusercontent-a.akamaihd.net/ugc/450653562083541278/409E73DC29CC511034EBABDB9CD181EFBF7A2CEC/'}

In [32]:
def combine_team_matches_data(team_matches_json, all_teams_json, predict_team_id,
                              matches_per_batch=5, wait_seconds=60, sample_every=5):
    """Enrich a sample of a team's matches with player and team statistics.

    For every ``sample_every``-th match (index 0 is never selected, as in the
    original selection rule) the match details and the profile of each player
    are downloaded and stored on the match dict under ``predict_players``,
    ``opposing_players``, ``opposing_team_stat`` and ``predict_team_stat``.
    After ``matches_per_batch`` downloaded matches the function pauses for
    ``wait_seconds`` seconds before continuing, to stay under the API rate
    limit.

    Args:
        team_matches_json: List of match dicts with at least ``match_id``,
            ``radiant`` and ``opposing_team_id``. Modified in place.
        all_teams_json: List of team dicts with ``team_id``, ``wins``,
            ``losses``, ``rating`` and ``name``.
        predict_team_id: ``team_id`` of the team the matches belong to.
        matches_per_batch: Matches downloaded between two pauses. Each match
            costs 1 request plus 1 per player (11 for a 10-player match).
        wait_seconds: Length of each pause in seconds.
        sample_every: Positive step between selected match indices.

    Returns:
        ``team_matches_json`` (the same list object that was passed in).
    """
    matches_total = len(team_matches_json)
    matches_in_batch = 0

    # The predicted team's totals do not depend on the match, so look them up once.
    predict_team = [{'predict_team_wins_total' : team['wins'],
                     'predict_team_losses_total' : team['losses'],
                     'predict_team_rating' : team['rating'],
                     'predict_team_name' : team['name'],
                     'predict_team_id' : team['team_id']} for team in all_teams_json if team['team_id'] == predict_team_id]

    for index, match_info in enumerate(team_matches_json):

        # NOTE(review): sampling rule kept from the original code; confirm
        # whether every match (including index 0) should be enriched instead.
        if index == 0 or index % sample_every != 0:
            continue

        # Pause only when the current batch is used up, then start a new batch.
        # (The original counter was never advanced after the first pause, so
        # every later iteration slept and no further match was downloaded.)
        if matches_in_batch >= matches_per_batch:
            for i in range(wait_seconds, 0, -1):
                print('Exceeded max API calls, please wait:', i, 'seconds                                                       ', end = '\r')
                time.sleep(1)
            matches_in_batch = 0

        # 1 x API CALL
        match_info_detailed = get_match_info(match_info['match_id'])
        matches_in_batch += 1

        # get_match_info returns 0 on timeout; a payload without players
        # cannot be enriched either. Skip the match instead of crashing.
        if not isinstance(match_info_detailed, dict) or 'players' not in match_info_detailed:
            print('match number:', index, 'skipped, no player data returned')
            continue

        # 1 x API CALL per player on the predicted team's side
        predict_players = [{'account_id' : player['account_id'],
                            'name' : player['name'],
                            'player_stats' : get_player_info(player['account_id'])
                           } for player in match_info_detailed['players'] if player['isRadiant'] == match_info['radiant']]

        # 1 x API CALL per player on the opposing side
        opposing_players = [{'account_id' : player['account_id'],
                             'name' : player['name'],
                             'player_stats' : get_player_info(player['account_id'])
                            } for player in match_info_detailed['players'] if player['isRadiant'] != match_info['radiant']]

        opposing_team = [{'opposing_team_wins_total' : team['wins'],
                          'opposing_team_losses_total' : team['losses'],
                          'opposing_team_rating' : team['rating']} for team in all_teams_json if team['team_id'] == match_info['opposing_team_id']]

        match_info['predict_players'] = predict_players
        match_info['opposing_players'] = opposing_players
        match_info['opposing_team_stat'] = opposing_team
        # Copy so that the matches do not share one mutable list of dicts.
        match_info['predict_team_stat'] = [dict(stat) for stat in predict_team]

        print('match number:', index, 'downloaded', matches_total - index, 'matches left to go', round((index / matches_total) * 100, 2),'% complete', end = '\r')

    return team_matches_json

# read file
# Loads a previously saved copy of the matches into `structure`.
# NOTE(review): the path is absolute and machine-specific, and the file is read
# as cp866 while the block below writes UTF-8 - confirm the encoding is intended.
with open(r'C:\Users\johnp\team_og_matches.json', 'r', encoding="cp866") as input_file:

    data = input_file.read()
    structure = json.loads(data)

# write file: dump the in-memory team_og_matches list to the working directory
with open('team_og_matches.json', 'w', encoding='utf-8') as f:
    json.dump(team_og_matches, f, ensure_ascii=False, indent=4)

In [33]:
# Live hero list from OpenDota (0 on timeout).
all_heroes = get_all_heroes_from_opendota()

In [34]:
# Live pro-player list from OpenDota (0 on timeout).
all_pro_players = get_all_pro_players_in_opendota()

In [35]:
# Details of match 1912135868 (the match shown at index 1330 of team_og_matches above).
# NOTE(review): that record lists the opponent as 'MONKEY FREEDOM FIGHTERS',
# so the `og_v_fnatic` name may be misleading - confirm.
og_v_fnatic_match_info = get_match_info(1912135868)

In [36]:
len(og_v_fnatic_match_info)

46

In [37]:
# Print the top-level keys of the match payload, one per line.
for key in og_v_fnatic_match_info:
    print(key)

match_id
barracks_status_dire
barracks_status_radiant
chat
cluster
cosmetics
dire_score
dire_team_id
draft_timings
duration
engine
first_blood_time
game_mode
human_players
leagueid
lobby_type
match_seq_num
negative_votes
objectives
picks_bans
positive_votes
radiant_gold_adv
radiant_score
radiant_team_id
radiant_win
radiant_xp_adv
skill
start_time
teamfights
tower_status_dire
tower_status_radiant
version
replay_salt
series_id
series_type
league
radiant_team
dire_team
players
patch
region
all_word_counts
my_word_counts
throw
loss
replay_url


In [38]:
og_v_fnatic_match_info['radiant_team']

{'team_id': 2586976,
 'name': 'OG',
 'tag': 'OG',
 'logo_url': 'https://steamcdn-a.akamaihd.net/apps/dota2/images/team_logos/2586976.png'}

In [None]:
# NOTE(review): `players` is not assigned in any visible cell and this cell has
# no execution count; presumably og_v_fnatic_match_info['players'] - confirm.
players[0]['name']

In [None]:
# Keys of the first player record. NOTE(review): depends on the undefined `players`.
[p_keys for p_keys in players[0]]

In [None]:
# NOTE(review): `team_og_matches_df` is not created in any visible cell;
# presumably a DataFrame built from team_og_matches - confirm.
team_og_matches_df.head()

In [None]:
# Print a one-line summary for the first 10 entries of `players`.
# NOTE(review): depends on `players`, which no visible cell defines.
for i in range(10):
    
    print('name:', players[i]['name'], 'lane role:', players[i]['lane_role'], 'accountid:',  players[i]['account_id'], 'lane efficiency:', players[i]['lane_efficiency_pct'], 'kda:', players[i]['kda'],'rank_tier:', players[i]['rank_tier'], 'is radiant:', players[i]['isRadiant'])

In [40]:
# Fetch one player's profile and read the estimated MMR from it.
# The account id is passed as a string here; get_player_info applies str(), so ints work too.
taiga = get_player_info('401792574')
taiga['mmr_estimate']['estimate']

6386

In [None]:
# Top-level keys of the player payload (issues a fresh API request each time it runs).
[stat for stat in get_player_info(401792574)]

In [41]:
def get_player_account_id_from_match_id(match_id, isRadiant):
    """Split a match's players into the prediction team and the opposing team.

    Downloads the match, then the profile of every player in it, and builds
    one summary dict per player with ``name``, ``account_id``,
    ``mmr_estimate``, ``rank_tier``, ``leaderboard_rank`` and
    ``solo_competitive_rank``. Profile fields that are missing, or a profile
    that could not be downloaded, are reported as ``None``.

    Args:
        match_id: Identifier of the match to look up.
        isRadiant: Side of the prediction team; players whose ``isRadiant``
            flag equals this value go to ``prediction_team``.

    Returns:
        ``{'opposing_team': [...], 'prediction_team': [...]}``.
    """
    opposing_team = []
    prediction_team = []

    match_info_json = get_match_info(match_id)

    # Iterate over the players actually present rather than assuming 10.
    for player in match_info_json['players']:
        player_info = get_player_info(player['account_id'])

        # get_player_info returns 0 on timeout; treat that as "no stats".
        if not isinstance(player_info, dict):
            player_info = {}
        mmr_estimate = player_info.get('mmr_estimate') or {}

        player_summary = {'name' : player['name'],
                          'account_id' : player['account_id'],
                          'mmr_estimate' : mmr_estimate.get('estimate'),
                          'rank_tier' : player_info.get('rank_tier'),
                          'leaderboard_rank' : player_info.get('leaderboard_rank'),
                          'solo_competitive_rank' : player_info.get('solo_competitive_rank')}

        # if the 'isRadiant' is equal to isRadiant then the player is on the prediction team
        if player['isRadiant'] == isRadiant:
            prediction_team.append(player_summary)
        else:
            opposing_team.append(player_summary)

    return {'opposing_team' : opposing_team, 'prediction_team' : prediction_team}

In [42]:
def get_team_players(team_id, timeout=30):
    """Download the player list of one team from the OpenDota API.

    Prints the response object before decoding it.

    Args:
        team_id: Team identifier; converted with ``str`` to build the URL.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + TEAMS + '/' + str(team_id) + '/' + PLAYERS, timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    print(response)
    return json.loads(response.content.decode('utf-8'))

In [43]:
# Player list for team id 6209166, kept under the name team_aster.
team_aster = get_team_players(6209166)

<Response [200]>


In [44]:
team_aster

[{'account_id': 129958758,
  'name': 'Xxs',
  'games_played': 930,
  'wins': 515,
  'is_current_team_member': True},
 {'account_id': 207829314,
  'name': 'BoBoKa',
  'games_played': 897,
  'wins': 504,
  'is_current_team_member': True},
 {'account_id': 148215639,
  'name': 'Monet',
  'games_played': 465,
  'wins': 275,
  'is_current_team_member': True},
 {'account_id': 113435203,
  'name': 'ChYuaN',
  'games_played': 328,
  'wins': 166,
  'is_current_team_member': False},
 {'account_id': 373520478,
  'name': 'White丶Album_白学家',
  'games_played': 279,
  'wins': 166,
  'is_current_team_member': False},
 {'account_id': 149486894,
  'name': 'Sccc丶',
  'games_played': 265,
  'wins': 133,
  'is_current_team_member': False},
 {'account_id': 182331313,
  'name': 'Frisk',
  'games_played': 265,
  'wins': 133,
  'is_current_team_member': False},
 {'account_id': 89423756,
  'name': 'LaNm',
  'games_played': 248,
  'wins': 145,
  'is_current_team_member': True},
 {'account_id': 183378746,
  'name':

In [45]:
# Account ids of the players whose is_current_team_member flag equals False.
[item['account_id'] for item in team_aster if item['is_current_team_member'] == False]

[113435203,
 373520478,
 149486894,
 182331313,
 108382060,
 113800818,
 140153524,
 248941611,
 134711350,
 166458146,
 158847773,
 89871557,
 137272985,
 191458152,
 136178375,
 124168390,
 90031225,
 221743617,
 182439266,
 196389151]

In [46]:
def get_player_peers(account_id, timeout=30):
    """Download the ``peers`` list of one player from the OpenDota API.

    Prints the response object before decoding it.

    Args:
        account_id: Player account id; converted with ``str`` to build the URL.
        timeout: Seconds to wait for the server, passed to ``requests.get``.

    Returns:
        The decoded JSON payload, or ``0`` if the request timed out.

    Raises:
        SystemExit: On any other ``requests.exceptions.RequestException``.
    """
    try:
        # The request must be inside the ``try``: it is this call, not the
        # JSON decoding, that raises Timeout / RequestException.
        response = requests.get(BASE_URL + PLAYERS + '/' + str(account_id) + '/peers', timeout=timeout)

    except requests.exceptions.Timeout:
        # Note: no Firebase fallback happens here; the caller receives 0.
        print('request is taking too long to complete, possible timeout, getting old data from Firebase')
        return 0

    except requests.exceptions.RequestException as e:
        raise SystemExit(e)

    print(response)
    return json.loads(response.content.decode('utf-8'))

In [47]:
# Player list for team id 2586976, kept under the name team_og_players.
team_og_players = get_team_players(2586976)

<Response [200]>


In [48]:
team_og_players[0]

{'account_id': 19672354,
 'name': 'N0tail',
 'games_played': 1150,
 'wins': 709,
 'is_current_team_member': False}

In [49]:
# Case-insensitive search of the pro-player names for 'taiga'; the result is a list of records.
player = search_item('taiga', 'name', all_pro_players)

In [50]:
[item for item in all_heroes if item['id'] == 74]

[{'id': 74,
  'name': 'npc_dota_hero_invoker',
  'localized_name': 'Invoker',
  'primary_attr': 'int',
  'attack_type': 'Ranged',
  'roles': ['Carry', 'Nuker', 'Disabler', 'Escape', 'Pusher'],
  'legs': 2}]

In [51]:
player

[{'account_id': 401792574,
  'steamid': '76561198362058302',
  'avatar': 'https://avatars.akamai.steamstatic.com/fede1355e88a5ffae6e9bfc20de4ec33a18ef5d1.jpg',
  'avatarmedium': 'https://avatars.akamai.steamstatic.com/fede1355e88a5ffae6e9bfc20de4ec33a18ef5d1_medium.jpg',
  'avatarfull': 'https://avatars.akamai.steamstatic.com/fede1355e88a5ffae6e9bfc20de4ec33a18ef5d1_full.jpg',
  'profileurl': 'https://steamcommunity.com/id/Taigadota/',
  'personaname': 'twitch.tv/taiga_',
  'last_login': '2017-10-31T05:53:29.049Z',
  'full_history_time': '2022-09-14T18:14:56.663Z',
  'cheese': 0,
  'fh_unavailable': True,
  'loccountrycode': 'NO',
  'last_match_time': '2022-08-28T10:37:20.000Z',
  'plus': True,
  'name': 'Taiga',
  'country_code': 'no',
  'fantasy_role': 2,
  'team_id': 2586976,
  'team_name': 'OG',
  'team_tag': 'OG',
  'is_locked': True,
  'is_pro': True,
  'locked_until': None}]

In [52]:
# Account id taken from the 'Taiga' search result displayed above.
taiga_account_id = 401792574

# What makes a team win matches?

-players? Team composition

-draft pick? Hero composition

-items? Item-Hero compatibility

-time? Match duration

# What makes a specific team win?

Since you can't include team names in the dataset, teams should be represented numerically with information that excludes historical match performance data (because new data pre-hero-draft won't include match data of course)

The only way I can think of is to use the historical performance data of that specific match's players, leading up to that match, as a representation of the teams, e.g.:

Team OG Matches from 2015:

| | start_date | opposing_team_name | opposing_team_id | league_name | leagueid | cluster | radiant_win | radiant | duration | match_id | match_outcome |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1328 | 2015-11-04 04:50:57 | MONKEY FREEDOM FIGHTERS | 758797 | ASUS ROG DreamLeague Season 4 | 3865 | 133 | True | True | 1423 | 1912135868 | True |
| 1329 | 2015-11-03 08:09:58 | Mamas-Boys | 2276247 | ASUS ROG DreamLeague Season 4 | 3865 | 138 | True | True | 1092 | 1910208777 | True |

For match id 1912135868, include the performance data of all 5 players from the lead-up to that 2015 match, and do not include any data from after the match, even if we have it!

# Figure out which data to use to quantify player performance

e.g. lane efficiency, MMR, years experience, team cohesion

# Figure out which data to use to quantify team performance

e.g. team fight success rate, team brand goodwill? Team Rank

1. Get Team A vs B Match Info --> Get Players --> Get Player Account IDs --> Get Historical Team Win Rate together

In [None]:
opendota_api_info = get_opendota_api_info()

In [None]:
taiga = get_player_info('401792574')
taiga

In [None]:
# Fetch the profile for account id 183719386 and print its estimated MMR and leaderboard rank.
ammar = get_player_info(183719386)
print(ammar['mmr_estimate']['estimate'])
print(ammar['leaderboard_rank'])

In [None]:
# Peers of account 401792574, filtered down to the entry for account 183719386.
taiga_peers = get_player_peers(401792574)
[item for item in taiga_peers if item['account_id'] == 183719386]

In [None]:
# Rows where the opposing team id is 350190.
# NOTE(review): `team_og_matches_df` is not created in any visible cell.
team_og_matches_df[team_og_matches_df.opposing_team_id == 350190]

In [None]:
# NOTE(review): depends on `team_og_matches_df`, which no visible cell defines.
team_og_matches_df.shape

In [None]:
# Split the players of match 6727040019 by side; isRadiant=False puts the non-radiant players in 'prediction_team'.
teams = get_player_account_id_from_match_id(6727040019, False)

In [None]:
teams['prediction_team']