In [1]:
# Only put working code here

# standard imports
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np

# Read the CSV tables used by the rest of the notebook.
# The paths are relative to the kernel's working directory; the recorded run of
# this cell ended in FileNotFoundError because the first path did not resolve.
# NOTE(review): df_match_out is not referenced by any cell visible here.
df_match_out = pd.read_csv('../host/repos/capstone1/data/match_outcomes.csv')
# One row per match; 'radiant_win' is read by GetMatchWinner and BootstrapWinRate.
df_match = pd.read_csv('../host/repos/capstone1/data/match.csv')
# Per-match time series; 'match_id', 'times' and positional gold columns are read below.
df_player_time = pd.read_csv('../host/repos/capstone1/data/player_time.csv')
# Objective events; 'subtype', 'match_id' and 'player1' are read by GetRoshanTeamKills.
df_objectives = pd.read_csv('../host/repos/capstone1/data/objectives.csv')


def TeamGoldTotals(match_id, time_index=-1):
    '''
    Sum each team's gold for one row of a match in df_player_time.

    params:
    match_id = int corresponding to the match_id column
    time_index = positional index, within the match's rows, of the row to read;
                 default -1 is the last row of the match

    returns:
    2 values: gold totals of team 1 and team 2 at the selected row.

    raises:
    IndexError if the match has no rows or time_index is out of range.
    '''
    # Column positions of the five gold values summed for each team.
    # NOTE(review): assumes the column order of player_time.csv is fixed - confirm.
    team1_cols = (2, 5, 8, 11, 14)
    team2_cols = (17, 20, 23, 26, 29)

    match_view = df_player_time[df_player_time['match_id']==match_id]
    # Fetch the row once instead of once per column.
    row = match_view.iloc[time_index]

    # Use .iloc for the column lookup: integer keys passed to [] on a Series with
    # string labels rely on a positional fallback that pandas has deprecated.
    team1 = sum(row.iloc[col] for col in team1_cols)
    team2 = sum(row.iloc[col] for col in team2_cols)

    return team1,team2

def GoldValsThruMatch(match_id):
    '''
    Build the gold time series of both teams for one match.

    params:
    match_id = int corresponding to the match_id column

    returns 2 np.arrays with team gold values at each row of the match
    and a np.array of the 'times' column floor-divided by 60
    (time in minutes, for graphing)
    '''
    # Column positions of the five gold values summed for each team; these are
    # the same positions TeamGoldTotals reads.
    team1_cols = [2, 5, 8, 11, 14]
    team2_cols = [17, 20, 23, 26, 29]

    match_df = df_player_time[df_player_time['match_id']==match_id]

    # Sum across the gold columns for every row at once. The previous version
    # re-filtered the whole df_player_time frame once per row of the match.
    # skipna=False keeps the behaviour of plain addition (a missing value
    # makes the total missing instead of being silently dropped).
    team1 = match_df.iloc[:, team1_cols].sum(axis=1, skipna=False).to_numpy()
    team2 = match_df.iloc[:, team2_cols].sum(axis=1, skipna=False).to_numpy()

    return team1, team2, np.array(match_df['times']//60)


def GetMatchWinner(match_id):
    '''
    Report which side won a match.

    params:
    match_id=int of match_id primary key, used as a positional row index into df_match

    returns:
    tuple of (team label, plotting color): ('Team 1','green') when the row's
    'radiant_win' value is truthy (radiant), otherwise ('Team 2','red') (dire).
    '''
    # Row position in df_match is taken to equal the match_id (ids ascending).
    radiant_won = df_match.iloc[match_id]['radiant_win']
    return ('Team 1','green') if radiant_won else ('Team 2','red')

def GetRoshanTeamKills():
    '''
    Tally, per match, which team killed Roshan more often and whether it won.

    Only df_objectives rows whose 'subtype' is 'CHAT_MESSAGE_ROSHAN_KILL' are
    used. A 'player1' value of 2 is counted as a radiant kill and 3 as a dire
    kill. Matches with no Roshan-kill rows are not counted at all.

    returns:
    dictionary of:
    rad_kills = # of matches where radiant had more Roshan kills
    dire_kills = # of matches where dire had more Roshan kills
    rad_wins = # of those radiant-majority matches that radiant won
    dire_wins = # of those dire-majority matches that dire won
    kill_ties = # of matches where both teams had the same number of kills
    rad_tie_wins = # of tied matches won by radiant
    dire_tie_wins = # of tied matches won by dire
    '''
    roshdf = df_objectives[df_objectives['subtype'] == 'CHAT_MESSAGE_ROSHAN_KILL']

    roshkill_wins = {'rad_kills' : 0,
                    'dire_kills': 0,
                    'rad_wins' : 0,
                    'dire_wins' : 0,
                    'kill_ties' : 0,
                    'rad_tie_wins' : 0,
                    'dire_tie_wins': 0}

    # A single groupby pass hands over each match's rows; the previous version
    # re-filtered roshdf once for every unique match id.
    for _id, current_match in roshdf.groupby('match_id', sort=False):
        current_rad_kills = int((current_match['player1'] == 2).sum())
        current_dire_kills = int((current_match['player1'] == 3).sum())
        # Look the winner up once per match.
        current_winner = GetMatchWinner(_id)[0]

        if current_rad_kills == current_dire_kills:
            roshkill_wins['kill_ties'] += 1

            if current_winner == 'Team 1':
                roshkill_wins['rad_tie_wins'] += 1
            else:
                roshkill_wins['dire_tie_wins'] += 1

        elif current_rad_kills > current_dire_kills:
            roshkill_wins['rad_kills'] += 1

            if current_winner == 'Team 1':
                roshkill_wins['rad_wins'] += 1

        else:
            roshkill_wins['dire_kills'] += 1

            if current_winner == 'Team 2':
                roshkill_wins['dire_wins'] += 1

    return roshkill_wins

def GetGoldAdvWins(timestamp=15):
    '''
    Count how often the team ahead on gold at a given minute went on to win.

    Gold is read from the df_player_time rows whose 'times' value equals
    60*timestamp, so matches without a row at that time are not counted.

    params:
    timestamp = time in minutes to get team gold totals (default 15 minutes)

    returns:
    dictionary of:
    rad_adv = # of games radiant had gold advantage at timestamp
    dire_adv = # of games dire had gold advantage at timestamp
    rad_wins = # of games radiant won while having gold advantage at timestamp
    dire_wins = # of games dire won while having gold advantage at timestamp
    adv_ties = # of gold ties (if any) at timestamp
    rad_tie_wins = # of games radiant won while having tied gold at timestamp
    dire_tie_wins = # of games dire won while having tied gold at timestamp
    '''
    # Column positions of the five gold values summed for each team; these are
    # the same positions TeamGoldTotals reads.
    rad_cols = [2, 5, 8, 11, 14]
    dire_cols = [17, 20, 23, 26, 29]

    timestamp_df = df_player_time[df_player_time['times'] == 60*timestamp] #multiply by 60 to convert minutes to seconds

    # Read the gold straight from the rows selected by time. The previous
    # version passed the minute to TeamGoldTotals as a positional row index,
    # which only hits the right row when a match's rows start at time 0 and
    # advance one minute per row.
    # skipna=False matches plain addition: a missing value makes the total missing.
    rad_totals = timestamp_df.iloc[:, rad_cols].sum(axis=1, skipna=False)
    dire_totals = timestamp_df.iloc[:, dire_cols].sum(axis=1, skipna=False)

    gold_adv_dict = {'rad_adv' : 0,
                    'dire_adv': 0,
                    'rad_wins' : 0,
                    'dire_wins' : 0,
                    'adv_ties' : 0,
                    'rad_tie_wins' : 0,
                    'dire_tie_wins': 0}

    for _id, rad_gold, dire_gold in zip(timestamp_df['match_id'], rad_totals, dire_totals):
        current_winner = GetMatchWinner(_id)[0]

        if rad_gold == dire_gold:
            gold_adv_dict['adv_ties'] += 1

            if current_winner == 'Team 1':
                gold_adv_dict['rad_tie_wins'] += 1
            else:
                gold_adv_dict['dire_tie_wins'] += 1

        elif rad_gold > dire_gold:
            gold_adv_dict['rad_adv'] += 1

            if current_winner == 'Team 1':
                gold_adv_dict['rad_wins'] += 1

        elif dire_gold > rad_gold:
            gold_adv_dict['dire_adv'] += 1

            if current_winner == 'Team 2':
                gold_adv_dict['dire_wins'] += 1

    return gold_adv_dict

# Plot both teams' gold over time for one randomly chosen match.
if __name__ == '__main__':

    # Pick a random match id in [0, 50000); the upper bound is hard-coded and
    # no random seed is set, so each run draws a different match.
    example_match = np.random.randint(0,50000)
    winner,win_color = GetMatchWinner(example_match)
    # y1 / y2 are the team gold series, x is the match time in minutes.
    y1,y2,x = GoldValsThruMatch(example_match)

    fig, ax = plt.subplots()

    ax.plot(x,y1, label='Team1',color='green')
    ax.plot(x,y2, label='Team2',color='red')

    # Title, axis labels and a text box naming the winner, outlined in the
    # winner's plot color.
    ax.set_title('Team gold totals over the course of the match \n (match_id={})'.format(example_match),y=1.1,fontsize=18)
    ax.set_xlabel('Match Time (in minutes)')
    ax.set_ylabel('Team Gold Total', rotation=0, labelpad=60)
    ax.text(0.5,.8,"Match Winner: \n" + winner,
            bbox=dict(edgecolor=win_color,facecolor='none',pad=5),
            horizontalalignment='center',
            verticalalignment='center',
            transform=ax.transAxes)
    ax.legend()
    plt.tight_layout()
    fig.show()

FileNotFoundError: [Errno 2] File ../host/repos/capstone1/data/match_outcomes.csv does not exist: '../host/repos/capstone1/data/match_outcomes.csv'

In [None]:
# Save the single-match gold figure. `fig` is a notebook global reassigned by
# several cells, so this saves whichever figure was created most recently.
fig.savefig('../host/repos/capstone1/figures/Team_Gold_Match.png')

In [None]:
# Tally how often the team with more gold on the last recorded row of a match
# was also the match winner. Keys mirror the dictionary built by GetGoldAdvWins.
gold_adv_dict = {'rad_adv' : 0,
            'dire_adv': 0,
            'rad_wins' : 0,
            'dire_wins' : 0,
            'adv_ties' : 0,
            'rad_tie_wins' : 0,
            'dire_tie_wins': 0}

# Last recorded row of every match, taken in one pass. The previous version
# looped over a hard-coded range of 50000 ids and filtered the whole
# df_player_time frame for each one; this covers the ids actually present.
final_rows = df_player_time.groupby('match_id').tail(1)
# Column positions are the same ones TeamGoldTotals sums for each team;
# skipna=False matches plain addition (a missing value makes the total missing).
final_rad_gold = final_rows.iloc[:, [2, 5, 8, 11, 14]].sum(axis=1, skipna=False)
final_dire_gold = final_rows.iloc[:, [17, 20, 23, 26, 29]].sum(axis=1, skipna=False)

for _id, rad_gold, dire_gold in zip(final_rows['match_id'], final_rad_gold, final_dire_gold):

    current_winner = GetMatchWinner(_id)[0]

    if rad_gold == dire_gold:
        gold_adv_dict['adv_ties'] += 1

        if current_winner == 'Team 1':
            gold_adv_dict['rad_tie_wins'] += 1
        else:
            gold_adv_dict['dire_tie_wins'] += 1

    elif rad_gold > dire_gold:
        gold_adv_dict['rad_adv'] += 1

        if current_winner == 'Team 1':
            gold_adv_dict['rad_wins'] += 1

    elif dire_gold > rad_gold:
        gold_adv_dict['dire_adv'] += 1

        if current_winner == 'Team 2':
            gold_adv_dict['dire_wins'] += 1

gold_adv_dict

In [None]:
# Show the raw tally, then each side's win rate among the matches where it held
# the gold lead (wins / lead count, rounded to 2 decimals).
# A lead count of 0 would raise ZeroDivisionError here.
print(gold_adv_dict)
print('Radiant win-rate given the most gold at the end of the game = {}'.format(round(gold_adv_dict['rad_wins']/gold_adv_dict['rad_adv'],2)))
print('Dire win-rate given the most gold at the end of the game = {}'.format(round(gold_adv_dict['dire_wins']/gold_adv_dict['dire_adv'],2)))

In [None]:
# Draw a 4x4 grid of gold-over-time plots, one randomly chosen match per panel.
fig, axs = plt.subplots(4,4, figsize=(18,10))

for i in range(4):
    for j in range(4):
        # Random match id in [0, 50000); unseeded, so the grid differs per run.
        example_match = np.random.randint(0,50000)
        winner,win_color = GetMatchWinner(example_match)
        y1,y2,x = GoldValsThruMatch(example_match)

        axs[i,j].plot(x,y1, label='Team1',color='green')
        axs[i,j].plot(x,y2, label='Team2',color='red')

        # Winner marker: a blank text box filled with the winner's color ...

        
        axs[i,j].text(0.5,.8,"    ",
                bbox=dict(facecolor=win_color,pad=5),
                horizontalalignment='center',
                verticalalignment='center',
                transform=axs[i,j].transAxes)
        
        # ... with a "winner:" caption placed to its left.
        axs[i,j].text(0.3,.8,"winner:",fontsize = 14,
                bbox=dict(facecolor='none',pad=5),
                horizontalalignment='center',
                verticalalignment='center',
                transform=axs[i,j].transAxes)

# One shared legend, title and pair of axis labels for the whole grid, each
# attached to a single panel and shifted with x / y / bbox_to_anchor offsets.
axs[0,0].legend(bbox_to_anchor=(3.9, 1.6), loc='upper left', ncol=1, fontsize = 15)
        
axs[0,1].set_title('Team gold totals over the course of the match',x=1.1,y=1.1,fontsize=25)
axs[3,1].set_xlabel('Match Time (in minutes)',x=1.2,fontsize=25)
axs[2,0].set_ylabel('Team\nGold', rotation=0, labelpad=40, y=1, fontsize= 20)
plt.tight_layout()
fig.show()

In [None]:
# Save the 16-panel figure. `fig` is a notebook global reassigned by several
# cells, so this saves whichever figure was created most recently.
fig.savefig('../host/repos/capstone1/figures/Team_Gold_16_Matches.png')

In [None]:
# Per-match Roshan-kill majority tallies and how often the majority team won.
rosh_kills = GetRoshanTeamKills()

In [None]:
# Display the tally dictionary as the cell output.
rosh_kills

In [None]:
#Hypothesis test 1: Does killing Roshan more times throughout the game give you a higher likelihood of winning?
'''
H0: 0.5 chance of winning
Ha: higher than 0.5 chance of winning
Alpha: 0.05
'''

In [None]:
# Beta distribution of each side's win rate given that it had more Roshan
# kills: first parameter = wins, second = losses (majority matches minus wins).
# NOTE(review): a side with zero wins or zero losses gives a parameter of 0,
# which is outside the beta distribution's valid range - confirm not possible.
dire_wins_beta = stats.beta(rosh_kills['dire_wins'],rosh_kills['dire_kills']-rosh_kills['dire_wins'])
rad_wins_beta = stats.beta(rosh_kills['rad_wins'],rosh_kills['rad_kills']-rosh_kills['rad_wins'])
# 3,000 evenly spaced win-rate values in [0, 1] at which the densities are drawn.
beta_x = np.linspace(0,1,3000)
# ci = 95

fig, ax= plt.subplots(figsize = (12,5))

ax.plot(beta_x,dire_wins_beta.pdf(beta_x), color='r', label='Dire Probabilities',linewidth=3)
ax.plot(beta_x,rad_wins_beta.pdf(beta_x), color='g', label='Radiant Probabilities',linewidth=3)
# Leftover confidence-interval code; mean_diff is not defined at notebook scope.
# upper,lower = np.percentile(mean_diff,[(100-ci)/2,ci+((100-ci)/2)])
# ax.axvline(upper, color='blue')
# ax.axvline(lower, color='blue')


# NOTE(review): the axis labels say "P-Values" and "Frequency", but the x axis
# is a win rate and the y axis is the beta probability density - confirm wording.
ax.set_title("Win-rate Probabilities Given Roshan Control\n(3,000 Plot Points)", y=1.1, fontsize=20)
ax.set_xlabel('Possible P-Values\n(Win-rates)')
ax.set_ylabel('Frequency of\nP_Values\n(Win-rates)', rotation=0, labelpad=60)
# Only the 0.75-0.9 win-rate range is shown.
ax.set_xlim(.75,.9)
ax.legend(fontsize=11, loc='upper center')
plt.tight_layout()
fig.show()

In [None]:
# Save the beta-distribution figure. `fig` is a notebook global reassigned by
# several cells, so this saves whichever figure was created most recently.
fig.savefig('../host/repos/capstone1/figures/beta_distributions.png')

In [None]:
# Binomial over the matches where radiant had more Roshan kills:
# observed version uses p = radiant wins / those matches ...
rosh_binomeal = stats.binom(n=rosh_kills['rad_kills'], p=rosh_kills['rad_wins']/rosh_kills['rad_kills'])
# ... and the null-hypothesis version uses p = .5 with the same n.
# NOTE(review): neither object is referenced by any other cell visible here.
rosh_binomeal_null = stats.binom(n=rosh_kills['rad_kills'], p=.5)

In [None]:
# Gold-advantage tallies at the default timestamp of 15 minutes.
gold_adv = GetGoldAdvWins()

In [None]:
# Hard-coded tally in the same key layout GetGoldAdvWins returns.
# NOTE(review): presumably pasted from an earlier GetGoldAdvWins() run so the
# cell does not depend on recomputing it - confirm; `gold_adv` is not used here.
gold_adv_at_15 = {'rad_adv': 25953,
 'dire_adv': 23962,
 'rad_wins': 18150,
 'dire_wins': 16222,
 'adv_ties': 4,
 'rad_tie_wins': 3,
 'dire_tie_wins': 1}
# Win rate of each side among the matches where it held the gold lead.
print('Radiant win-rate given the most gold at 15 minutes = {}'.format(round(gold_adv_at_15['rad_wins']/gold_adv_at_15['rad_adv'],2)))
print('Dire win-rate given the most gold at 15 minutes = {}'.format(round(gold_adv_at_15['dire_wins']/gold_adv_at_15['dire_adv'],2)))

In [None]:
#Hypothesis test 2: Does having a gold advantage at 15 minutes give you a higher likelihood of winning?
'''
H0: 0.5 chance of winning
Ha: higher than 0.5 chance of winning
Alpha: 0.05
'''

In [None]:
def BootstrapWinRate(measure_radiant,resamples=1000):
    '''
    Simulate the difference between radiant and dire mean win counts.

    The overall radiant win rate is taken from df_match['radiant_win']. Each
    resample draws 1000 binomial win counts for radiant (p = radiant win rate)
    and 1000 for dire (p = 1 - radiant win rate), with n = number of matches,
    and records the difference of the two sample means.

    params:
    measure_radiant = if truthy record radiant mean - dire mean,
                      otherwise dire mean - radiant mean
    resamples = number of differences to generate (default 1000)

    returns:
    list of `resamples` mean differences

    raises:
    ValueError if df_match has no rows.
    '''
    draws_per_resample = 1000

    # A truthy 'radiant_win' value is a radiant win, anything else a dire win.
    outcomes = df_match['radiant_win'].astype(bool)
    total_matches = len(outcomes)
    if total_matches == 0:
        raise ValueError('df_match has no rows to compute a win rate from')
    radiant_wins = int(outcomes.sum())

    # Divide by the number of matches actually counted; the previous version
    # divided by a hard-coded 50000.
    radiant_rate = radiant_wins / total_matches

    rad_wins_binomeal = stats.binom(n=total_matches, p=radiant_rate)
    dire_wins_binomeal = stats.binom(n=total_matches, p=1-radiant_rate)

    mean_diff=[]

    for _ in range(resamples):
        rad_mean = rad_wins_binomeal.rvs(draws_per_resample).mean()
        dire_mean = dire_wins_binomeal.rvs(draws_per_resample).mean()
        if measure_radiant:
            mean_diff.append(rad_mean-dire_mean)
        else:
            mean_diff.append(dire_mean-rad_mean)

    return mean_diff

# Histogram of the simulated (radiant - dire) mean win-count differences with
# the central `ci` percent interval marked. No random seed is set, so the
# figure differs between runs.
rad_bootstrap = BootstrapWinRate(True)
ci = 95

fig, ax= plt.subplots(figsize = (12,5))

ax.hist(rad_bootstrap, color='g',alpha=.7,label='Radiant')

# np.percentile returns values in the order requested, so the lower bound
# comes first; the names were previously swapped.
r_lower,r_upper = np.percentile(rad_bootstrap,[(100-ci)/2,ci+((100-ci)/2)])
ax.axvline(r_lower, color='blue', label="Confidence\nInterval:\n95%")
ax.axvline(r_upper, color='blue')


ax.set_title("Bootstrapped Radiant Winrates of 1 Million Games", y=1.1, fontsize=20)
ax.set_xlabel('Difference in Wins')
ax.set_ylabel('Frequency\n(1 = 1k)', rotation=0, labelpad=40)
ax.legend(fontsize=11, loc='upper right')
plt.tight_layout()
fig.show()

# 1000 samples bootstrapped 1000 times = 1 million

# wins_binomeal_null.cdf(radiant_wins)

In [None]:
# Save the radiant bootstrap histogram. `fig` is a notebook global reassigned
# by several cells, so this saves whichever figure was created most recently.
fig.savefig('../host/repos/capstone1/figures/RadWinRates.png')
# Leftover scratch lines:
# radiant_wins / 50000
# stats.ttest_ind()

In [None]:
# Histogram of the simulated (dire - radiant) mean win-count differences with
# the central `ci` percent interval marked. No random seed is set, so the
# figure differs between runs.
dire_bootstrap = BootstrapWinRate(False)
ci = 95

fig, ax= plt.subplots(figsize = (12,5))

ax.hist(dire_bootstrap, color='r',alpha=.7,label='Dire')

# np.percentile returns values in the order requested, so the lower bound
# comes first; the names were previously swapped.
d_lower,d_upper = np.percentile(dire_bootstrap,[(100-ci)/2,ci+((100-ci)/2)])
ax.axvline(d_lower, color='blue', label="Confidence\nInterval:\n95%")
ax.axvline(d_upper, color='blue')


ax.set_title("Bootstrapped Dire Winrates of 1 Million Games", y=1.1, fontsize=20)
ax.set_xlabel('Difference in Wins')
ax.set_ylabel('Frequency\n(1 = 1k)', rotation=0, labelpad=40)
ax.legend(fontsize=11, loc='upper right')
plt.tight_layout()
fig.show()

In [None]:
# Save the dire bootstrap histogram. `fig` is a notebook global reassigned by
# several cells, so this saves whichever figure was created most recently.
fig.savefig('../host/repos/capstone1/figures/DireWinRates.png')

In [None]:
# Overall share of matches won by each side.
# A truthy 'radiant_win' value is a radiant win, anything else a dire win.
radiant_outcomes = df_match['radiant_win'].astype(bool)
radiant_wins = int(radiant_outcomes.sum())
dire_wins = len(radiant_outcomes) - radiant_wins

# Divide by the number of matches actually counted rather than a hard-coded
# 50000, so the shares stay correct if the table size changes.
total_matches = radiant_wins + dire_wins

(radiant_wins/total_matches, dire_wins/total_matches)