In [226]:
import math
import numpy as np
import scipy as sp
import pandas as pd
import altair as alt
import seaborn as sns
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [43]:
army = pd.read_csv('engagement_positions.csv')
army['minutes'] = round(army['gameloop'] / 22.4 / 60, 1)
army.head()

Unnamed: 0,relative_position,army_value,current_stage,gameloop,minutes
0,0.439029,3775,early,20640,15.4
1,0.957414,3350,early,16800,12.5
2,0.263368,5600,early,15600,11.6
3,0.377699,6900,early,15840,11.8
4,0.788531,4150,early,17520,13.0


In [44]:
early_pos = alt.Chart(army[army['current_stage'] == 'early']).mark_bar().encode(
    x=alt.X('relative_position:Q', bin=True),
    y='count()',
)
early_army = alt.Chart(army[army['current_stage'] == 'early']).mark_bar().encode(
    x=alt.X('army_value:Q', bin=True),
    y='count()',
)

mid_pos = alt.Chart(army[army['current_stage'] == 'mid']).mark_bar().encode(
    x=alt.X('relative_position:Q', bin=True),
    y='count()',
)
mid_army = alt.Chart(army[army['current_stage'] == 'mid']).mark_bar().encode(
    x=alt.X('army_value:Q', bin=True),
    y='count()',
)

late_pos = alt.Chart(army[army['current_stage'] == 'late']).mark_bar().encode(
    x=alt.X('relative_position:Q', bin=True),
    y='count()',
)
late_army = alt.Chart(army[army['current_stage'] == 'late']).mark_bar().encode(
    x=alt.X('army_value:Q', bin=True),
    y='count()',
)

position_vs_min = alt.Chart(army[army['minutes'] <= 30]).mark_circle(size=60).encode(
    x='minutes',
    y='relative_position',
)
position_vs_min_hi = alt.Chart(army[(army['minutes'] <= 30) & (army['relative_position'] > 0.5)]).mark_circle(size=60).encode(
    x='minutes',
    y='relative_position',
)
position_vs_min_low = alt.Chart(army[(army['minutes'] <= 30) & (army['relative_position'] < 0.5)]).mark_circle(size=60).encode(
    x='minutes',
    y='relative_position',
)

(early_pos & early_army
& mid_pos & mid_army
& late_pos & late_army
& (position_vs_min + position_vs_min.transform_loess('minutes', 'relative_position').mark_line(size=4))
& (position_vs_min_hi + position_vs_min_hi.transform_loess('minutes', 'relative_position').mark_line(size=4))
& (position_vs_min_low + position_vs_min_low.transform_loess('minutes', 'relative_position').mark_line(size=4)))

In [249]:
outcomes = pd.read_csv('match_length_outcomes.csv')
# outcomes = pd.read_csv('match_length_outcomes_tvz.csv')
outcomes['minutes'] = round(outcomes['seconds'] / 60, 1)
# print(outcomes[outcomes['minutes'] > 30])
print(np.mean(outcomes['minutes']), np.std(outcomes['minutes']))
print(sorted(sp.stats.zscore(outcomes['minutes'])))
outcomes = outcomes[(outcomes['minutes'] <= 27)]
print(len(outcomes))
outcomes.head()

13.876999999999999 7.593679674571479
[-1.7615965609920041, -1.7220900222839528, -1.7089211760479357, -1.6035704061597993, -1.4850507900356456, -1.4850507900356456, -1.4455442513275942, -1.379700020147509, -1.3401934814394578, -1.3270246352034405, -1.1163230954271672, -1.0768165567191161, -1.0768165567191161, -1.0768165567191161, -1.0768165567191161, -1.0373100180110648, -1.0373100180110648, -1.0109723255390308, -1.0109723255390308, -0.9451280943589454, -0.9319592481229283, -0.9056215556508942, -0.87928386317886, -0.8266084782347918, -0.8134396319987746, -0.8134396319987746, -0.8002707857627576, -0.8002707857627576, -0.7871019395267405, -0.7871019395267405, -0.7739330932907234, -0.7739330932907234, -0.7607642470547064, -0.7475954008186894, -0.734426554582672, -0.734426554582672, -0.721257708346655, -0.6554134771665697, -0.6422446309305526, -0.6422446309305526, -0.6422446309305526, -0.6290757846945356, -0.6159069384585186, -0.5764003997504673, -0.5632315535144502, -0.5632315535144502, -0

Unnamed: 0,outcome,seconds,minutes
0,win,540,9.0
1,win,1052,17.5
2,win,755,12.6
3,win,782,13.0
4,loss,758,12.6


In [253]:
binned_wins = np.histogram(outcomes[outcomes['outcome'] == 'win']['minutes'], bins=9, range=(0, 27), density=False)
binned_losses = np.histogram(outcomes[outcomes['outcome'] == 'loss']['minutes'], bins=9, range=(0, 27), density=False)

print(binned_wins)
print(binned_losses)

binned_winrates = []
for i in range(len(binned_wins[0])):
    bin_wins = binned_wins[0][i]
    bin_losses = binned_losses[0][i]
    if bin_wins + bin_losses > 0:
        bin_winrate = bin_wins / (bin_wins + bin_losses)
        binned_winrates.append((round(bin_winrate, 3), bin_wins + bin_losses))
    else:
        binned_winrates.append((0, 0))
print(binned_winrates)
print(list(map(lambda x: round(x[0] * 200, 0), binned_winrates)))
print(len(binned_winrates))
print(np.mean(list(map(lambda x: x[0], binned_winrates))), np.std(list(map(lambda x: x[0], binned_winrates))))

(array([ 3,  7, 14, 22, 22, 10,  8,  6,  4], dtype=int64), array([ 0.,  3.,  6.,  9., 12., 15., 18., 21., 24., 27.]))
(array([ 4,  1,  9, 29, 26, 14,  5,  5,  3], dtype=int64), array([ 0.,  3.,  6.,  9., 12., 15., 18., 21., 24., 27.]))
[(0.429, 7), (0.875, 8), (0.609, 23), (0.431, 51), (0.458, 48), (0.417, 24), (0.615, 13), (0.545, 11), (0.571, 7)]
[86.0, 175.0, 122.0, 86.0, 92.0, 83.0, 123.0, 109.0, 114.0]
9
0.55 0.1369314508147132


In [248]:
binned_matches = all_matches = alt.Chart(
    pd.DataFrame({
        'y': map(lambda x: x[0], binned_winrates),
        'x': [3 * i for i in range(0, len(binned_winrates))],
    })
).mark_bar().encode(
    x='x',
    y='y',
)
binned_count = all_matches = alt.Chart(
    pd.DataFrame({
        'y': map(lambda x: x[1], binned_winrates),
        'x': [3 * i for i in range(0, len(binned_winrates))],
    })
).mark_bar().encode(
    x='x',
    y='y',
)
binned_exp = all_matches = alt.Chart(
    pd.DataFrame({
        'y': map(lambda x: 1.9896907216494846 * ((x[0] * x[1]) / len(outcomes)), binned_winrates),
        'x': [3 * i for i in range(0, len(binned_winrates))],
    })
).mark_bar().encode(
    x='x',
    y='y',
)

wins = len(outcomes[outcomes['outcome'] == 'win'])
losses = len(outcomes[outcomes['outcome'] == 'loss'])
print(wins / (wins + losses), wins, losses)
print(sum(map(lambda x: (x[0] * x[1]) / len(outcomes), binned_winrates)))
print(1 / (wins / (wins + losses)))
print(list(map(lambda x: round(4 * 400 * 1.9896907216494846 * ((x[0] * x[1]) / len(outcomes)), 0), binned_winrates)))
print(list(map(lambda x: round(1.9896907216494846 * ((x[0] * x[1]) / len(outcomes)), 3), binned_winrates)))

all_matches = alt.Chart(outcomes).mark_bar().encode(
    x=alt.X('minutes:Q', bin=True),
    y='count()',
) 
wins = alt.Chart(outcomes[outcomes['outcome'] == 'win']).mark_bar().encode(
    x=alt.X('minutes:Q', bin=True),
    y='count()',
)
losses = alt.Chart(outcomes[outcomes['outcome'] == 'loss']).mark_bar().encode(
    x=alt.X('minutes:Q', bin=True),
    y='count()',
)

binned_matches & binned_count & binned_exp & all_matches & wins & losses

0.5 96 96
0.4997291666666667
2.0
[50.0, 182.0, 282.0, 480.0, 232.0, 149.0, 133.0, 83.0]
[0.031, 0.114, 0.176, 0.3, 0.145, 0.093, 0.083, 0.052]


In [205]:
# collection_outcomes = pd.read_csv('collection_rate_outcomes.csv')
collection_outcomes = pd.read_csv('collection_rate_outcomes_tvz.csv')
collection_outcomes = collection_outcomes[collection_outcomes['collection_rate'] <= 5000]
print(len(collection_outcomes))
collection_outcomes.head()

317


Unnamed: 0,outcome,race,collection_rate
0,win,Terran,3599
1,loss,Zerg,3650
2,win,Terran,4770
3,loss,Zerg,3773
4,win,Zerg,4277


In [197]:
binned_wins = np.histogram(collection_outcomes[(collection_outcomes['outcome'] == 'win') & (collection_outcomes['race'] == 'Terran')]['collection_rate'], bins=10, range=(0, 5000), density=False)
binned_losses = np.histogram(collection_outcomes[(collection_outcomes['outcome'] == 'loss') & (collection_outcomes['race'] == 'Terran')]['collection_rate'], bins=10, range=(0, 5000), density=False)

print(binned_wins)
print(binned_losses)

binned_winrates = []
for i in range(len(binned_wins[0])):
    bin_wins = binned_wins[0][i]
    bin_losses = binned_losses[0][i]
    if bin_wins + bin_losses > 0:
        bin_winrate = bin_wins / (bin_wins + bin_losses)
        binned_winrates.append((round(bin_winrate, 3), bin_wins + bin_losses))
    else:
        binned_winrates.append((0, 0))
print(binned_winrates)
print(list(map(lambda x: round(x[0] * 200, 0), binned_winrates)))

(array([ 1,  0,  4,  2,  1,  6,  6, 20, 31, 11], dtype=int64), array([   0.,  500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,
       4500., 5000.]))
(array([ 0,  1,  1,  3,  2,  6,  8, 28, 24, 16], dtype=int64), array([   0.,  500., 1000., 1500., 2000., 2500., 3000., 3500., 4000.,
       4500., 5000.]))
[(1.0, 1), (0.0, 1), (0.8, 5), (0.4, 5), (0.333, 3), (0.5, 12), (0.429, 14), (0.417, 48), (0.564, 55), (0.407, 27)]
[200.0, 0.0, 160.0, 80.0, 67.0, 100.0, 86.0, 83.0, 113.0, 81.0]


In [198]:
binned_matches = all_matches = alt.Chart(
    pd.DataFrame({
        'y': map(lambda x: x[0], binned_winrates),
        'x': [3 * i for i in range(0, len(binned_winrates))],
    })
).mark_bar().encode(
    x='x',
    y='y',
)
binned_count = all_matches = alt.Chart(
    pd.DataFrame({
        'y': map(lambda x: x[1], binned_winrates),
        'x': [3 * i for i in range(0, len(binned_winrates))],
    })
).mark_bar().encode(
    x='x',
    y='y',
)
binned_exp = all_matches = alt.Chart(
    pd.DataFrame({
        'y': map(lambda x: 1.9896907216494846 * ((x[0] * x[1]) / len(outcomes)), binned_winrates),
        'x': [3 * i for i in range(0, len(binned_winrates))],
    })
).mark_bar().encode(
    x='x',
    y='y',
)

wins = len(outcomes[outcomes['outcome'] == 'win'])
losses = len(outcomes[outcomes['outcome'] == 'loss'])
print(wins / (wins + losses), wins, losses)
print(sum(map(lambda x: (x[0] * x[1]) / len(outcomes), binned_winrates)))
print(1 / (wins / (wins + losses)))
print(list(map(lambda x: round(4 * 400 * 1.9896907216494846 * ((x[0] * x[1]) / len(outcomes)), 0), binned_winrates)))
print(list(map(lambda x: round(1.9896907216494846 * ((x[0] * x[1]) / len(outcomes)), 3), binned_winrates)))

all_matches = alt.Chart(collection_outcomes[collection_outcomes['race'] == 'Terran']).mark_bar().encode(
    x=alt.X('collection_rate:Q', bin=True),
    y='count()',
) 
wins = alt.Chart(collection_outcomes[(collection_outcomes['outcome'] == 'win') & (collection_outcomes['race'] == 'Terran')]).mark_bar().encode(
    x=alt.X('collection_rate:Q', bin=True),
    y='count()',
)
losses = alt.Chart(collection_outcomes[(collection_outcomes['outcome'] == 'loss') & (collection_outcomes['race'] == 'Terran')]).mark_bar().encode(
    x=alt.X('collection_rate:Q', bin=True),
    y='count()',
)

(binned_matches & binned_count & binned_exp) & all_matches & wins & losses

0.46994535519125685 86 97
0.4482513661202185
2.127906976744186
[17.0, 0.0, 70.0, 35.0, 17.0, 104.0, 104.0, 348.0, 540.0, 191.0]
[0.011, 0.0, 0.043, 0.022, 0.011, 0.065, 0.065, 0.218, 0.337, 0.119]


In [204]:
terran_game_length = outcomes['minutes']
terran_collection_rate = collection_outcomes[collection_outcomes['race'] == 'Terran']['collection_rate']
length_vs_collection = pd.DataFrame({
    'game_length': terran_game_length,
    'collection_rate': terran_collection_rate,
})

print(len(terran_game_length))
print(len(terran_collection_rate))

scatter = alt.Chart(length_vs_collection).mark_circle(size=60).encode(
    x='game_length',
    y='collection_rate',
)

scatter + scatter.transform_loess('game_length', 'collection_rate').mark_line(size=4)

186
186
