In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
url = 'https://docs.google.com/spreadsheets/d/1HcdISgCl3s4RpWkJa8m-G1JjfKzd8qf2WY2Xcw32D7U/pub?gid=1371955398&single=true&output=csv'

In [3]:
def full_cast_counterpick(row):
    full_cast_events = [
        "IYL Season 1",
        "IYL Season 2",
        "IYL Season 3",
        "IYL Season 4",
        "IYL Season 5",
        "NFTT Round 1",
        "NFTT Round 2",
        "NFTT Round 3",
        "NFTT Round 4",
        "NFTT Round 5",
        "NFTT Round 6",
        "NFTT Round 7",
        "NFTT Round 8",
        "Summer Smash",
        "Summer Smash II",
        "Summer Smash III",
        "Summer Smash IV",
    ]
    return not any([
        row.char_select_random,
        row.char_select_locked,
        row.event not in full_cast_events
    ]) 

In [4]:
import pandas
import re

historical_record = pandas.read_csv(url)
historical_record['Match Date'] = pandas.to_datetime(historical_record['Match Date'], infer_datetime_format=True)
historical_record.columns = [re.sub('\W+', '_', col.lower()).strip('_') for col in historical_record.columns]
historical_record.format_restricted.replace(to_replace=['.', 'Restricted'], value=[False, True], inplace=True)
historical_record.format_team.replace(to_replace=['.', 'Team'], value=[False, True], inplace=True)
historical_record.char_select_random.replace(to_replace=['.', 'Random'], value=[False, True], inplace=True)
historical_record.char_select_locked.replace(to_replace=['.', 'Locked'], value=[False, True], inplace=True)
historical_record.set_length_non_ft3_ft4.replace(to_replace=['.', 'non-FT3/FT4'], value=[False, True], inplace=True)
historical_record.set_win_1 = historical_record.set_win_1.fillna(0)
historical_record.set_win_2 = historical_record.set_win_2.fillna(0)
historical_record.wins_1 = historical_record.wins_1.fillna(0)
historical_record.wins_2 = historical_record.wins_2.fillna(0)
historical_record['standard_format'] = historical_record.apply(full_cast_counterpick, axis=1)
historical_record.character_1 = historical_record.character_1.apply(lambda n: n.title())
historical_record.character_2 = historical_record.character_2.apply(lambda n: n.title())

Recurrence Relation:
  * `meta_impact(m, n) = 1 + counterpicks_2 * mu_win * meta_impact(m-1, n) + counterpicks_1 * (1 - mu_win) * meta_impact(m, n-1)`
  * `meta_impact(0, n) = 0`
  * `meta_impact(m, 0) = 0`

Counterpicks:

  * Pmn = Probability of match m/n
  * Wmn = Probability of m winning match m/n
  * Cmn = Probability of counterpicking m with n


    Paa = Waa * Caa + Wab * Caa + Wba * 0   + Wbb * 0
    Pab = Waa * Cab + Wab * Cab + Wba * 0   + Wbb * 0
    Pba = Waa * 0   + Wab * 0   + Wba * Cba + Wbb * Cba
    Pbb = Waa * 0   + Wab * 0   + Wba * Cbb + Wbb * Cbb
    [Caa, Caa, 0,   0      [Waa
     Cab, Cab, 0,   0       Wab
     0,   0,   Cba, Cba     Wba
     0,   0,   Cbb, Cbb] *  Wbb]
    
    Paa = (1-Waa) * Caa + (1-Wab) * 0   + (1-Wba) * Caa + (1-Wbb) * 0
    Pab = (1-Waa) * 0   + (1-Wab) * Cba + (1-Wba) * 0   + (1-Wbb) * Cba
    Pba = (1-Waa) * Cab + (1-Wab) * 0   + (1-Wba) * Cab + (1-Wbb) * 0
    Pbb = (1-Waa) * 0   + (1-Wab) * Cbb + (1-Wba) * 0   + (1-Wbb) * Cbb
    [Caa, 0,   Caa, 0          [Waa
     0,   Cba, 0,   Cba         Wab
     Cab, 0,   Cab, 0           Wba
     0,   Cbb, 0,   Cbb] * 1 -  Wbb]

In [5]:
def counterpicks(matches):
    for pick, counterpick in zip(matches, matches[1:] + [None]):
        if counterpick is None:
            if matches[0]['set_win_1']:
                for _ in range(int(pick['wins_1']) - 1):
                    yield (pick['character_1'], pick['character_2'])
                for _ in range(int(pick['wins_2'])):
                    yield (pick['character_2'], pick['character_1'])
            else:
                for _ in range(int(pick['wins_1'])):
                    yield (pick['character_1'], pick['character_2'])
                for _ in range(int(pick['wins_2'] - 1)):
                    yield (pick['character_2'], pick['character_1'])
                
        else:
            if pick['character_1'] != counterpick['character_1']:
                for _ in range(int(pick['wins_1'])):
                    yield (pick['character_1'], pick['character_2'])
                for _ in range(int(pick['wins_2'] - 1)):
                    yield (pick['character_2'], pick['character_1'])
                yield (pick['character_2'], counterpick['character_1'])
            else:
                for _ in range(int(pick['wins_1'] - 1)):
                    yield (pick['character_1'], pick['character_2'])
                for _ in range(int(pick['wins_2'])):
                    yield (pick['character_2'], pick['character_1'])
                yield (pick['character_1'], counterpick['character_2'])

In [6]:
sets = historical_record[historical_record.standard_format].groupby(['match_date', 'event', 'player_1', 'player_2'])

In [24]:
cast = historical_record[historical_record.standard_format].character_1.unique()
matchups = [(l, r) for l in cast for r in cast]

In [22]:
counterpick_counts = pandas.DataFrame(
    data=(
        {'pick': pick, 'counter': counter}
        for (key, set) in sets
        for (pick, counter) in counterpicks([row for (ix, row) in set.iterrows()])
    )
).groupby(['pick', 'counter']).size().reindex(
    pandas.MultiIndex.from_product([cast, cast], names=['pick', 'counter'])
).fillna(0)

In [23]:
blind_picks = pandas.DataFrame(
    {
        'character_1': list(set['character_1'])[0],
        'character_2': list(set['character_2'])[0],
    }
    for (key, set) in sets
)

blind_pick_counts = blind_picks['character_1'].append(blind_picks['character_2']).value_counts()

In [110]:
half_matchup = historical_record.groupby(['character_1', 'character_2']).sum()[['wins_1', 'wins_2']]
reversed_matchup = half_matchup.reset_index().rename(columns={
    'character_1': 'character_2',
    'character_2': 'character_1',
    'wins_1': 'wins_2',
    'wins_2': 'wins_1',
}).set_index(['character_1', 'character_2'])
matchup_pcts = (half_matchup + reversed_matchup).apply(lambda r: r['wins_1']/(r['wins_1'] + r['wins_2']), axis=1).loc[matchups]

## Computational

In [112]:
# mu_win = pandas.DataFrame(
#     data=matchup_pcts.loc[cast].values,
#     #index=matchup_pcts.reset_index().apply(lambda r: (r.character_1, r.character_2), axis=1),
#     index=pandas.MultiIndex.from_product([cast, cast], names=['character_1', 'character_2']),
# ).loc[matchups]
blind_pcts = blind_pick_counts / blind_pick_counts.sum()
blind_mu_pcts = pandas.Series(
    data=[pct_l*pct_r for pct_l in blind_pcts.values for pct_r in blind_pcts.values],
    index=pandas.MultiIndex.from_product([blind_pcts.index, blind_pcts.index])
)

In [123]:
import itertools

class Meta:
    def __init__(self, exclude=[]):
        mu_exclude = exclude + list(itertools.product(cast, exclude))
        self.blind_mu_pcts = blind_mu_pcts.drop(mu_exclude)
        self.blind_mu_pcts /= self.blind_mu_pcts.sum()
        
        self.matchups = [(l, r) for (l, r) in matchups if l not in exclude and r not in exclude]
        
        self.counterpick_pcts = counterpick_counts.drop(mu_exclude).groupby('pick').apply(lambda g: g / g.sum())
        self.counterpicks_2 = pandas.DataFrame({
            before: {
                after: 0 if before[0] != after[0] else self.counterpick_pcts.loc[before[0], after[1]]
                for after in self.matchups
            } for before in self.matchups
        })
        
        self.counterpicks_1 = pandas.DataFrame({
            before: {
                after: 0 if before[1] != after[1] else self.counterpick_pcts.loc[before[1], after[0]]
                for after in self.matchups
            } for before in self.matchups
        })
        
        self.transition_win = self.counterpicks_2.dot(matchup_pcts.drop(mu_exclude))
        self.transition_lose = self.counterpicks_1.dot(1-matchup_pcts.drop(mu_exclude))

        self._meta_influence = {}

    def _matchup_influence(self, remaining_wins_1, remaining_wins_2):
        ix = (remaining_wins_1, remaining_wins_2)
        if ix not in self._meta_influence:
            if remaining_wins_1 == 0:
                self._meta_influence[ix] = 0
            elif remaining_wins_2 == 0:
                self._meta_influence[ix] = 0
            else:
                self._meta_influence[ix] = (
                    1 +
                    self.transition_win * self._matchup_influence(remaining_wins_1 - 1, remaining_wins_2) +
                    self.transition_lose * self._matchup_influence(remaining_wins_1, remaining_wins_2 - 1)
                )


        return self._meta_influence[ix]

    def matchup_influence(self, remaining_wins_1, remaining_wins_2):
        return self._matchup_influence(remaining_wins_1, remaining_wins_2) * self.blind_mu_pcts
    
    def character_influence(self, remaining_wins_1, remaining_wins_2):
        mus = self.matchup_influence(remaining_wins_1, remaining_wins_2).reset_index()
        chars = mus[['level_0', 0]].rename(columns={'level_0': 'char'}).append(
            mus[['level_1', 0]].rename(columns={'level_1': 'char'})
        ).groupby('char').sum() / 2
        return chars[0]
        
    

In [140]:
meta = Meta(['Zane', 'Troq', 'Degrey'])

print(meta.matchup_influence(4, 4).sort_values())
print(meta.character_influence(4, 4).sort_values())

  
  # This is added back by InteractiveShellApp.init_path()


Jaina       Jaina         0.001547
Persephone  Persephone    0.001661
Jaina       Persephone    0.002017
Persephone  Jaina         0.002017
Vendetta    Jaina         0.002343
Jaina       Vendetta      0.002343
Midori      Persephone    0.002765
Persephone  Midori        0.002765
Midori      Valerie       0.002780
Valerie     Midori        0.002780
Persephone  Gloria        0.002943
Gloria      Persephone    0.002943
Vendetta    Bbb           0.003301
Bbb         Vendetta      0.003301
Lum         Lum           0.003387
Valerie     Valerie       0.003498
Menelker    Bbb           0.003514
Bbb         Menelker      0.003514
            Jaina         0.003536
Jaina       Bbb           0.003536
Gloria      Bbb           0.003556
Bbb         Gloria        0.003556
Valerie     Gloria        0.004035
Gloria      Valerie       0.004035
Midori      Midori        0.004077
Vendetta    Vendetta      0.004513
Menelker    Vendetta      0.004525
Vendetta    Menelker      0.004525
Gloria      Vendetta



In [139]:
matchup_pcts['Degrey']

character_2
Gwen          0.520408
Setsuki       0.400000
Troq          0.426829
Degrey        0.500000
Zane          0.435345
Midori        0.500000
Grave         0.550898
Geiger        0.460177
Menelker      0.582278
Lum           0.681034
Quince        0.460526
Rook          0.534884
Bbb           0.576923
Onimaru       0.690647
Vendetta      0.483333
Jaina         0.576923
Argagarg      0.495413
Valerie       0.625000
Persephone    0.583333
Gloria        0.513889
dtype: float64

## Stochastic

In [8]:
match_starts = pandas.DataFrame({
    'character_1': blind_pick_counts.reset_index()['index'].sample(10000, weights=blind_pick_counts.values, replace=True).reset_index(drop=True),
    'character_2': blind_pick_counts.reset_index()['index'].sample(10000, weights=blind_pick_counts.values, replace=True).reset_index(drop=True),
})

NameError: name 'blind_pick_counts' is not defined

In [23]:
import random
import numpy

class Meta:
    def __init__(self, exclude=None):
        self.exclude = exclude or []
        self.cast = blind_pick_counts.drop(self.exclude).reset_index()['index']
        self.blind_pick_counts = blind_pick_counts.drop(self.exclude)
        self.cps = cps.drop(self.exclude)
        self.cps = self.cps[~self.cps.counter.isin(self.exclude)]

    def sample_blindpick(self, n=10000):
        return self.cast.sample(
            n,
            weights=self.blind_pick_counts.values,
            replace=True
        ).reset_index(drop=True)

    def counterpick(self, char):
        pick = self.cps['counter'][char].sample(1, weights=self.cps[0][char]).values[0]
        return pick

    def generate_match(self, best_of=7, n=10000):
        matchups = pandas.DataFrame({
            'character_1': self.sample_blindpick(n),
            'character_2': self.sample_blindpick(n),
            'remaining_1': best_of // 2 + 1,
            'remaining_2': best_of // 2 + 1,
        })
        while not matchups.empty:
            yield matchups.copy()
            matchup_pct = matchups.apply(lambda r: matchup_pcts[r.character_1][r.character_2], axis=1)
            p1_wins = matchup_pct < numpy.random.random(len(matchup_pct))
            matchups.loc[p1_wins, 'remaining_1'] -= 1
            matchups.loc[~p1_wins, 'remaining_2'] -= 1
            matchups.loc[p1_wins, 'character_2'] = matchups.loc[p1_wins, 'character_1'].apply(self.counterpick)
            matchups.loc[~p1_wins, 'character_1'] = matchups.loc[~p1_wins, 'character_2'].apply(self.counterpick)
            matchups = matchups[matchups.remaining_1 > 0][matchups.remaining_2 > 0]
    
    def equality_errors(self, best_of=7, n=10000):
        all_matches = pandas.concat(self.generate_match(best_of=best_of, n=n))
        char_counts = all_matches['character_1'].append(all_matches['character_2']).value_counts()
        estimated_fracs = (char_counts / char_counts.sum()).sort_values()
        baseline = pandas.Series(data=1/20, index=hist_fracs.index)
        return (
            numpy.linalg.norm(estimated_fracs - baseline[estimated_fracs.index]),
            numpy.linalg.norm(pandas.Series(data=estimated_fracs, index=baseline.index).fillna(0) - baseline),
        )

    

In [31]:
best_exclude = frozenset(['Zane'])
best_scores = Meta(best_exclude).equality_errors(n=1000)

while True:
    next_best_exclude = best_exclude
    next_best_scores = best_scores
    
    for char in blind_pick_counts.index:
        if char in best_exclude:
            next_exclude = best_exclude - set([char])
        else:
            next_exclude = best_exclude | set([char])
        print("Trying", next_exclude)
        
        next_scores = Meta(next_exclude).equality_errors(n=1000)
        print("Found scores", next_scores)
        
        if next_scores[0] < next_best_scores[0]:
            next_best_exclude = next_exclude
            next_best_scores = next_scores
    
    if next_best_exclude == best_exclude:
        break
    else:
        print("Found better scores", next_best_exclude, next_best_scores)
        best_exclude = next_best_exclude
        best_scores = next_best_scores
        
print("Best scores", best_exclude, best_scores)



Trying frozenset()
Found scores (0.067089136474846475, 0.067089136474846475)
Trying frozenset({'Troq', 'Zane'})
Found scores (0.068221668945761427, 0.098255768857330048)
Trying frozenset({'Zane', 'Setsuki'})
Found scores (0.063245860926042272, 0.094868534953778449)
Trying frozenset({'Zane', 'Degrey'})
Found scores (0.064967605222998132, 0.096024943261692863)
Trying frozenset({'Grave', 'Zane'})
Found scores (0.065626244803790604, 0.096471778293172633)
Trying frozenset({'Zane', 'Geiger'})
Found scores (0.068890048655999264, 0.098721015006056068)
Trying frozenset({'Argagarg', 'Zane'})
Found scores (0.076584194018595078, 0.10423597638760725)
Trying frozenset({'Zane', 'Rook'})
Found scores (0.070661878544335976, 0.099965499445631184)
Trying frozenset({'Zane', 'Onimaru'})
Found scores (0.069852006132889485, 0.099394681753045666)
Trying frozenset({'Zane', 'Quince'})
Found scores (0.067869816382366194, 0.098011795084959566)
Trying frozenset({'Zane', 'Gwen'})
Found scores (0.070706361517238303,

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Found scores (0.069783231981275937, 0.099346361109768744)
Trying frozenset({'Midori', 'Setsuki'})
Found scores (0.068635101549761726, 0.098543275593751728)
Trying frozenset({'Midori', 'Degrey'})


KeyboardInterrupt: 

In [15]:
m = historical_record[historical_record.standard_format][['character_1', 'character_2', 'wins_1', 'wins_2']]
by_c = m.groupby('character_1').sum()
hist_counts = by_c['wins_1'] + by_c['wins_2']
hist_fracs = (hist_counts / hist_counts.sum()).sort_values()
hist_fracs

character_1
Jaina         0.022745
Menelker      0.028403
Persephone    0.028630
Midori        0.031006
Vendetta      0.032138
Valerie       0.033835
Gloria        0.034174
Quince        0.037230
Bbb           0.037796
Gwen          0.039719
Lum           0.042662
Onimaru       0.046283
Rook          0.046735
Grave         0.053072
Argagarg      0.068236
Degrey        0.069820
Geiger        0.072310
Setsuki       0.078873
Troq          0.091207
Zane          0.105126
dtype: float64

In [16]:
from numpy.linalg import norm

print(norm(estimated_fracs - baseline[estimated_fracs.index]))
print(norm(pandas.Series(data=estimated_fracs, index=baseline.index).fillna(0) - baseline))
print(norm(hist_fracs - baseline))

0.0715261149542
0.100578253715
0.100780438853
