In [None]:
import glicko2 as Glicko

In [None]:
# Glicko-2 settings; all four are passed to the Glicko2 environment, and
# MU / PHI / SIGMA are also used as the default starting rating for a race.
# improbable results are likely so set tau low
TAU = 0.2     # system constant
MU = 1500     # starting rating
PHI = 500     # Starting rating deviation
SIGMA = 0.03  # starting volatility

In [None]:
# Shared rating environment: ratings are created, rescaled and updated through it.
glck = Glicko.Glicko2(mu=MU, tau=TAU, phi=PHI, sigma=SIGMA)

In [None]:
import pandas as pd
import math
import numpy as np

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the full match history. Comma separation and a fresh integer index
# are pandas' defaults, so they are not spelled out.
naf_data = pd.read_csv("data/all_matches.csv.gz")

In [None]:
# Turn the slash-separated date strings into real timestamps.
naf_data["date"] = pd.to_datetime(naf_data["date"], format="%Y/%m/%d")

In [None]:
# Index the matches by date so they can be sliced and grouped by time.
naf_data = naf_data.set_index("date")

In [None]:
# Chronological order is required for the label slice that follows.
naf_data = naf_data.sort_index()

In [None]:
# up to June '17 due to errors in data
# (label slice on the sorted date index; the end label itself is kept)
naf_data = naf_data.loc[:pd.Timestamp('2017-06-01')]

In [None]:
# Quick visual check: match date against row position, markers only.
plt.plot(naf_data.index, "o")

## Set lookup

In [None]:
# Every race that appears on either side of a match. Ratings are created for
# away races as well, so the per-race history tables need a column for each;
# missing values are dropped because NaN cannot be ordered against strings.
race_ids = sorted(
    pd.concat([naf_data.home_race, naf_data.away_race]).dropna().unique())

In [None]:
class Player(object):
    """Per-coach store of Glicko-2 ratings, one rating per race played.

    Attributes:
        pid: coach identifier given to the constructor.
        rankings: dict mapping race -> rating currently in force.
        new_rankings: dict mapping race -> rating computed during the open
            rating period; promoted into ``rankings`` by
            ``end_ranking_period``.
        hist_mu, hist_phi: DataFrames (rows = rating periods, columns =
            ``race_ids``) holding mu / phi as of the close of each period.
    """

    # need to maintain only historical rankings, not phi etc.
    hist_mu = None
    hist_phi = None

    def __init__(self, player_id, periods):
        """Create an unrated player with empty history tables.

        Args:
            player_id: coach identifier.
            periods: index labels (one per rating period) for the history.
        """
        self.pid = player_id
        self.hist_mu = pd.DataFrame(
            columns=race_ids, index=periods, dtype=float)
        self.hist_phi = pd.DataFrame(
            columns=race_ids, index=periods, dtype=float)
        self.rankings = {}
        self.new_rankings = {}

    def init_rating(self, race_id, method="median"):
        """Create, store and return the starting rating for ``race_id``.

        Args:
            race_id: race to create a rating for.
            method: ``"default"`` starts from MU / PHI / SIGMA. ``"median"``
                seeds mu with the median of the coach's existing mu values
                and phi with the largest existing phi; it falls back to the
                default while the coach has fewer than two rated races.

        Returns:
            The newly created rating (also stored in ``self.rankings``).

        Raises:
            ValueError: if ``method`` is not recognised (and the fallback to
                the default does not apply).
        """
        if (method == "default") or (len(self.rankings) < 2):
            rating = glck.create_rating(mu=MU, phi=PHI, sigma=SIGMA)

        elif method == "median":
            # other rankings. Possibly exclude stunties?
            # divide PHI by the number of other ranks?
            # Or always start stunties at 1500?
            mu_vals = [v.mu for v in self.rankings.values()]
            phi_vals = [v.phi for v in self.rankings.values()]
            rating = glck.create_rating(
                mu=np.median(mu_vals), phi=np.max(phi_vals), sigma=SIGMA)

        else:
            raise ValueError(
                "unknown rating initialisation method: {0!r}".format(method))

        self.rankings[race_id] = rating
        return rating

    def end_ranking_period(self, date):
        """Close the rating period labelled ``date``.

        Races played this period take their freshly computed rating; races
        with a rating but no games this period have their deviation grown
        via ``decay``. The resulting mu / phi are written to the history
        tables in the row ``date``.
        """
        # copy new rankings
        for race in self.hist_mu.columns:
            if race in self.new_rankings:
                self.rankings[race] = self.new_rankings[race]
            elif race in self.rankings:
                self.rankings[race] = self.decay(race)

        # delete temp ranks
        self.new_rankings = {}

        # fill historical with rankings.
        # Single .loc[row, col] assignment: the chained form
        # frame[col].loc[row] = value may write to a temporary object and
        # leave the frame unchanged.
        for rid, rank in self.rankings.items():
            self.hist_mu.loc[date, rid] = rank.mu
            self.hist_phi.loc[date, rid] = rank.phi

    def decay(self, race):
        """Return the rating for ``race`` after a period without games.

        mu and sigma are unchanged; phi grows to sqrt(phi^2 + sigma^2),
        computed on the scaled-down rating and capped at the starting
        deviation PHI.
        """
        # transform into glicko space...
        dnp = glck.scale_down(self.rankings[race])

        # Express the cap in the same scaled-down units as dnp.phi. Using
        # PHI directly would compare a rating-scale deviation with a
        # scaled-down variance, so the cap would practically never bind.
        phi_cap = glck.scale_down(
            glck.create_rating(mu=MU, phi=PHI, sigma=SIGMA)).phi

        # increment phi
        phi_star = min(phi_cap, math.sqrt(dnp.phi ** 2 + dnp.sigma ** 2))

        return glck.scale_up(
            glck.create_rating(mu=dnp.mu, phi=phi_star, sigma=dnp.sigma))

In [None]:
# Column triples describing each side of a match, in the same field order
# (coach, race, score) so the two sides can be swapped column-for-column.
cols_home = ["home_coach", "home_race", "home_score"]

cols_away = ["away_coach", "away_race", "away_score"]

In [None]:
# Two views of every match: as recorded, and with the sides swapped.
# The swapped copy is relabelled so both frames share the same column names.
tmp = naf_data.loc[:, cols_home + cols_away].copy()
tmp2 = naf_data.loc[:, cols_away + cols_home].copy()
tmp2.columns = tmp.columns

In [None]:
# Stack both orientations: each match now appears once per participating coach,
# with that coach in the "home" columns.
rank_data = pd.concat([tmp, tmp2])

In [None]:
# Outcome from the "home" coach's point of view: 1 win, 0.5 draw, 0 loss.
diff = rank_data["home_score"] - rank_data["away_score"]
win = diff.gt(0)
draw = diff.eq(0)
rank_data["result"] = win.astype(float) + draw.astype(float) / 2

In [None]:
# Rating periods: games are binned on the date index at month-end frequency.
# NOTE(review): recent pandas releases prefer the alias 'ME' over 'M' —
# confirm against the pandas version this notebook is pinned to.
grouped_games = rank_data.groupby(pd.Grouper(freq='M'))

In [None]:
# Period labels in iteration order; used as the row index of each player's history.
rank_periods = [period_label for period_label, _games in grouped_games]

In [None]:
# coach id -> Player, filled lazily as coaches first appear
ranking_data = {}

In [None]:
%%time
for period, x in grouped_games:
    
    if period.month == 1:
        print(period)
        
    for xid in x.home_coach.unique():
        if xid not in ranking_data:
            ranking_data[xid] = Player(xid, rank_periods)
    
    # group by player
    grped = x.groupby(["home_coach", "home_race"])
    
    # first run through all players who have played in this period
    for (player, race), data in grped:
        
        player_rank = ranking_data[player]
        
        # if new race
        if race not in player_rank.rankings:
            player_rank.init_rating(race)
            
        series = list()

        for opp_id, opp_race, result in zip(
            data.away_coach, data.away_race, data.result):
            
            opp_rank = ranking_data[opp_id]
            
            # opponent ranking?
            if opp_race not in opp_rank.rankings:
                opp_rank.init_rating(opp_race)
            
            series.append(
                (result, opp_rank.rankings[opp_race]))
        
        player_rank.new_rankings[race] = glck.rate(
            player_rank.rankings[race], series)
        
    # end all ranking periods.
    for k, v in ranking_data.items():
        v.end_ranking_period(period)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Conservative current rating per coach and race: mu minus two deviations,
# read from the last row of each coach's history tables.
current_ratings = {}

for value in ranking_data.values():
    # Skip the record keyed by a missing coach name. pd.isna tests the value,
    # so it also catches NaN objects that are not the np.nan singleton.
    if pd.isna(value.pid):
        continue
    mu_val = value.hist_mu.iloc[-1]
    phi_val = value.hist_phi.iloc[-1]

    current_ratings[value.pid] = mu_val - (2 * phi_val)

In [None]:
# One row per (coach, race) pair, with the conservative rating as the only column.
df_current_ratings = (
    pd.concat(current_ratings, names=["coach", "race"])
    .to_frame(name="rating")
)

In [None]:
# Drop (coach, race) pairs that have no rating. The frame is reassigned
# because dropna(inplace=True) on the column accessor only affects a
# temporary column object and leaves the frame itself untouched.
df_current_ratings = df_current_ratings.dropna(subset=["rating"])

In [None]:
# From here on the name refers to the rating column alone, as a float Series.
df_current_ratings = df_current_ratings["rating"].astype(float)

In [None]:
# Keep only pairs that actually have a rating.
df_current_ratings = df_current_ratings.dropna()

In [None]:
# Highest conservative rating first.
df_current_ratings = df_current_ratings.sort_values(ascending=False)

In [None]:
# Top 30 (coach, race) pairs.
df_current_ratings.head(30)

## Plot historical stats of some players

Issue: `mu` needs to be stickier so that ratings change more slowly.

Lower volatility?

In [None]:
# Inspect one coach's current per-race ratings.
ranking_data["Atropabelladonna"].rankings

In [None]:
# Inspect one coach's current per-race ratings.
ranking_data["Jimjimany"].rankings

In [None]:
import seaborn as sns

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))

# (coach, race) pairs whose rating history is worth eyeballing
poi = (
    ("Jimjimany", "Wood Elves"),
    ("Pipey", "Norse"),
    ("Joemanji", "Undead"),
    ("Purplegoo", "Vampires"),
    ("mubo", "Dark Elves"),
    ("Podfrey", "Amazons"))

for coach, race in poi:

    record = ranking_data[coach]

    # conservative rating over time: mu minus two deviations
    ranking_ = record.hist_mu[race] - (2 * record.hist_phi[race])

    ax.plot(ranking_, label="{0}: {1}".format(coach, race))
ax.legend(loc=3)

In [None]:
# Distribution of the conservative ratings over all rated (coach, race) pairs.
# NOTE(review): distplot is deprecated in recent seaborn releases — consider
# histplot/displot once the minimum seaborn version allows; confirm first.
sns.distplot(df_current_ratings)

In [None]:
# First row after sorting, with coach and race as ordinary fields.
df_current_ratings.reset_index().iloc[0]

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))

# Rating history of the highest-rated (coach, race) pairs. head() copes with
# fewer than seven rated pairs, and the index is flattened once, not per pass.
top_rated = df_current_ratings.reset_index().head(7)

for coach, race in zip(top_rated.coach, top_rated.race):
    # conservative rating over time: mu minus two deviations
    ranking_ = ranking_data[coach].hist_mu[race] - (2 * ranking_data[coach].hist_phi[race])

    ax.plot(ranking_, label="{0}: {1}".format(coach, race))

ax.legend(loc=3)

In [None]:
# Flatten the (coach, race) index into columns for plotting.
# Note: this rebinds `x`, which the rating loop used for the per-period games.
x = df_current_ratings.reset_index()

In [None]:
# Spread of conservative ratings for each race.
sns.boxplot(y="race", x="rating", data=x)