In [None]:
from collections import defaultdict
from rating_models import IterativeMargin
from optimize_model import train_valid_test_split
from utils.data import get_data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils.evaluation import logloss, brier, rps, accuracy
%matplotlib inline

### Iterative Margin model

This notebook shows how to use the iterative version of the one-parameter Poisson model, which proved to be the best-performing approach in the experiments. Its name stems from the fact that the updates are driven by the difference between the expected and actual goal difference (the win margin) in a match, analogous to the way updates in the Elo model are driven by the difference between the expected and actual match result.

In [None]:
# Hyperparameters of the rating model; the mapping is unpacked into the
# IterativeMargin constructor as keyword arguments.
params = dict(
    c=0.02,
    h=0.3,
    lr=0.01,
    lambda_reg=0.0001,
    goal_cap=-1,
)

In [None]:
# Build the rating model, passing every entry of `params` as a keyword argument.
model = IterativeMargin(**params)

In [None]:
# Obtain the train / validation / test season groups (plus the combined list)
# and load the match data for all seasons.
# NOTE(review): 'E' looks like a league code (the season ids used for evaluation
# start with 'E_'); the meaning of the second positional flag is not visible
# here — confirm against optimize_model.train_valid_test_split.
seasons_train, seasons_valid, seasons_test, seasons_all = train_valid_test_split('E', False)
matches = get_data(seasons_all)

In [None]:
# Preview the first rows of the loaded match data.
matches.head()

In [None]:
# Fit the model and collect its predictions in a single call, telling it which
# seasons belong to the train, validation and test groups.
# NOTE(review): `predictions` is later indexed with a boolean mask built from
# `matches`, so it presumably holds one entry per match row — confirm in
# rating_models.IterativeMargin.fit_predict.
predictions = model.fit_predict(matches, seasons_train, seasons_valid, seasons_test)

### Evaluating model performance

In [None]:
# Season identifiers on which the predictions are scored.
seasons = [
    'E_1516',
    'E_1617',
    'E_1718',
    'E_1819',
]

In [None]:
# Boolean mask over `matches`: True for rows whose 'Season' is one of the
# evaluation seasons.
subset = matches['Season'].isin(seasons)

In [None]:
# 'FTR' values of the evaluated matches, used as the ground truth for the metrics.
# NOTE(review): 'FTR' is presumably the full-time result column — confirm
# against the data source.
results = matches.loc[subset, 'FTR']

In [None]:
# Print every evaluation metric for the selected seasons, one line per metric:
# right-aligned metric name followed by the score with four decimals.
for metric in (logloss, brier, rps, accuracy):
    label = metric.__name__
    score = metric(predictions[subset], results)
    print('{:>10}: {:.4f}'.format(label, score))

### "Big six" Premier League  ratings

Remark: the `ratings_history` attribute was added specifically to retrieve the history of ratings, and only for the `IterativeMargin` model. It is not implemented in the other models (though it would be very easy to add).

In [None]:
# Number of entries recorded in the rating history.
# NOTE(review): the history is joined column-wise to `matches` below, so this is
# presumably expected to equal len(matches) — confirm.
len(model.ratings_history)

In [None]:
# Tabulate the rating history as a two-column frame.
# NOTE(review): assumes each history entry is a (home rating, away rating) pair
# stored in the same order as the rows of `matches` — confirm in IterativeMargin.
ratings = pd.DataFrame(model.ratings_history, columns=['rating_home', 'rating_away'])

In [None]:
# Attach the rating columns to the match table.
# Rating columns left over from a previous execution of this cell are dropped
# first, so re-running the cell does not append duplicate 'rating_home' /
# 'rating_away' columns (duplicates would break the per-team loop further down).
# NOTE(review): pd.concat(axis=1) aligns rows on the index, so this relies on
# `matches` and `ratings` sharing the same index — confirm that get_data
# returns a default 0..n-1 index.
matches = pd.concat(
    [matches.drop(columns=ratings.columns, errors='ignore'), ratings],
    axis=1,
)

In [None]:
# Inspect the last rows to check that the rating columns were attached.
matches.tail(10)

In [None]:
# Collect, for every team, the list of its ratings in the row order of
# `matches`, regardless of whether the team played at home or away.
ratings_team = defaultdict(list)

# Walk the four needed columns in lockstep instead of using
# DataFrame.iterrows(): iterrows builds a new Series for every row (slow) and
# coerces each row to a single dtype.
match_columns = (
    matches[column]
    for column in ('HomeTeam', 'AwayTeam', 'rating_home', 'rating_away')
)
for team_home, team_away, rating_home, rating_away in zip(*match_columns):
    ratings_team[team_home].append(rating_home)
    ratings_team[team_away].append(rating_away)

In [None]:
# Teams whose rating history is plotted; the order also fixes the column order
# of the table built from it and the color assigned to each team.
team_subset = (
    'Arsenal',
    'Chelsea',
    'Liverpool',
    'Man City',
    'Man United',
    'Tottenham',
)

In [None]:
# Size of the plotted window: how many of the most recent seasons to show and
# how many rounds (rating entries per team) one season contributes.
num_seasons, num_rounds = 3, 38

In [None]:
# Keep, for each selected team, only its most recent
# num_rounds * num_seasons ratings (i.e. the last three seasons).
last_n = num_rounds * num_seasons
recent_ratings = {}
for team in team_subset:
    recent_ratings[team] = ratings_team[team][-last_n:]
ratings_top = pd.DataFrame(recent_ratings).reset_index(drop=True)

In [None]:
# Preview the first rows of the per-team rating table.
ratings_top.head()

In [None]:
# Reorder the color cycle so that the two top teams, Liverpool and Manchester
# City, are drawn in colors that match their shirts.
# Entry i of `color_order` is the cycle index used for the i-th plotted team.
color_order = (2, 1, 3, 0, 5, 4)
cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
team_colors = [cycle[index] for index in color_order]

In [None]:
# Zero-based x positions within one season at which tick marks are drawn
# (they are labelled as rounds 1, 10, 20 and 30 on the axis).
rounds_ticks = np.array([0, 9, 19, 29])
# When True, the plotting cell also writes the figure to disk.
save_fig = True

In [None]:
# Plot the rating trajectories of the selected teams, one line per team, over
# the last `num_seasons` seasons.
ax = ratings_top.plot(style='.-', figsize=(12, 6), lw=1.75, color=team_colors)
ax.legend(fontsize=13)
ax.set_xlabel('Rounds', fontsize=13)
ax.set_ylabel('Ratings', fontsize=13)

# Season captions, one per season block; the offset 14 and the height 0.245 are
# hand-tuned positions in data coordinates.
for season, season_label in enumerate(("2016/17", "2017/18", "2018/19")):
    ax.text(14 + season * num_rounds, 0.245, season_label, fontsize=11)

# Dotted separators halfway between the last round of one season and the first
# round of the next.
for season in range(1, num_seasons):
    ax.axvline(x=season * num_rounds - 0.5, c='k', ls=":", lw=1.0)

# Repeat the within-season ticks for every season. Ticks and labels are both
# tiled `num_seasons` times so that their lengths always match; recent
# matplotlib versions raise a ValueError when the counts differ.
tick_positions = np.hstack(
    [rounds_ticks + season * num_rounds for season in range(num_seasons)]
)
tick_labels = np.tile(rounds_ticks + 1, num_seasons)
plt.xticks(ticks=tick_positions, labels=tick_labels, fontsize=12)

plt.yticks(fontsize=12)

# Save before showing, so the export does not depend on the figure still being
# usable after plt.show() has handed it to the backend.
if save_fig:
    ax.get_figure().savefig("ratings_top.pdf", bbox_inches='tight')
plt.show()