In [None]:
# default_exp __init__

# Core

> Team-strength models in Python

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
import json
import datetime as dt
import numpy as np
import pprint

In [None]:
#export

# For now, just re-export everything
from mezzala.adapters import *
from mezzala.blocks import *
from mezzala.models import *
from mezzala.weights import *
from mezzala.parameters import *

Let's demo

In [None]:
# Read the 2015/16 Premier League results: a list of match records
with open('../data/premier-league-1516.json', 'r') as results_file:
    pl_1516 = json.load(results_file)

# Dates arrive as 'YYYY-MM-DD' strings; swap each one for a datetime
for fixture in pl_1516:
    fixture.update(date=dt.datetime.strptime(fixture['date'], '%Y-%m-%d'))

# Peek at the first three matches
pl_1516[:3]

[{'date': datetime.datetime(2015, 8, 8, 0, 0),
  'team1': 'Manchester United FC',
  'team2': 'Tottenham Hotspur FC',
  'score': {'ft': [1, 0]}},
 {'date': datetime.datetime(2015, 8, 8, 0, 0),
  'team1': 'AFC Bournemouth',
  'team2': 'Aston Villa FC',
  'score': {'ft': [0, 1]}},
 {'date': datetime.datetime(2015, 8, 8, 0, 0),
  'team1': 'Leicester City FC',
  'team2': 'Sunderland AFC',
  'score': {'ft': [4, 2]}}]

A model in `mezzala` is composed of 2 parts:

* Model blocks (see `mezzala.blocks`)
* An adapter (see `mezzala.adapters`)

The model blocks determine which terms your model estimates. In general, you will want
to estimate offensive and defensive strength for each team (`TeamStrength`),
as well as home advantage (`HomeAdvantage`).

The selected model blocks can be supplied to the model as a list:

In [None]:
# The terms the model should estimate, supplied as a list of block instances:
# per-team strength plus a home advantage term.
blocks = [TeamStrength(), HomeAdvantage()]

An adapter connects your model to the data source. In other words, it tells the model how to find the information needed to fit.

The information needed is determined by which model blocks are used. In our case,

* All models require `home_goals` and `away_goals`
* `TeamStrength` - requires `home_team` and `away_team`

`HomeAdvantage` doesn't require any information, since it assumes all matches have equal home-field advantage by default.

In [None]:
# Describe where each piece of information the model needs lives in a match record.
adapter = KeyAdapter(               # `KeyAdapter` looks values up by key, i.e. data['...']
    home_team='team1',              # a plain string is a single lookup: data['team1']
    away_team='team2',
    home_goals=['score', 'ft', 0],  # a list is a path of nested lookups: data['score']['ft'][0]
    away_goals=['score', 'ft', 1],  # i.e. data['score']['ft'][1]
)

Pulling this together, we can construct a model from an adapter and blocks

In [None]:
model = DixonColes(adapter=adapter, blocks=blocks)
model.fit(pl_1516)

# Sanity check: none of the fitted estimates may be NaN
assert not any(np.isnan(estimate) for estimate in model.params.values())

# Sanity check: parameters are stored on the log scale, so exponentiate the
# home advantage term and require a multiplier strictly between 1 and 2
home_advantage = np.exp(model.params[HFA_KEY])
assert 1.0 < home_advantage < 2.0

  np.log(self._tau(home_goals, away_goals, home_rate, away_rate, rho))


In [None]:
param_keys = model.params.keys()
# Length of the longest key's string form, used to line up the printed column
param_key_len = max(map(len, map(str, param_keys)))

# One line per parameter: padded key, then the exponentiated estimate
for key in param_keys:
    label = str(key).ljust(param_key_len + 1)
    estimate = np.exp(model.params[key])
    print(f'{label}: {estimate:0.2f}')

OffenceParameterKey(label='Norwich City FC')         : 0.77
OffenceParameterKey(label='Manchester United FC')    : 0.94
OffenceParameterKey(label='Chelsea FC')              : 1.15
OffenceParameterKey(label='Leicester City FC')       : 1.31
OffenceParameterKey(label='Sunderland AFC')          : 0.94
OffenceParameterKey(label='Tottenham Hotspur FC')    : 1.33
OffenceParameterKey(label='Manchester City FC')      : 1.38
OffenceParameterKey(label='Southampton FC')          : 1.14
OffenceParameterKey(label='Stoke City FC')           : 0.81
OffenceParameterKey(label='West Bromwich Albion FC') : 0.66
OffenceParameterKey(label='Aston Villa FC')          : 0.54
OffenceParameterKey(label='Swansea City FC')         : 0.82
OffenceParameterKey(label='West Ham United FC')      : 1.27
OffenceParameterKey(label='Crystal Palace FC')       : 0.76
OffenceParameterKey(label='Arsenal FC')              : 1.25
OffenceParameterKey(label='AFC Bournemouth')         : 0.89
OffenceParameterKey(label='Everton FC') 

Making predictions for a single match

In [None]:
# Predict a single match. Only the two team names are supplied, under the
# same 'team1' / 'team2' keys the adapter was configured with.
scorelines = model.predict_one({
    'team1': 'Manchester City FC',
    'team2': 'Swansea City FC',
})

# Display the first five scoreline predictions
scorelines[0:5]

[ScorelinePrediction(home_goals=0, away_goals=0, probability=0.05607098978860709),
 ScorelinePrediction(home_goals=0, away_goals=1, probability=0.045632138210651055),
 ScorelinePrediction(home_goals=0, away_goals=2, probability=0.01856835455844811),
 ScorelinePrediction(home_goals=0, away_goals=3, probability=0.005037148590563076),
 ScorelinePrediction(home_goals=0, away_goals=4, probability=0.0010248430674165848)]

In [None]:
# Convert the scoreline predictions into outcome predictions
# (home win / draw / away win, as shown in the cell output)
outcomes = scorelines_to_outcomes(scorelines)

# `outcomes` exposes `.values()`; list them so the predictions are displayed
list(outcomes.values())

[OutcomePrediction(outcome=Outcomes(Home win), probability=0.664579565201895),
 OutcomePrediction(outcome=Outcomes(Draw), probability=0.1983374496050809),
 OutcomePrediction(outcome=Outcomes(Away win), probability=0.13708298519302461)]

Or for multiple matches

In [None]:
# `predict` takes a list of match records rather than a single one;
# each record uses the same 'team1' / 'team2' keys as `predict_one`.
many_scorelines = model.predict([
    {'team1': 'Manchester City FC',
     'team2': 'Swansea City FC'},
    {'team1': 'Manchester City FC',
     'team2': 'West Ham United FC'}
])

What about a model with a different weighting method?

By default, the `DixonColes` model weights all matches equally. However, it's more realistic to give matches
played closer to the current date a bigger weight than those played a long time ago.

The original Dixon-Coles paper suggests using an exponential weight, and we can use the same:

In [None]:
# Date of the last match in the data; ages are measured back from here
season_end_date = max(fixture['date'] for fixture in pl_1516)


def days_before_season_end(fixture):
    """Return the whole number of days between a match and the season's last match."""
    return (season_end_date - fixture['date']).days


weight = ExponentialWeight(
    # Value of `epsilon` is taken from the original paper
    epsilon=-0.0065,
    key=days_before_season_end,
)

In [None]:
# Same adapter and blocks as the first model; the only difference is
# that a `weight` is passed in.
model_exp = DixonColes(
    adapter=adapter,
    blocks=blocks,
    weight=weight
)
# Last expression in the cell, so the value returned by `fit` is displayed
model_exp.fit(pl_1516)

<mezzala.models.DixonColes at 0x11d522668>

How much does that change the ratings at season-end?

In [None]:
# For each parameter: unweighted estimate -> weighted estimate (ratio of the two),
# all shown on the exponentiated scale
for key in param_keys:
    label = str(key).ljust(param_key_len + 1)
    unweighted = np.exp(model.params[key])
    weighted = np.exp(model_exp.params[key])
    ratio = weighted / unweighted
    print(f'{label}: {unweighted:0.2f} -> {weighted:0.2f} ({ratio:0.2f})')

OffenceParameterKey(label='Norwich City FC')         : 0.77 -> 0.69 (0.90)
OffenceParameterKey(label='Manchester United FC')    : 0.94 -> 0.92 (0.98)
OffenceParameterKey(label='Chelsea FC')              : 1.15 -> 1.20 (1.04)
OffenceParameterKey(label='Leicester City FC')       : 1.31 -> 1.25 (0.95)
OffenceParameterKey(label='Sunderland AFC')          : 0.94 -> 0.99 (1.05)
OffenceParameterKey(label='Tottenham Hotspur FC')    : 1.33 -> 1.34 (1.01)
OffenceParameterKey(label='Manchester City FC')      : 1.38 -> 1.36 (0.98)
OffenceParameterKey(label='Southampton FC')          : 1.14 -> 1.26 (1.11)
OffenceParameterKey(label='Stoke City FC')           : 0.81 -> 0.82 (1.01)
OffenceParameterKey(label='West Bromwich Albion FC') : 0.66 -> 0.60 (0.91)
OffenceParameterKey(label='Aston Villa FC')          : 0.54 -> 0.49 (0.91)
OffenceParameterKey(label='Swansea City FC')         : 0.82 -> 0.88 (1.08)
OffenceParameterKey(label='West Ham United FC')      : 1.27 -> 1.33 (1.04)
OffenceParameterKey(label