In [None]:
import bpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import poisson

from airsenal.framework.bpl_interface import get_ratings_dict
from airsenal.framework.schema import Result, session

# Evaluating the team level model vs. a baseline

Our baseline will be an independent poisson model, where the rates in the distribution of home and away goals are set by the mean values in the training data. All teams are treated equally.

We will compare this baseline to the plain BPL model that doesn't use Fifa features, and to the BPL model that does use Fifa features.

We train models on the 15/16 season and test on the 16/17 season.

In [None]:
def get_result_df():
    """
    query the match table and put results into pandas dataframe,
    to train the team-level model.
    """
    df_past = pd.DataFrame(
        np.array(
            [
                [
                    s.fixture.date,
                    s.fixture.home_team,
                    s.fixture.away_team,
                    s.home_score,
                    s.away_score,
                ]
                for s in session.query(Result).all()
            ]
        ),
        columns=["date", "home_team", "away_team", "home_goals", "away_goals"],
    )
    df_past["home_goals"] = df_past["home_goals"].astype(int)
    df_past["away_goals"] = df_past["away_goals"].astype(int)
    df_past["date"] = pd.to_datetime(df_past["date"])
    df_past = df_past[df_past["date"] > "2015-08-01"]
    return df_past.sort_values("date")

In [None]:
df = get_result_df()
df_train = df[df["date"] < "2017-08-10"]
df_test = df[(df["date"] >= "2017-08-10") & (df["date"] < "2018-08-10")]
df_X = get_ratings_dict()

In [None]:
class BaselineModel:
    def __init__(self, df):
        self.results = df
        self.mu_home = df["home_goals"].mean()
        self.mu_away = df["away_goals"].mean()

    def log_score(self, df_test):
        home_probs = poisson.pmf(df_test["home_goals"].values, self.mu_home)
        away_probs = poisson.pmf(df_test["away_goals"].values, self.mu_away)
        return np.sum(np.log(home_probs) + np.log(away_probs)) / len(df_test)

In [None]:
baseline = BaselineModel(df_train)
baseline_score = baseline.log_score(df_test)

In [None]:
model = bpl.BPLModel(data=df_train)
model.fit(max_date="2017-08-10")

model.add_new_team("HUD")
model.add_new_team("BHA")
plain_score = model.log_score(df_test)

In [None]:
model_X = bpl.BPLModel(data=df_train, X=df_X)
model_X.fit(max_date="2017-08-10")

model_X.add_new_team(
    "HUD",
    X=np.ravel(
        df_X.loc[df_X["team"] == "HUD", ["att", "mid", "defn", "ovr"]].values
    ).astype(float),
)
model_X.add_new_team(
    "BHA",
    X=np.ravel(
        df_X.loc[df_X["team"] == "HUD", ["att", "mid", "defn", "ovr"]].values
    ).astype(float),
)
fifa_score = model_X.log_score(df_test)

In [None]:
print(f"Baseline model achieves a score of {baseline_score:.2f}")
print(f"BPL model achieves a score of {plain_score:.2f}")
print(f"BPL model  with fifa features achieves a score of {fifa_score:.2f}")

The ranks of the models is as expected, with the BPL model with fifa features scoring best on holdout data. However, the difference between the two BPL models is markedly smaller than between the simpler BPL model and the baseline. Presumably the main difference comes from the matches involving Brighton and Huddersfield, where the fifa features model will perform better. Let's briefly check this.

In [None]:
df_hud_bha = df_test[
    (df_test["home_team"] == "BHA")
    | (df_test["home_team"] == "HUD")
    | (df_test["away_team"] == "BHA")
    | (df_test["away_team"] == "BHA")
]

In [None]:
baseline_score = baseline.log_score(df_hud_bha)
plain_score = model.log_score(df_hud_bha)
fifa_score = model_X.log_score(df_hud_bha)

In [None]:
print(f"Baseline model achieves a score of {baseline_score:.2f}")
print(f"BPL model achieves a score of {plain_score:.2f}")
print(f"BPL model  with fifa features achieves a score of {fifa_score:.2f}")

The increase in performance is not as much as expected.

In [None]:
for i in range(4):
    sns.distplot(model_X.beta_a[:, i], label=df_X.columns[1:][i])
    plt.legend()

In [None]:
for i in range(4):
    sns.distplot(model_X.beta_b[:, i], label=df_X.columns[1:][i])
    plt.legend()

The fifa features provide weak information about the defensive aptitude of a team, but not the attacking aptitude.