In [1]:
import pandas as pd
from fpl import Loader, Player

## Expected Points Model

Using the framework of [General Model Theory](https://en.wikipedia.org/wiki/Model)

##### Mapping - what phenomena are we trying to model?
Given that a player plays a particular fixture (greater than zero minutes played), we want to model how many points the player will score.

##### Reduction - what simplifying assumptions will we make?
The simplifying assumptions we will make are as follows:
- Points scored depend only on fixture difficulty, form and points per game (again, counting only games where a player plays more than zero minutes)
- There will be no attempt to estimate minutes of each player or likelihood of playing; the effect of minutes will come indirectly through points per game
- There will be no attempt to use other useful statistics from other data sources such as betting odds, Opta data, xG etc.
- We will restrict our data for analysis to gameweeks 20-24 so that averages are not calculated over too few gameweeks

##### Pragmatism - when can we use the model from a practical point of view?
We can use this model if, for each fixture each player plays, we can calculate:
1. Their points per game so far that season, prior to the fixture.
2. Their form so far that season, prior to the fixture.
3. An indication of the difficulty of a particular fixture.

This should be relatively simple, therefore we can create an `ExpectedPointsCalculator` from such a model and plug it into our `Optimizer`.

In [2]:
def compute_points_per_game(player_id: int, as_of_gameweek: int) -> float:
    """Average the points a player scored per appearance before a given gameweek.

    Every fixture from gameweek 1 up to (but not including) ``as_of_gameweek`` is
    considered, and only those in which the player recorded more than zero
    minutes contribute to the average.

    :param player_id: The unique identifier of the player.
    :param as_of_gameweek: The first gameweek excluded from the average, i.e. the
        gameweek that immediately follows the ones being averaged.

    :return: Mean points per appearance rounded to one decimal place, or 0.0 when
        the player has no appearance in that span.
    """
    # Gather every fixture record first; the loader yields an iterable per
    # gameweek, which can hold more than one fixture in a double gameweek.
    past_fixtures = []
    for gameweek in range(1, as_of_gameweek):
        past_fixtures.extend(
            Loader.get_player_historical_info_for_gameweek(player_id, gameweek)
        )

    # Keep only the scores from fixtures in which the player was on the pitch.
    scores = []
    for fixture in past_fixtures:
        if fixture["minutes"] > 0:
            scores.append(fixture["total_points"])

    if not scores:
        return 0.0
    return round(sum(scores) / len(scores), 1)


def test_compute_points_per_game_is_correct() -> bool:
    """We test to see whether we are calculating points per game in the same way as the fpl website."""
    total_number_of_players = len(Loader.get_static_info()["elements"])
    for i in range(1, total_number_of_players):
        computed_points_per_game_manual = compute_points_per_game(
            i, Loader.get_next_gameweek()
        )
        computed_points_per_game_official = Loader.get_player_basic_info(i)[
            "points_per_game"
        ]
        computed_points_per_game_match = (
            abs(
                computed_points_per_game_manual
                - float(computed_points_per_game_official)
            )
            < 0.1
        )
        if not computed_points_per_game_match:
            raise Exception(
                f"Mismatch found for player ID {i}: "
                f"manual calculation = {computed_points_per_game_manual}, "
                f"official calculation = {computed_points_per_game_official}"
            )

    return True

In [3]:
# Run the points-per-game check; it returns True on success and raises on the
# first player whose manual and official figures disagree.
test_compute_points_per_game_is_correct()

True

In [4]:
def kickoff_time_30_days_before_gameweek(
    kickoff_time: str, as_of_gameweek: int
) -> bool:
    """Tell whether a game kicked off no earlier than 30 days before a gameweek deadline.

    The reference point is the ``deadline_time`` of the event whose ``id`` equals
    ``as_of_gameweek``. Only the lower bound of the window is checked: a kickoff
    later than the deadline also yields True. Useful for computing a player's form.

    :param kickoff_time: Kick off time of a particular game; interpreted as UTC.
    :param as_of_gameweek: The gameweek whose deadline closes the 30-day window.

    :return: True when the kickoff is at or after (deadline - 30 days).
    :raises IndexError: When no event with that gameweek id exists.
    """
    matching_deadlines = []
    for event in Loader.get_static_info()["events"]:
        if event["id"] == as_of_gameweek:
            matching_deadlines.append(event["deadline_time"])

    # NOTE(review): assumes the deadline string carries a UTC offset so that it
    # can be compared with the UTC-aware kickoff below - confirm against Loader.
    deadline = pd.Timestamp(matching_deadlines[0])
    window_start = deadline - pd.Timedelta(days=30)
    kickoff = pd.Timestamp(kickoff_time, tz="UTC")
    return kickoff >= window_start


def compute_form(player_id: int, as_of_gameweek: int) -> float:
    """Compute the form a player would have had right before a given gameweek.

    Form is the mean of ``total_points`` over the player's fixtures from gameweeks
    1 to ``as_of_gameweek - 1`` that pass ``kickoff_time_30_days_before_gameweek``.
    Unlike points per game, fixtures are not filtered on minutes played.

    :param player_id: The unique identifier of the player.
    :param as_of_gameweek: The first gameweek excluded from the average; its
        deadline anchors the 30-day window.

    :return: Mean points per qualifying fixture rounded to one decimal place, or
        0.0 when no fixture qualifies.
    """
    # Gather every earlier fixture record before applying the date filter.
    past_fixtures = []
    for gameweek in range(1, as_of_gameweek):
        past_fixtures.extend(
            Loader.get_player_historical_info_for_gameweek(player_id, gameweek)
        )

    recent_scores = []
    for fixture in past_fixtures:
        is_recent = kickoff_time_30_days_before_gameweek(
            fixture["kickoff_time"], as_of_gameweek
        )
        if is_recent:
            recent_scores.append(fixture["total_points"])

    if not recent_scores:
        return 0.0
    return round(sum(recent_scores) / len(recent_scores), 1)


def test_compute_form_is_correct() -> bool:
    """We test to see whether we are calculating form in the same way as the fpl website."""
    total_number_of_players = len(Loader.get_static_info()["elements"])
    for i in range(1, total_number_of_players):
        computed_form_manual = compute_form(i, Loader.get_next_gameweek())
        computed_form_official = Loader.get_player_basic_info(i)["form"]
        computed_form_match = (
            abs(computed_form_manual - float(computed_form_official)) < 0.1
        )
        if not computed_form_match:
            raise Exception(
                f"Mismatch found for player ID {i}: "
                f"manual calculation = {computed_form_manual}, "
                f"official calculation = {computed_form_official}"
            )

    return True

In [5]:
# Run the form check; it returns True on success and raises on the first player
# whose manual and official figures disagree.
test_compute_form_is_correct()

True

In [6]:
# Build one record per (player, fixture) in which the player actually played.
# Each record pairs the response (points scored in that fixture) with predictors
# computed as of the fixture's gameweek, i.e. from earlier gameweeks only.

# Gameweeks used for the analysis (20 to 24 inclusive).
ANALYSIS_GAMEWEEKS = range(20, 25)

records = []
total_number_of_players = len(Loader.get_static_info()["elements"])
# Player IDs are treated as running from 1 to N inclusive; ``range`` excludes its
# stop value, so the stop must be N + 1 for the last player to be included.
for player_id in range(1, total_number_of_players + 1):
    player_info = Loader.get_player_basic_info(player_id)
    # NOTE(review): this is the team on the player's basic info record; presumably
    # the current team, which may differ from the team at fixture time - confirm.
    team_id = player_info["team"]
    team_info = None  # looked up lazily, only once the player has a played fixture
    for gameweek in ANALYSIS_GAMEWEEKS:
        gameweek_info = Loader.get_player_historical_info_for_gameweek(
            player_id, gameweek
        )
        # A player can play multiple fixtures in a single gameweek i.e. a "double"
        # gameweek. Fixtures with zero minutes are outside the model's scope.
        played_fixtures = [
            fixture_info for fixture_info in gameweek_info if fixture_info["minutes"] > 0
        ]
        if not played_fixtures:
            # Nothing to record, so skip the predictor calculations entirely.
            continue

        # Both predictors depend only on (player, gameweek), not on the fixture.
        points_per_game = compute_points_per_game(player_id, gameweek)
        form = compute_form(player_id, gameweek)
        if team_info is None:
            team_info = Loader.get_team_basic_info(team_id)

        for fixture_info in played_fixtures:
            opponent_team_id = fixture_info["opponent_team"]
            opponent_team_info = Loader.get_team_basic_info(opponent_team_id)
            records.append(
                {
                    "player_id": player_id,
                    "gameweek": gameweek,
                    "fixture": fixture_info["fixture"],
                    "web_name": player_info["web_name"],
                    "team": team_info["short_name"],
                    "opponent_team": opponent_team_info["short_name"],
                    "team_id": team_id,
                    "opponent_team_id": opponent_team_id,
                    "was_home": fixture_info["was_home"],
                    "position": player_info["element_type"],
                    "total_points": fixture_info["total_points"],  # response
                    "form": form,  # predictor
                    "points_per_game": points_per_game,  # predictor
                }
            )

In [7]:
# Build the frame in one go from the list of record dicts (one row per played fixture).
df = pd.DataFrame(records)

In [8]:
# Spot-check the rows for a single player (ID 328, shown as M.Salah in the output).
df[df["player_id"] == 328]

Unnamed: 0,player_id,gameweek,fixture,web_name,team,opponent_team,team_id,opponent_team_id,was_home,position,total_points,form,points_per_game
760,328,20,196,M.Salah,LIV,MUN,12,14,True,3,7,12.8,11.2
761,328,21,206,M.Salah,LIV,NFO,12,16,False,3,2,13.2,11.0
762,328,22,212,M.Salah,LIV,BRE,12,4,False,3,3,11.0,10.6
763,328,23,226,M.Salah,LIV,IPS,12,10,True,3,8,7.4,10.2
764,328,24,232,M.Salah,LIV,BOU,12,3,False,3,16,5.0,10.1
765,328,24,144,M.Salah,LIV,EVE,12,8,False,3,13,5.0,10.1


TODO:
- Move functions and tests into loader.py OR potentially create a new module utils.py (or another name)
- Do some exploratory data analysis and tire kick to see if it's ok
- Do regression and get some visualizations