# fotmob

In [115]:
import re
import urllib.parse
from logging import getLevelName, getLogger
from typing import Optional, Union, List, Dict

import requests
from cachecontrol import CacheControl

VERSION = "1.2.0"


class FotMob:
    BASE_URL = "https://www.fotmob.com/api"
    LOGGER = getLogger(__name__)

    def __init__(
        self, proxies: Optional[dict] = None, logging_level: Optional[str] = "WARNING"
    ) -> None:
        SESSION = requests.Session()
        if proxies:
            SESSION.proxies.update(proxies)
        CACHE_SESSION = CacheControl(SESSION)

        if logging_level:
            if logging_level.upper() in [
                "DEBUG",
                "INFO",
                "WARNING",
                "ERROR",
                "CRITICAL",
            ]:
                self.LOGGER.setLevel(getLevelName(logging_level.upper()))
            else:
                print(f"Logging level {logging_level} not recognized!")

        self.session = CACHE_SESSION
        self.matches_url = f"{self.BASE_URL}/matches?"
        self.leagues_url = f"{self.BASE_URL}/leagues?"
        self.teams_url = f"{self.BASE_URL}/teams?"
        self.player_url = f"{self.BASE_URL}/playerData?"
        self.match_details_url = f"{self.BASE_URL}/matchDetails?"
        self.search_url = f"{self.BASE_URL}/searchData?"
        self.tv_listing_url = f"{self.BASE_URL}/tvlisting?"
        self.tv_listings_url = f"{self.BASE_URL}/tvlistings?"
        self.fixtures_url = f"{self.BASE_URL}/fixtures?"

    def _check_date(self, date: str) -> Union[re.Match, None]:
        pattern = re.compile(r"(20\d{2})(\d{2})(\d{2})")
        return pattern.match(date)

    def _execute_query(self, url: str):
        response = self.session.get(url)
        response.raise_for_status()
        self.LOGGER.debug(response)
        return response.json()

    def get_matches_by_date(
        self, date: str, time_zone: str = "America/New_York"
    ) -> dict:
        if self._check_date(date) != None:
            url = f"{self.matches_url}date={date}"
            return self._execute_query(url)
        return {}

    def get_team(
        self,
        id: int,
        tab: str = "overview",
        type: str = "league",
        time_zone: str = "America/New_York",
    ):
        url = f"{self.teams_url}id={id}&tab={tab}&type={type}"
        return self._execute_query(url)

    def get_player(self, id: int):
        url = f"{self.player_url}id={id}"
        return self._execute_query(url)

    def get_match_details(self, match_id: int):
        url = f"{self.match_details_url}matchId={match_id}"
        return self._execute_query(url)

    def get_match_tv_listing(self, match_id: int, country_code: str = "GB"):
        url = f"{self.tv_listing_url}matchId={match_id}&countryCode={country_code}"
        return self._execute_query(url)

    def get_tv_listings_country(self, country_code: str = "GB"):
        url = f"{self.tv_listings_url}countryCode={country_code}"
        return self._execute_query(url)

    def search(self, term: str, user_language: str = "en-GB,en"):
        searchterm = urllib.parse.quote_plus(term)
        url = f"{self.search_url}term={searchterm}&userLanguage={user_language}"
        return self._execute_query(url)

    def get_fixtures(self, id: str, season: str):
        url = f"{self.fixtures_url}id={id}&season={season}"
        return self._execute_query(url)

    def get_fixtures_overview(self, league_id: str, season_start: str, season_end: str) -> List[Dict]:
        url = f"{self.leagues_url}id={league_id}&season={season_start}%2F{season_end}"
        return self._execute_query(url)["overview"]["leagueOverviewMatches"]

# data

In [116]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [117]:
Fot = FotMob()
f = Fot.get_fixtures_overview("47", "2023", "2024") + Fot.get_fixtures_overview("47", "2022", "2023") + Fot.get_fixtures_overview("47", "2021", "2022") + Fot.get_fixtures_overview("47", "2020", "2021")
# example data:
"""
{
    "id": "3900932",
    "pageUrl": "/matches/arsenal-vs-crystal-palace/36ytc8#3900932",
    "opponent": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "home": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "away": {
        "id": "9825",
        "name": "Arsenal",
        "score": 2
    },
    "displayTournament": true,
    "lnameArr": [
        "A",
        " | undefined"
    ],
    "notStarted": false,
    "tournament": {},
    "status": {
        "utcTime": "2022-08-05T19:00:00Z",
        "finished": true,
        "started": true,
        "cancelled": false,
        "scoreStr": "0 - 2",
        "reason": {
            "short": "FT",
            "shortKey": "fulltime_short",
            "long": "Full-Time",
            "longKey": "finished"
        }
    }
}
"""

extracted_data = []

for match in f:
    if match["status"]["finished"]:
        extracted_data.append({
        "home_score" : match["home"]["score"],
        "away_score": match["away"]["score"],
        "home_team": match["home"]["name"],
        "away_team": match["away"]["name"],
        "year" : pd.to_datetime(match["status"]["utcTime"]).year,
        "id" : match["id"]
      })

data = pd.DataFrame(extracted_data)
data

Unnamed: 0,home_score,away_score,home_team,away_team,year,id
0,0,3,Burnley,Man City,2023,4193450
1,2,1,Arsenal,Nottm Forest,2023,4193451
2,1,1,Bournemouth,West Ham,2023,4193452
3,4,1,Brighton,Luton,2023,4193453
4,0,1,Everton,Fulham,2023,4193454
...,...,...,...,...,...,...
1256,2,0,Liverpool,Crystal Palace,2021,3411724
1257,5,0,Man City,Everton,2021,3411725
1258,1,0,Sheff Utd,Burnley,2021,3411726
1259,3,0,West Ham,Southampton,2021,3411727


In [118]:
# get stats

for i, match in data.iterrows():
    match_details = Fot.get_match_details(match["id"])["content"]
    match["h2h"] = match_details["h2h"]["summary"]
    match["home_team_rating"] = match_details["lineup"]["teamRatings"]["home"]

    print(f"{i}/{len(data)}")

data.to_csv("2023_fixtures.csv")

# fixture_stats = pd.concat([pd.read_json("https://raw.githubusercontent.com/brimell/ailvarez/main/data/fixture_data_2223.json"), pd.read_json("https://raw.githubusercontent.com/brimell/ailvarez/main/data/fixture_data_2122.json"), pd.read_json("https://raw.githubusercontent.com/brimell/ailvarez/main/data/fixture_data_2021.json")])
# fixture_stats

data

Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty,pulse_id
0,2292810,1,True,True,1,2022-08-05 19:00:00+00:00,90,False,True,1,2,7,0,"[{'identifier': 'goals_scored', 'a': [{'value'...",4,2,74911
1,2292813,1,True,True,4,2022-08-06 11:30:00+00:00,90,False,True,12,2,9,2,"[{'identifier': 'goals_scored', 'a': [{'value'...",4,2,74914
2,2292811,1,True,True,2,2022-08-06 14:00:00+00:00,90,False,True,2,0,3,2,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,2,74912
3,2292814,1,True,True,5,2022-08-06 14:00:00+00:00,90,False,True,20,1,11,2,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,2,74915
4,2292816,1,True,True,7,2022-08-06 14:00:00+00:00,90,False,True,16,0,15,2,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,4,74917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,2128661,38,True,True,374,2021-05-23 15:00:00+00:00,90,False,True,6,0,11,2,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,3,59271
376,2128662,38,True,True,375,2021-05-23 15:00:00+00:00,90,False,True,7,0,12,5,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",4,5,59272
377,2128663,38,True,True,376,2021-05-23 15:00:00+00:00,90,False,True,4,0,15,1,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,2,59273
378,2128664,38,True,True,377,2021-05-23 15:00:00+00:00,90,False,True,16,0,19,3,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,4,59274


# main

In [119]:
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [127]:
selected_features = ['home_team', 'away_team', "year"]

team_label_encoder = LabelEncoder()
year_label_encoder = LabelEncoder()

data["home_team"] = team_label_encoder.fit_transform(data["home_team"])
data["away_team"] = team_label_encoder.transform(data["away_team"])
data["year"] = year_label_encoder.fit_transform(data["year"])

X = data[selected_features]
y_home = data['home_score']
y_away = data['away_score']

(
    X_train,
    X_test,
    y_home_train,
    y_home_test,
    y_away_train,
    y_away_test,
) = train_test_split(X, y_home, y_away, test_size=0.05, random_state=42)


X_train_tensor = torch.from_numpy(X_train.astype(np.float32).values).to(device)
y_home_train_tensor = torch.from_numpy(y_home_train.astype(np.float32).values).to(device)
y_away_train_tensor = torch.from_numpy(y_away_train.astype(np.float32).values).to(device)
X_test_tensor = torch.from_numpy(X_test.astype(np.float32).values).to(device)
y_home_test_tensor = torch.from_numpy(y_home_test.astype(np.float32).values).to(device)
y_away_test_tensor = torch.from_numpy(y_away_test.astype(np.float32).values).to(device)


# X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_train

In [130]:

class FootballScorePredictor(nn.Module):
    def __init__(self, input_size, team_vocab_size, year_vocab_size, embedding_dim, hidden_size, output_size):
        super(FootballScorePredictor, self).__init__()

        self.embedding_home = nn.Embedding(team_vocab_size, embedding_dim)
        self.embedding_away = nn.Embedding(team_vocab_size, embedding_dim)
        self.embedding_year = nn.Embedding(year_vocab_size, embedding_dim)

        self.fc1 = nn.Linear(input_size + 3 * embedding_dim, hidden_size) # concatenation of 3 embeddings
        self.fc_home = nn.Linear(hidden_size, output_size)
        self.fc_away = nn.Linear(hidden_size, output_size)

        self.relu = nn.ReLU()


    def forward(self, x, home_team, away_team, year):
        embed_home = self.embedding_home(home_team.long())
        embed_away = self.embedding_away(away_team.long())
        embed_year = self.embedding_year(year.long())

        x = torch.cat((x, embed_home, embed_away, embed_year), dim=1)
        x = self.relu(self.fc1(x))

        home_score = self.fc_home(x)
        away_score = self.fc_away(x)

        return home_score, away_score

def custom_mse_loss(output, target):
    loss = torch.mean((output - target) ** 2)
    neg_penalty = torch.mean(torch.relu(-output))  # Penalize negative predictions
    return loss + neg_penalty

input_size = X_train.shape[1]
hidden_size = 10000
output_size = 1
team_vocab_size = len(team_label_encoder.classes_)
year_vocab_size = len(year_label_encoder.classes_)
embedding_dim = 10
model = FootballScorePredictor(
    input_size, team_vocab_size, year_vocab_size, embedding_dim, hidden_size, output_size
)
model.to(device)

criterion = custom_mse_loss
optimizer = optim.Adam(model.parameters(), lr=0.01)


num_epochs = 10000
for epoch in range(num_epochs):
    home_outputs, away_outputs = model(
        X_train_tensor, X_train_tensor[:, 0], X_train_tensor[:, 1], X_train_tensor[:, 2]
    )
    loss_home = criterion(home_outputs.squeeze(), y_home_train_tensor)
    loss_away = criterion(away_outputs.squeeze(), y_away_train_tensor)
    loss = loss_home + loss_away

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 1000 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Total Loss: {loss.item()}')


Epoch [1000/10000], Total Loss: 2.1402554512023926
Epoch [2000/10000], Total Loss: 1.107411503791809
Epoch [3000/10000], Total Loss: 0.5191128849983215
Epoch [4000/10000], Total Loss: 0.4093443751335144
Epoch [5000/10000], Total Loss: 0.40281620621681213
Epoch [6000/10000], Total Loss: 0.41502678394317627
Epoch [7000/10000], Total Loss: 0.40121740102767944
Epoch [8000/10000], Total Loss: 0.4009696841239929
Epoch [9000/10000], Total Loss: 0.40983593463897705
Epoch [10000/10000], Total Loss: 0.4116908311843872


### graph of test data

In [132]:
import plotly.express as px
import plotly.graph_objects as go

with torch.no_grad():
    home_outputs, away_outputs = model(X_test_tensor, X_test_tensor[:, 0], X_test_tensor[:, 1], X_test_tensor[:, 2])
    home_outputs = home_outputs.squeeze().cpu().numpy()
    away_outputs = away_outputs.squeeze().cpu().numpy()

    y_home_test_np = y_home_test_tensor.cpu().numpy()
    y_away_test_np = y_away_test_tensor.cpu().numpy()

    home_teams = team_label_encoder.inverse_transform(X_test['home_team'])
    away_teams = team_label_encoder.inverse_transform(X_test['away_team'])

    df_home = pd.DataFrame({
        'Team': home_teams,
        'Opponent': away_teams,
        'Actual Score': y_home_test_np,
        'Predicted Score': home_outputs
    })

    df_away = pd.DataFrame({
        'Team': away_teams,
        'Opponent': home_teams,
        'Actual Score': y_away_test_np,
        'Predicted Score': away_outputs
    })

    df_all = pd.concat([df_home, df_away])

    fig = px.scatter(df_all, x='Actual Score', y='Predicted Score', color='Team', symbol='Opponent',
                     title='Teams Performance: Predicted vs Actual Scores',
                     labels={'Actual Score': 'Actual Score', 'Predicted Score': 'Predicted Score'},
                     hover_data=['Team', 'Opponent'])

    line_x_values = np.linspace(0, 8)
    line_y_values = line_x_values

    # Add a trace for the line y = x
    fig.add_trace(go.Scatter(x=line_x_values, y=line_y_values, mode='lines', name='y = x'))

    fig.update_traces(marker=dict(size=8, opacity=0.7), selector=dict(mode='markers'))
    fig.show()


### graph of training data

In [131]:
import plotly.express as px
import plotly.graph_objects as go

with torch.no_grad():
    home_outputs, away_outputs = model(X_train_tensor, X_train_tensor[:, 0], X_train_tensor[:, 1], X_train_tensor[:, 2])
    home_outputs = home_outputs.squeeze().cpu().numpy()
    away_outputs = away_outputs.squeeze().cpu().numpy()

    y_home_train_np = y_home_train_tensor.cpu().numpy()
    y_away_train_np = y_away_train_tensor.cpu().numpy()

    home_teams = team_label_encoder.inverse_transform(X_train['home_team'])
    away_teams = team_label_encoder.inverse_transform(X_train['away_team'])

    df_home = pd.DataFrame({
        'Team': home_teams,
        'Opponent': away_teams,
        'Actual Score': y_home_train_np,
        'Predicted Score': home_outputs
    })

    df_away = pd.DataFrame({
        'Team': away_teams,
        'Opponent': home_teams,
        'Actual Score': y_away_train_np,
        'Predicted Score': away_outputs
    })

    df_all = pd.concat([df_home, df_away])

    fig = px.scatter(df_all, x='Actual Score', y='Predicted Score', color='Team', symbol='Opponent',
                     title='Teams Performance: Predicted vs Actual Scores',
                     labels={'Actual Score': 'Actual Score', 'Predicted Score': 'Predicted Score'},
                     hover_data=['Team', 'Opponent'])

    line_x_values = np.linspace(0, 8)
    line_y_values = line_x_values

    # Add a trace for the line y = x
    fig.add_trace(go.Scatter(x=line_x_values, y=line_y_values, mode='lines', name='y = x'))

    fig.update_traces(marker=dict(size=8, opacity=0.7), selector=dict(mode='markers'))
    fig.show()


# predictions for upcoming games

In [124]:
extracted_data = []
for match in f:
    if not match["status"]["finished"]:
        extracted_data.append(
            {
                "home_team": match["home"]["name"],
                "away_team": match["away"]["name"],
            }
        )


new_data = pd.DataFrame(extracted_data)

In [125]:
selected_features = ["home_team", "away_team"]


label_encoder = LabelEncoder()
new_data["home_team"] = label_encoder.fit_transform(new_data["home_team"])
new_data["away_team"] = label_encoder.transform(new_data["away_team"])


X_new = new_data[selected_features]


X_new_tensor = torch.tensor(X_new.values, dtype=torch.float32).to(device)
# X_new_tensor, X_new_tensor[:, -2], X_new_tensor[:, -1]

In [126]:
with torch.no_grad():
    home_scores_pred, away_scores_pred = model(
        X_new_tensor, X_new_tensor[:, -2], X_new_tensor[:, -1]
    )

    home_scores_pred = home_scores_pred.squeeze().cpu().numpy()
    away_scores_pred = away_scores_pred.squeeze().cpu().numpy()

    home_teams = label_encoder.inverse_transform(new_data["home_team"])
    away_teams = label_encoder.inverse_transform(new_data["away_team"])

    df_home = pd.DataFrame(
        {
            "Team": home_teams,
            "Opponent": away_teams,
            "Predicted Score": home_scores_pred,
        }
    )

    df_away = pd.DataFrame(
        {
            "Team": away_teams,
            "Opponent": home_teams,
            "Predicted Score": away_scores_pred,
        }
    )

    df_all = pd.concat([df_home, df_away])

    fig = px.scatter(
        df_all,
        x="Team",
        y="Predicted Score",
        color="Team",
        title="Predicted Scores",
        hover_data=["Team", "Opponent"],
    )

    fig.update_traces(marker=dict(size=8, opacity=0.7), selector=dict(mode="markers"))
    fig.show()

    fig2 = px.scatter(
        df_all,
        x="Team",
        y="Opponent",
        color="Team",
        title="Predicted Scores, Team against Team",
        hover_data=["Team", "Opponent", "Predicted Score"],
    )

    fig2.update_traces(marker=dict(size=8, opacity=0.7), selector=dict(mode="markers"))
    fig2.show()

TypeError: ignored