# fotmob

In [55]:
import re
import urllib.parse
from logging import getLevelName, getLogger
from typing import Optional, Union, List, Dict

import requests
from cachecontrol import CacheControl

VERSION = "1.2.0"


class FotMob:
    """Small client for the FotMob web API.

    Every public method builds a URL under ``BASE_URL``, performs a GET
    through a cached ``requests`` session and returns the decoded JSON body.
    """

    BASE_URL = "https://www.fotmob.com/api"
    LOGGER = getLogger(__name__)

    # Dates are sent to the API as ``YYYYMMDD``; only years 20xx are accepted.
    # Compiled once here instead of on every ``_check_date`` call.
    _DATE_PATTERN = re.compile(r"(20\d{2})(\d{2})(\d{2})")
    _LOGGING_LEVELS = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")

    def __init__(
        self, proxies: Optional[dict] = None, logging_level: Optional[str] = "WARNING"
    ) -> None:
        """Create the HTTP session and pre-compute the endpoint URLs.

        Args:
            proxies: Optional proxy mapping merged into the session's proxies.
            logging_level: Name of a standard logging level (case-insensitive).
                An unrecognized name is reported on stdout and the logger
                level is left untouched; a falsy value skips configuration.
        """
        session = requests.Session()
        if proxies:
            session.proxies.update(proxies)

        if logging_level:
            level_name = logging_level.upper()
            if level_name in self._LOGGING_LEVELS:
                self.LOGGER.setLevel(getLevelName(level_name))
            else:
                print(f"Logging level {logging_level} not recognized!")

        # All requests go through the CacheControl wrapper around the session.
        self.session = CacheControl(session)
        self.matches_url = f"{self.BASE_URL}/matches?"
        self.leagues_url = f"{self.BASE_URL}/leagues?"
        self.teams_url = f"{self.BASE_URL}/teams?"
        self.player_url = f"{self.BASE_URL}/playerData?"
        self.match_details_url = f"{self.BASE_URL}/matchDetails?"
        self.search_url = f"{self.BASE_URL}/searchData?"
        self.tv_listing_url = f"{self.BASE_URL}/tvlisting?"
        self.tv_listings_url = f"{self.BASE_URL}/tvlistings?"
        self.fixtures_url = f"{self.BASE_URL}/fixtures?"

    def _check_date(self, date: str) -> Optional[re.Match]:
        """Return a match object if ``date`` is exactly ``20YYMMDD``, else None.

        ``fullmatch`` is used so that trailing characters (for example
        ``"20230101xyz"``) are rejected instead of being sent to the API.
        """
        return self._DATE_PATTERN.fullmatch(date)

    def _execute_query(self, url: str):
        """GET ``url`` and return the decoded JSON body.

        Raises whatever ``response.raise_for_status()`` raises for an
        unsuccessful HTTP status.
        """
        response = self.session.get(url)
        response.raise_for_status()
        self.LOGGER.debug(response)
        return response.json()

    def get_matches_by_date(
        self, date: str, time_zone: str = "America/New_York"
    ) -> dict:
        """Return the matches for ``date`` (``YYYYMMDD``).

        An invalid date yields an empty dict without any request being made.
        ``time_zone`` is accepted for interface compatibility but is not
        currently included in the request.
        """
        if self._check_date(date) is None:
            return {}
        return self._execute_query(f"{self.matches_url}date={date}")

    def get_team(
        self,
        id: int,
        tab: str = "overview",
        type: str = "league",
        time_zone: str = "America/New_York",
    ):
        """Return team data for ``id``; ``tab`` and ``type`` are sent as-is.

        ``time_zone`` is accepted but not currently included in the request.
        """
        url = f"{self.teams_url}id={id}&tab={tab}&type={type}"
        return self._execute_query(url)

    def get_player(self, id: int):
        """Return the player data for the player ``id``."""
        url = f"{self.player_url}id={id}"
        return self._execute_query(url)

    def get_match_details(self, match_id: int):
        """Return the match details for ``match_id``."""
        url = f"{self.match_details_url}matchId={match_id}"
        return self._execute_query(url)

    def get_match_tv_listing(self, match_id: int, country_code: str = "GB"):
        """Return the TV listing of ``match_id`` for ``country_code``."""
        url = f"{self.tv_listing_url}matchId={match_id}&countryCode={country_code}"
        return self._execute_query(url)

    def get_tv_listings_country(self, country_code: str = "GB"):
        """Return the TV listings for ``country_code``."""
        url = f"{self.tv_listings_url}countryCode={country_code}"
        return self._execute_query(url)

    def search(self, term: str, user_language: str = "en-GB,en"):
        """Search for ``term``; the term is URL-quoted before being sent."""
        searchterm = urllib.parse.quote_plus(term)
        url = f"{self.search_url}term={searchterm}&userLanguage={user_language}"
        return self._execute_query(url)

    def get_fixtures(self, id: str, season: str):
        """Return the fixtures for ``id`` in ``season``."""
        url = f"{self.fixtures_url}id={id}&season={season}"
        return self._execute_query(url)

    def get_fixtures_overview(self, league_id: str, season_start: str, season_end: str) -> List[Dict]:
        """Return the league-overview match list for one season.

        The season is sent as ``<season_start>/<season_end>`` with the slash
        percent-encoded. Raises ``KeyError`` if the response does not contain
        ``overview`` -> ``leagueOverviewMatches``.
        """
        url = f"{self.leagues_url}id={league_id}&season={season_start}%2F{season_end}"
        return self._execute_query(url)["overview"]["leagueOverviewMatches"]

# data

In [56]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd

In [57]:
# API client built with the constructor defaults (no proxies, WARNING log level).
Fot = FotMob()
# Multi-season variant, kept for reference:
# f = Fot.get_fixtures_overview("47", "2023", "2024") + Fot.get_fixtures_overview("47", "2022", "2023") + Fot.get_fixtures_overview("47", "2021", "2022") + Fot.get_fixtures_overview("47", "2020", "2021")
# List of fixture dicts for league id "47", season 2023/2024
# (presumably the Premier League, judging by the clubs in the output below).
f = Fot.get_fixtures_overview("47", "2023", "2024")
# example data:
"""
{
    "id": "3900932",
    "pageUrl": "/matches/arsenal-vs-crystal-palace/36ytc8#3900932",
    "opponent": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "home": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "away": {
        "id": "9825",
        "name": "Arsenal",
        "score": 2
    },
    "displayTournament": true,
    "lnameArr": [
        "A",
        " | undefined"
    ],
    "notStarted": false,
    "tournament": {},
    "status": {
        "utcTime": "2022-08-05T19:00:00Z",
        "finished": true,
        "started": true,
        "cancelled": false,
        "scoreStr": "0 - 2",
        "reason": {
            "short": "FT",
            "shortKey": "fulltime_short",
            "long": "Full-Time",
            "longKey": "finished"
        }
    }
}
"""

# Flatten every finished fixture into one record; fixtures that have not
# been played yet are skipped because they carry no final score.
extracted_data = [
    {
        "home_score": fixture["home"]["score"],
        "away_score": fixture["away"]["score"],
        "home_team": fixture["home"]["name"],
        "away_team": fixture["away"]["name"],
        # Calendar year of kick-off, taken from the UTC timestamp.
        "year": pd.to_datetime(fixture["status"]["utcTime"]).year,
        "id": fixture["id"],
    }
    for fixture in f
    if fixture["status"]["finished"]
]

data = pd.DataFrame(extracted_data)
data

Unnamed: 0,home_score,away_score,home_team,away_team,year,id
0,0,3,Burnley,Man City,2023,4193450
1,2,1,Arsenal,Nottm Forest,2023,4193451
2,1,1,Bournemouth,West Ham,2023,4193452
3,4,1,Brighton,Luton,2023,4193453
4,0,1,Everton,Fulham,2023,4193454
...,...,...,...,...,...,...
125,1,3,Sheff Utd,Bournemouth,2023,4193659
126,0,1,Brentford,Arsenal,2023,4193651
127,1,2,Tottenham,Aston Villa,2023,4193660
128,0,3,Everton,Man United,2023,4193653


In [58]:
# # get stats

# for i, match in data.iterrows():
#     match_details = Fot.get_match_details(match["id"])["content"]
#     match["h2h"] = match_details["h2h"]["summary"]
#     match["home_team_rating"] = match_details["lineup"]["teamRatings"]["home"]

#     print(f"{i}/{len(data)}")

# data.to_csv("2023_fixtures.csv")

# # fixture_stats = pd.concat([pd.read_json("https://raw.githubusercontent.com/brimell/ailvarez/main/data/fixture_data_2223.json"), pd.read_json("https://raw.githubusercontent.com/brimell/ailvarez/main/data/fixture_data_2122.json"), pd.read_json("https://raw.githubusercontent.com/brimell/ailvarez/main/data/fixture_data_2021.json")])
# # fixture_stats

# data

# main

In [59]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# To force CPU execution, uncomment:
# device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [60]:
# Columns fed to the model, in the order the model call slices them
# (column 0 = home team, 1 = away team, 2 = year).
selected_features = ['home_team', 'away_team', "year"]

team_label_encoder = LabelEncoder()
year_label_encoder = LabelEncoder()

# Fit the team encoder on the union of both columns. Fitting on home teams
# only makes `transform` raise for any club that so far appears only as an
# away side. When both columns contain the same clubs the resulting codes
# are identical to the previous home-only fit.
team_label_encoder.fit(pd.concat([data["home_team"], data["away_team"]]))
data["home_team"] = team_label_encoder.transform(data["home_team"])
data["away_team"] = team_label_encoder.transform(data["away_team"])
data["year"] = year_label_encoder.fit_transform(data["year"])

X = data[selected_features]
y_home = data['home_score']
y_away = data['away_score']

# One shared split so features and both targets stay row-aligned.
(
    X_train,
    X_test,
    y_home_train,
    y_home_test,
    y_away_train,
    y_away_test,
) = train_test_split(X, y_home, y_away, test_size=0.1, random_state=42)


def _to_device_tensor(values):
    """Convert a pandas DataFrame/Series to a float32 tensor on `device`."""
    return torch.from_numpy(values.astype(np.float32).values).to(device)


X_train_tensor = _to_device_tensor(X_train)
y_home_train_tensor = _to_device_tensor(y_home_train)
y_away_train_tensor = _to_device_tensor(y_away_train)
X_test_tensor = _to_device_tensor(X_test)
y_home_test_tensor = _to_device_tensor(y_home_test)
y_away_test_tensor = _to_device_tensor(y_away_test)


# X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_train

In [61]:
import time


class FootballScorePredictor(nn.Module):
    """Feed-forward network with two heads: home score and away score.

    The raw feature row is concatenated with a team embedding for each side
    (shared table) and a year embedding, passed through six ReLU-activated
    linear layers, and then fed to one linear head per side.
    """

    def __init__(self, input_size, team_vocab_size, year_vocab_size, team_embedding_dim, year_embedding_dim, hidden_size, output_size):
        super().__init__()

        self.embedding_team = nn.Embedding(team_vocab_size, team_embedding_dim)
        self.embedding_year = nn.Embedding(year_vocab_size, year_embedding_dim)

        # Width of the concatenated input: raw features + two team
        # embeddings (home and away) + one year embedding.
        concat_width = input_size + 2 * team_embedding_dim + year_embedding_dim
        self.fc1 = nn.Linear(concat_width, hidden_size)
        # fc2 .. fc6 are all hidden -> hidden; registered in numeric order.
        for layer_number in range(2, 7):
            setattr(self, f"fc{layer_number}", nn.Linear(hidden_size, hidden_size))
        self.fc_home = nn.Linear(hidden_size, output_size)
        self.fc_away = nn.Linear(hidden_size, output_size)

        self.relu = nn.ReLU()

    def forward(self, x, home_team, away_team, year):
        """Return ``(home_score, away_score)`` for a batch.

        ``home_team``, ``away_team`` and ``year`` are cast to long and used
        as embedding indices; ``x`` is concatenated unchanged along dim 1.
        """
        pieces = (
            x,
            self.embedding_team(home_team.long()),
            self.embedding_team(away_team.long()),
            self.embedding_year(year.long()),
        )
        hidden = torch.cat(pieces, dim=1)

        trunk = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        for layer in trunk:
            hidden = self.relu(layer(hidden))

        return self.fc_home(hidden), self.fc_away(hidden)

def custom_mse_loss(output, target):
    """Return the mean squared error between ``output`` and ``target``."""
    squared_error = (output - target) ** 2
    # The penalty on negative predictions is currently switched off:
    # penalty = torch.mean(torch.relu(-output))
    penalty = 0
    return torch.mean(squared_error) + penalty

# --- Hyper-parameters -------------------------------------------------------
input_size = X_train.shape[1]
hidden_size = 500
output_size = 1
team_vocab_size = len(team_label_encoder.classes_)
# One index more than the years seen by the encoder
# (presumably reserved for a year that is absent from the training data).
year_vocab_size = len(year_label_encoder.classes_) + 1
team_embedding_dim = 1100
year_embedding_dim = 8

model = FootballScorePredictor(
    input_size=input_size,
    team_vocab_size=team_vocab_size,
    year_vocab_size=year_vocab_size,
    team_embedding_dim=team_embedding_dim,
    year_embedding_dim=year_embedding_dim,
    hidden_size=hidden_size,
    output_size=output_size,
).to(device)

criterion = custom_mse_loss
optimizer = optim.Adam(model.parameters(), lr=0.000003)

# --- Full-batch training ----------------------------------------------------
t0 = time.time()

num_epochs = 10000
for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()

    # Columns 0, 1 and 2 of the feature tensor are the home team, away team
    # and year codes; they double as the embedding indices.
    home_outputs, away_outputs = model(
        X_train_tensor,
        X_train_tensor[:, 0],
        X_train_tensor[:, 1],
        X_train_tensor[:, 2],
    )

    # The optimized loss is the sum of the two per-side MSE terms.
    loss_home = criterion(home_outputs.squeeze(), y_home_train_tensor)
    loss_away = criterion(away_outputs.squeeze(), y_away_train_tensor)
    loss = loss_home + loss_away

    loss.backward()
    optimizer.step()

    # Progress report every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Total Loss: {loss.item()}')

t1 = time.time()

total = t1 - t0
print(f"total time: {total}")

Epoch [100/10000], Total Loss: 7.957542419433594
Epoch [200/10000], Total Loss: 7.236265182495117
Epoch [300/10000], Total Loss: 4.974786758422852
Epoch [400/10000], Total Loss: 3.0267882347106934
Epoch [500/10000], Total Loss: 2.7608482837677
Epoch [600/10000], Total Loss: 2.4715652465820312
Epoch [700/10000], Total Loss: 1.9487245082855225
Epoch [800/10000], Total Loss: 1.3306641578674316
Epoch [900/10000], Total Loss: 0.920231819152832
Epoch [1000/10000], Total Loss: 0.6189245581626892
Epoch [1100/10000], Total Loss: 0.36646151542663574
Epoch [1200/10000], Total Loss: 0.18625408411026
Epoch [1300/10000], Total Loss: 0.08382518589496613
Epoch [1400/10000], Total Loss: 0.037020593881607056
Epoch [1500/10000], Total Loss: 0.019216833636164665
Epoch [1600/10000], Total Loss: 0.009900431148707867
Epoch [1700/10000], Total Loss: 0.003649732330814004
Epoch [1800/10000], Total Loss: 0.0014608870260417461
Epoch [1900/10000], Total Loss: 0.0007896475726738572
Epoch [2000/10000], Total Loss: 0

### graph of test data

In [70]:
import plotly.express as px
import plotly.graph_objects as go

def _match_result(home_scores, away_scores):
    """Label each fixture from the home side's view: "win", "loss" or "draw"."""
    return np.where(
        home_scores > away_scores,
        "win",
        np.where(home_scores < away_scores, "loss", "draw"),
    )


# Only the forward pass needs to run without gradient tracking.
with torch.no_grad():
    home_outputs, away_outputs = model(
        X_test_tensor, X_test_tensor[:, 0], X_test_tensor[:, 1], X_test_tensor[:, 2]
    )

# squeeze(-1) drops only the trailing output dimension, so a one-row test
# split still yields a 1-D array that pandas accepts as a column.
home_outputs = home_outputs.squeeze(-1).cpu().numpy()
away_outputs = away_outputs.squeeze(-1).cpu().numpy()

y_home_test_np = y_home_test_tensor.cpu().numpy()
y_away_test_np = y_away_test_tensor.cpu().numpy()

# Turn the integer team codes back into club names for display.
home_teams = team_label_encoder.inverse_transform(X_test["home_team"])
away_teams = team_label_encoder.inverse_transform(X_test["away_team"])

df_home = pd.DataFrame(
    {
        "Team": home_teams,
        "Opponent": away_teams,
        "Actual Score": y_home_test_np,
        "Predicted Score": home_outputs,
    }
)

df_away = pd.DataFrame(
    {
        "Team": away_teams,
        "Opponent": home_teams,
        "Actual Score": y_away_test_np,
        "Predicted Score": away_outputs,
    }
)

# One row per fixture, built in a single vectorized step instead of
# concatenating one-row frames in a loop (quadratic, duplicated index).
# NOTE(review): predicted scores are raw floats, so a predicted "draw"
# requires exact equality and will practically never occur; consider
# rounding the predictions before comparing.
df_merged = pd.DataFrame(
    {
        "home_team": home_teams,
        "away_team": away_teams,
        "home_actual_score": y_home_test_np,
        "away_actual_score": y_away_test_np,
        "actual_result": _match_result(y_home_test_np, y_away_test_np),
        "home_predicted_score": home_outputs,
        "away_predicted_score": away_outputs,
        "predicted_result": _match_result(home_outputs, away_outputs),
    }
)

# Share of fixtures whose predicted outcome (win/loss/draw) matches reality.
df_merged["correct_win"] = df_merged["actual_result"] == df_merged["predicted_result"]
win_accuracy = df_merged["correct_win"].mean()
print(f"Win Accuracy: {win_accuracy}")

# Long format: one row per (team, fixture), home rows followed by away rows.
df_all = pd.concat([df_home, df_away])

fig = px.scatter(
    df_all,
    x="Actual Score",
    y="Predicted Score",
    color="Team",
    symbol="Opponent",
    title="Teams Performance: Predicted vs Actual Scores",
    labels={"Actual Score": "Actual Score", "Predicted Score": "Predicted Score"},
    hover_data=["Team", "Opponent"],
)

# Least-squares line of predicted vs actual; a perfect model has gradient 1.
m, b = np.polyfit(df_all["Actual Score"], df_all["Predicted Score"], 1)
trendline = m * df_all["Actual Score"] + b
fig.add_trace(
    go.Scatter(
        x=df_all["Actual Score"],
        y=trendline,
        mode="lines",
        name="Trendline",
        line={"color": "red"},
    )
)
print(f"Gradient: {m}, Target: 1")
# Experiment notes:
#   best so far: 0.5826897630277947
#   team_embedding_dim = 1100
#   num_epochs = 1000
#   hidden_size = 500
#   lr=0.000003
#   6 layers

# Reference diagonal: where predicted score equals actual score.
line_x_values = np.linspace(0, 5)
line_y_values = line_x_values
fig.add_trace(
    go.Scatter(
        x=line_x_values,
        y=line_y_values,
        mode="lines",
        name="y = x",
        line={"color": "green"},
    )
)

fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode="markers"))
fig.show()

Accuracy Percentage: 45.23%


KeyError: ignored

### graph of training data

In [63]:
# Forward pass over the training split; no gradients are needed for plotting.
with torch.no_grad():
    home_outputs, away_outputs = model(
        X_train_tensor,
        X_train_tensor[:, 0],
        X_train_tensor[:, 1],
        X_train_tensor[:, 2],
    )

home_outputs = home_outputs.squeeze().cpu().numpy()
away_outputs = away_outputs.squeeze().cpu().numpy()

y_home_train_np = y_home_train_tensor.cpu().numpy()
y_away_train_np = y_away_train_tensor.cpu().numpy()

# Decode the integer team codes back into club names.
home_teams = team_label_encoder.inverse_transform(X_train["home_team"])
away_teams = team_label_encoder.inverse_transform(X_train["away_team"])

# One frame per side, stacked so every (team, fixture) pair is one point.
df_home = pd.DataFrame(
    {
        "Team": home_teams,
        "Opponent": away_teams,
        "Actual Score": y_home_train_np,
        "Predicted Score": home_outputs,
    }
)
df_away = pd.DataFrame(
    {
        "Team": away_teams,
        "Opponent": home_teams,
        "Actual Score": y_away_train_np,
        "Predicted Score": away_outputs,
    }
)
df_all = pd.concat([df_home, df_away])

fig = px.scatter(
    df_all,
    x="Actual Score",
    y="Predicted Score",
    color="Team",
    symbol="Opponent",
    title="Teams Performance: Predicted vs Actual Scores",
    labels={"Actual Score": "Actual Score", "Predicted Score": "Predicted Score"},
    hover_data=["Team", "Opponent"],
)

# Least-squares fit of predicted against actual scores.
m, b = np.polyfit(df_all["Actual Score"], df_all["Predicted Score"], 1)
trendline = m * df_all["Actual Score"] + b
fig.add_trace(
    go.Scatter(
        x=df_all["Actual Score"],
        y=trendline,
        mode="lines",
        name="Trendline",
        line={"color": "red"},
    )
)

# Reference diagonal: predicted score equals actual score.
line_x_values = np.linspace(0, 8)
line_y_values = line_x_values
fig.add_trace(
    go.Scatter(
        x=line_x_values,
        y=line_y_values,
        mode="lines",
        name="y = x",
        line={"color": "green"},
    )
)

fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode="markers"))
fig.show()


# predictions for upcoming games

In [64]:
# Collect the fixtures that have not finished yet; these have no score
# fields worth keeping, only the two sides, the year and the fixture id.
extracted_data = [
    {
        "home_team": fixture["home"]["name"],
        "away_team": fixture["away"]["name"],
        "year": pd.to_datetime(fixture["status"]["utcTime"]).year,
        "id": fixture["id"],
    }
    for fixture in f
    if not fixture["status"]["finished"]
]
new_data = pd.DataFrame(extracted_data)

In [65]:
# Encode the upcoming fixtures with the encoders fitted on the training
# data. Refitting fresh encoders here would assign codes from the prediction
# data alone, which need not match the indices the model's embeddings were
# trained on, and would also overwrite the encoders other cells rely on.
# `transform` raises ValueError for a club the training encoder never saw,
# which is intended: the model has no embedding row for such a club.
new_data["home_team"] = team_label_encoder.transform(new_data["home_team"])
new_data["away_team"] = team_label_encoder.transform(new_data["away_team"])

# Years seen in training keep their trained index. Any other year is mapped
# to the one extra index the model's year embedding reserves
# (year_vocab_size = len(classes_) + 1); that row receives no training signal.
year_classes = year_label_encoder.classes_
year_codes = pd.Index(year_classes).get_indexer(new_data["year"])
year_codes[year_codes == -1] = len(year_classes)
new_data["year"] = year_codes

X_new = new_data[selected_features]


X_new_tensor = torch.from_numpy(X_new.astype(np.float32).values).to(device)
# X_new_tensor, X_new_tensor[:, 0], X_new_tensor[:, 1], X_new_tensor[:, 2]

In [66]:
# Predict scores for the upcoming fixtures; inference only, so no gradients.
with torch.no_grad():
    home_outputs, away_outputs = model(
        X_new_tensor,
        X_new_tensor[:, 0],
        X_new_tensor[:, 1],
        X_new_tensor[:, 2],
    )

home_outputs = home_outputs.squeeze().cpu().numpy()
away_outputs = away_outputs.squeeze().cpu().numpy()

# Decode the integer team codes back into club names.
home_teams = team_label_encoder.inverse_transform(new_data["home_team"])
away_teams = team_label_encoder.inverse_transform(new_data["away_team"])

# Home-side predictions; the same frame is printed and later plotted.
df_home = pd.DataFrame(
    {
        "Team": home_teams,
        "Opponent": away_teams,
        "Predicted Score": home_outputs,
    }
)
print(df_home)

# Away-side predictions, with the Team/Opponent roles swapped.
df_away = pd.DataFrame(
    {
        "Team": away_teams,
        "Opponent": home_teams,
        "Predicted Score": away_outputs,
    }
)

df_all = pd.concat([df_home, df_away])

# Figure 1: predicted score per team.
marker_style = dict(size=8, opacity=0.7)
fig = px.scatter(
    df_all,
    x="Team",
    y="Predicted Score",
    color="Team",
    title="Predicted Scores",
    hover_data=["Team", "Opponent"],
)
fig.update_traces(marker=marker_style, selector=dict(mode="markers"))
fig.show()

# Figure 2: team against opponent, with the prediction shown on hover.
fig2 = px.scatter(
    df_all,
    x="Team",
    y="Opponent",
    color="Team",
    title="Predicted Scores, Team against Team",
    hover_data=["Team", "Opponent", "Predicted Score"],
)
fig2.update_traces(marker=marker_style, selector=dict(mode="markers"))
fig2.show()

               Team     Opponent  Predicted Score
0            Fulham       Wolves         0.887380
1           Arsenal       Wolves         2.932664
2         Brentford        Luton         2.419785
3           Burnley    Sheff Utd         2.136740
4      Nottm Forest      Everton         1.746142
..              ...          ...              ...
245  Crystal Palace  Aston Villa         0.944867
246       Liverpool       Wolves         2.243615
247           Luton       Fulham         1.324217
248        Man City     West Ham         2.426514
249       Sheff Utd    Tottenham         0.561229

[250 rows x 3 columns]
