# fotmob

In [1]:
import re
import urllib.parse
from logging import getLevelName, getLogger
from typing import Optional, Union, List, Dict

import requests
from cachecontrol import CacheControl

VERSION = "1.2.0"


class FotMob:
    """Small client for the FotMob web API.

    Every public method builds a URL underneath :attr:`BASE_URL`, issues a GET
    request through a ``CacheControl``-wrapped ``requests`` session and returns
    the decoded JSON body.
    """

    BASE_URL = "https://www.fotmob.com/api"
    LOGGER = getLogger(__name__)
    # Level names accepted for ``logging_level`` (compared case-insensitively).
    _LOGGING_LEVELS = ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
    # Exactly eight digits in YYYYMMDD order, limited to the years 2000-2099.
    _DATE_PATTERN = re.compile(r"(20\d{2})(\d{2})(\d{2})")

    def __init__(
        self, proxies: Optional[dict] = None, logging_level: Optional[str] = "WARNING"
    ) -> None:
        """Create the HTTP session and pre-compute the endpoint URLs.

        Args:
            proxies: Optional ``requests``-style proxy mapping merged into the
                session's proxies.
            logging_level: Name of a standard logging level. A falsy value
                leaves the logger untouched; an unknown name is reported and
                otherwise ignored.
        """
        session = requests.Session()
        if proxies:
            session.proxies.update(proxies)

        if logging_level:
            level_name = logging_level.upper()
            if level_name in self._LOGGING_LEVELS:
                # LOGGER is a class attribute, so this affects every instance.
                self.LOGGER.setLevel(getLevelName(level_name))
            else:
                self.LOGGER.warning(
                    "Logging level %s not recognized!", logging_level
                )

        self.session = CacheControl(session)
        self.matches_url = f"{self.BASE_URL}/matches?"
        self.leagues_url = f"{self.BASE_URL}/leagues?"
        self.teams_url = f"{self.BASE_URL}/teams?"
        self.player_url = f"{self.BASE_URL}/playerData?"
        self.match_details_url = f"{self.BASE_URL}/matchDetails?"
        self.search_url = f"{self.BASE_URL}/searchData?"
        self.tv_listing_url = f"{self.BASE_URL}/tvlisting?"
        self.tv_listings_url = f"{self.BASE_URL}/tvlistings?"
        self.fixtures_url = f"{self.BASE_URL}/fixtures?"

    def _check_date(self, date: str) -> Optional[re.Match]:
        """Return a match object when ``date`` is exactly ``20YYMMDD``, else ``None``.

        ``fullmatch`` is used so that strings with trailing characters
        (for example ``"20230811abc"``) are rejected rather than forwarded
        to the API.
        """
        return self._DATE_PATTERN.fullmatch(date)

    def _execute_query(self, url: str):
        """GET ``url`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: when the response status signals an error
                (via ``raise_for_status``).
        """
        response = self.session.get(url)
        response.raise_for_status()
        self.LOGGER.debug(response)
        return response.json()

    def get_matches_by_date(
        self, date: str, time_zone: str = "America/New_York"
    ) -> dict:
        """Return the matches payload for ``date`` (``YYYYMMDD``).

        An empty dict is returned, without any request being made, when
        ``date`` fails validation. ``time_zone`` is accepted for interface
        compatibility but is currently not sent to the API.
        """
        if self._check_date(date) is None:
            return {}
        return self._execute_query(f"{self.matches_url}date={date}")

    def get_team(
        self,
        id: int,
        tab: str = "overview",
        type: str = "league",
        time_zone: str = "America/New_York",
    ):
        """Return the team payload for team ``id``.

        ``tab`` and ``type`` are passed through as query parameters;
        ``time_zone`` is accepted but currently not sent to the API.
        """
        url = f"{self.teams_url}id={id}&tab={tab}&type={type}"
        return self._execute_query(url)

    def get_player(self, id: int):
        """Return the player-data payload for player ``id``."""
        return self._execute_query(f"{self.player_url}id={id}")

    def get_match_details(self, match_id: int):
        """Return the match-details payload for ``match_id``."""
        return self._execute_query(f"{self.match_details_url}matchId={match_id}")

    def get_match_tv_listing(self, match_id: int, country_code: str = "GB"):
        """Return the TV listing of one match for ``country_code``."""
        url = f"{self.tv_listing_url}matchId={match_id}&countryCode={country_code}"
        return self._execute_query(url)

    def get_tv_listings_country(self, country_code: str = "GB"):
        """Return all TV listings for ``country_code``."""
        return self._execute_query(f"{self.tv_listings_url}countryCode={country_code}")

    def search(self, term: str, user_language: str = "en-GB,en"):
        """Search for ``term``; the term is URL-quoted before being sent."""
        searchterm = urllib.parse.quote_plus(term)
        url = f"{self.search_url}term={searchterm}&userLanguage={user_language}"
        return self._execute_query(url)

    def get_fixtures(self, id: str, season: str):
        """Return the fixtures payload for ``id`` and ``season``."""
        return self._execute_query(f"{self.fixtures_url}id={id}&season={season}")

    def get_fixtures_overview(self, league_id: str, season_start: str, season_end: str) -> List[Dict]:
        """Return the ``leagueOverviewMatches`` list of a league season.

        The season is sent as ``<season_start>/<season_end>`` with the slash
        percent-encoded (``%2F``).

        Raises:
            KeyError: when the response lacks ``overview`` or
                ``leagueOverviewMatches``.
        """
        url = f"{self.leagues_url}id={league_id}&season={season_start}%2F{season_end}"
        return self._execute_query(url)["overview"]["leagueOverviewMatches"]

# data

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.expand_frame_repr', False)

In [3]:
Fot = FotMob()
# Fixture overviews for league id "47" across four consecutive seasons,
# newest season first, flattened into one list of match dicts.
# (The captured output shows English Premier League clubs for this id.)
f = [
    fixture
    for season_start, season_end in (
        ("2023", "2024"),
        ("2022", "2023"),
        ("2021", "2022"),
        ("2020", "2021"),
    )
    for fixture in Fot.get_fixtures_overview("47", season_start, season_end)
]
# example data:
"""
{
    "id": "3900932",
    "pageUrl": "/matches/arsenal-vs-crystal-palace/36ytc8#3900932",
    "opponent": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "home": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "away": {
        "id": "9825",
        "name": "Arsenal",
        "score": 2
    },
    "displayTournament": true,
    "lnameArr": [
        "A",
        " | undefined"
    ],
    "notStarted": false,
    "tournament": {},
    "status": {
        "utcTime": "2022-08-05T19:00:00Z",
        "finished": true,
        "started": true,
        "cancelled": false,
        "scoreStr": "0 - 2",
        "reason": {
            "short": "FT",
            "shortKey": "fulltime_short",
            "long": "Full-Time",
            "longKey": "finished"
        }
    }
}
"""



'\n{\n    "id": "3900932",\n    "pageUrl": "/matches/arsenal-vs-crystal-palace/36ytc8#3900932",\n    "opponent": {\n        "id": "9826",\n        "name": "Crystal Palace",\n        "score": 0\n    },\n    "home": {\n        "id": "9826",\n        "name": "Crystal Palace",\n        "score": 0\n    },\n    "away": {\n        "id": "9825",\n        "name": "Arsenal",\n        "score": 2\n    },\n    "displayTournament": true,\n    "lnameArr": [\n        "A",\n        " | undefined"\n    ],\n    "notStarted": false,\n    "tournament": {},\n    "status": {\n        "utcTime": "2022-08-05T19:00:00Z",\n        "finished": true,\n        "started": true,\n        "cancelled": false,\n        "scoreStr": "0 - 2",\n        "reason": {\n            "short": "FT",\n            "shortKey": "fulltime_short",\n            "long": "Full-Time",\n            "longKey": "finished"\n        }\n    }\n}\n'

In [10]:
def extract_values(data):
    """Flatten a nested stats mapping into ``home_<key>`` / ``away_<key>`` entries.

    The mapping is walked recursively:

    * nested dicts are descended into;
    * a two-element list of plain values is read as a ``[home, away]`` pair and
      stored under the sibling ``"key"`` entry of the same dict;
    * any other list is treated as a collection of sub-sections, and every
      dict inside it is descended into (non-dict items are skipped).

    Args:
        data: A dict (possibly nested) or ``None``.

    Returns:
        A flat dict. Later entries overwrite earlier ones on a name clash.

    Raises:
        KeyError: if a ``[home, away]`` pair holds a value but its enclosing
            dict has no ``"key"`` entry to name it.
    """
    result = {}
    if data is None:
        return result

    for value in data.values():
        if isinstance(value, dict):
            result |= extract_values(value)
        elif isinstance(value, list):
            # A pair of containers is a two-item section list, not a home/away
            # reading, so only plain values qualify as a pair.
            is_pair = len(value) == 2 and not any(
                isinstance(item, (dict, list)) for item in value
            )
            if is_pair:
                home, away = value
                # Compare with None instead of truthiness so a genuine 0
                # (e.g. zero shots on target) is kept.
                if home is not None:
                    result[f"home_{data['key']}"] = home
                if away is not None:
                    result[f"away_{data['key']}"] = away
            else:
                for item in value:
                    if isinstance(item, dict):
                        result |= extract_values(item)

    return result

extracted_data = []

for match in f:
    # Only finished fixtures are turned into rows; everything else is skipped.
    if not match["status"]["finished"]:
        continue

    match_details = Fot.get_match_details(match["id"])["content"]
    kickoff = pd.to_datetime(match["status"]["utcTime"])
    home, away = match["home"], match["away"]

    # Flattened match statistics first, then the fixture fields; on a name
    # clash the fixture fields win because they come last.
    temp_data = {
        **extract_values(match_details["stats"]),
        "home_score": home["score"],
        "away_score": away["score"],
        "home_team": home["name"],
        "away_team": away["name"],
        # 1 for a home win, 0 for a draw or an away win.
        "result": int(home["score"] > away["score"]),
        "date": kickoff,
        "year": kickoff.year,
        "day": kickoff.day_of_week,
        "id": match["id"],
    }

    print(temp_data)
    extracted_data.append(temp_data)


data = pd.DataFrame(extracted_data)
# Persist the collected rows next to the notebook.
data.to_csv("data.csv")
data

{'home_BallPossesion': 42, 'away_BallPossesion': 58, 'home_expected_goals': '0.07', 'away_expected_goals': '1.30', 'home_total_shots': 2, 'away_total_shots': 12, 'home_ShotsOnTarget': 0, 'away_ShotsOnTarget': 5, 'home_big_chance': 0, 'away_big_chance': 1, 'home_big_chance_missed_title': 0, 'away_big_chance_missed_title': 0, 'home_accurate_passes': '185 (83%)', 'away_accurate_passes': '275 (90%)', 'home_fouls': 6, 'away_fouls': 3, 'home_corners': 5, 'away_corners': 4, 'home_shots': None, 'away_shots': None, 'home_ShotsOffTarget': 1, 'away_ShotsOffTarget': 2, 'home_blocked_shots': 1, 'away_blocked_shots': 5, 'home_shots_woodwork': 0, 'away_shots_woodwork': 0, 'home_shots_inside_box': 1, 'away_shots_inside_box': 10, 'home_shots_outside_box': 1, 'away_shots_outside_box': 2, 'home_expected_goals_open_play': '0.04', 'away_expected_goals_open_play': '0.47', 'home_expected_goals_set_play': '0.02', 'away_expected_goals_set_play': '0.83', 'home_expected_goals_non_penalty': '0.07', 'away_expected

Unnamed: 0,home_BallPossesion,away_BallPossesion,home_expected_goals,away_expected_goals,home_total_shots,away_total_shots,home_ShotsOnTarget,away_ShotsOnTarget,home_big_chance,away_big_chance,...,away_score,home_team,away_team,result,date,year,day,id,home_touches_opp_box,away_touches_opp_box
0,42.0,58.0,0.07,1.30,2.0,12.0,0.0,5.0,0.0,1.0,...,3,Burnley,Man City,0,2023-08-11 19:00:00+00:00,2023,4,4193450,,
1,72.0,28.0,0.51,0.90,8.0,5.0,4.0,2.0,0.0,1.0,...,1,Arsenal,Nottm Forest,1,2023-08-12 12:00:00+00:00,2023,5,4193451,,
2,60.0,40.0,1.29,0.33,11.0,6.0,3.0,1.0,1.0,0.0,...,1,Bournemouth,West Ham,0,2023-08-12 14:00:00+00:00,2023,5,4193452,,
3,71.0,29.0,2.64,1.04,12.0,3.0,9.0,2.0,4.0,1.0,...,1,Brighton,Luton,1,2023-08-12 14:00:00+00:00,2023,5,4193453,51.0,21.0
4,53.0,47.0,0.60,1.46,9.0,8.0,4.0,2.0,1.0,2.0,...,1,Everton,Fulham,0,2023-08-12 14:00:00+00:00,2023,5,4193454,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266,70.0,30.0,1.61,0.60,19.0,5.0,5.0,4.0,3.0,2.0,...,0,Liverpool,Crystal Palace,1,2021-05-23 15:00:00+00:00,2021,6,3411724,,
1267,68.0,32.0,2.56,1.09,21.0,8.0,11.0,3.0,5.0,1.0,...,0,Man City,Everton,1,2021-05-23 15:00:00+00:00,2021,6,3411725,,
1268,43.0,57.0,0.49,1.06,12.0,10.0,3.0,3.0,0.0,0.0,...,0,Sheff Utd,Burnley,1,2021-05-23 15:00:00+00:00,2021,6,3411726,,
1269,37.0,63.0,1.33,1.51,14.0,17.0,7.0,5.0,3.0,2.0,...,0,West Ham,Southampton,1,2021-05-23 15:00:00+00:00,2021,6,3411727,,


In [None]:
# Encode both team columns against ONE shared, sorted category list so a team
# always gets the same integer code whether it appears at home or away.
team_dtype = pd.CategoricalDtype(
    sorted(set(data["home_team"]) | set(data["away_team"]))
)
data["home_team_code"] = data["home_team"].astype(team_dtype).cat.codes
data["away_team_code"] = data["away_team"].astype(team_dtype).cat.codes

# Chronological hold-out split on calendar year. The test side uses ">=" so
# matches played in 2022 are evaluated instead of being dropped from both
# sets, which is what "< 2022" paired with "> 2022" did.
train = data[data["year"] < 2022]
test = data[data["year"] >= 2022]


predictors = ["day", "year", "home_team_code", "away_team_code"]

# main

In [None]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
# (Assign device = "cpu" here instead to force CPU execution.)
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

# Baseline: a random forest predicting ``result`` from the columns listed in
# ``predictors``, fitted on ``train`` and scored on ``test``.
rf = RandomForestClassifier(
    n_estimators=50,
    min_samples_split=10,
    random_state=42,  # fixed seed so repeated runs give the same forest
)
rf.fit(train[predictors], train["result"])

preds = rf.predict(test[predictors])
acc = accuracy_score(test["result"], preds)
prec = precision_score(test["result"], preds)

# Confusion table: rows are the actual outcome, columns the predicted one.
combined = pd.DataFrame({"actual": test["result"], "prediction": preds})
display(pd.crosstab(index=combined["actual"], columns=[combined["prediction"]]))

acc, prec

In [None]:
# Every distinct team name that occurs in either team column.
teams = pd.unique(data[["home_team", "away_team"]].values.ravel("K"))

# Map each team to the rows it took part in, with ``is_home`` set to 1 when
# the team was the home side in that row and 0 otherwise.
team_dfs = {}
for team in teams:
    plays_home = data["home_team"] == team
    plays_away = data["away_team"] == team
    team_data = data[plays_home | plays_away]
    team_data = team_data.assign(
        is_home=(team_data["home_team"] == team).astype(int)
    )
    team_dfs[team] = team_data

team_dfs["Man City"]

In [None]:
def rolling_averages(group, cols, new_cols):
    """Append rolling means of the three preceding rows, ordered by ``date``.

    ``closed='left'`` keeps the current row out of its own window, so each
    value is computed from earlier rows only. Rows lacking a full window end
    up with NaN in ``new_cols`` and are dropped. The input frame is not
    modified; a sorted copy is returned.
    """
    ordered = group.sort_values("date")
    ordered[new_cols] = ordered[cols].rolling(3, closed="left").mean()
    return ordered.dropna(subset=new_cols)

# Source columns to average, and the matching "<name>_rolling" output names.
cols = [
    "goals_for",
    "goals_against",
    "shots",
    "shots_taken",
    "free_kicks",
    "penalty_kicks",
    "penalty_kick_attempts",
]
new_cols = [name + "_rolling" for name in cols]
new_cols

### graph of test data

In [None]:
import plotly.express as px
import plotly.graph_objects as go

def _match_outcomes(home_scores, away_scores):
    """Label each fixture "win", "loss" or "draw" from the home side's view."""
    home_scores = np.asarray(home_scores)
    away_scores = np.asarray(away_scores)
    return np.select(
        [home_scores > away_scores, home_scores < away_scores],
        ["win", "loss"],
        default="draw",
    )


# Only the forward pass needs gradient tracking switched off; the pandas and
# plotly work below is unaffected by it and therefore sits outside the block.
with torch.no_grad():
    home_outputs, away_outputs = model(
        X_test_tensor, X_test_tensor[:, 0], X_test_tensor[:, 1], X_test_tensor[:, 2]
    )
home_outputs = home_outputs.squeeze().cpu().numpy()
away_outputs = away_outputs.squeeze().cpu().numpy()

y_home_test_np = y_home_test_tensor.cpu().numpy()
y_away_test_np = y_away_test_tensor.cpu().numpy()

# Turn the encoded team ids back into readable names.
home_teams = team_label_encoder.inverse_transform(X_test["home_team"])
away_teams = team_label_encoder.inverse_transform(X_test["away_team"])

# Long format: one row per (team, match), used for the pooled trend line.
df_home = pd.DataFrame(
    {
        "Team": home_teams,
        "Opponent": away_teams,
        "Actual Score": y_home_test_np,
        "Predicted Score": home_outputs,
    }
)

df_away = pd.DataFrame(
    {
        "Team": away_teams,
        "Opponent": home_teams,
        "Actual Score": y_away_test_np,
        "Predicted Score": away_outputs,
    }
)

# Wide format: one row per match, built column-wise in a single step rather
# than by concatenating one-row frames in a loop (quadratic in the number of
# matches). Predicted scores are rounded to whole goals before comparison, so
# predictions that round to the same value count as a draw.
df_merged = pd.DataFrame(
    {
        "home_team": home_teams,
        "away_team": away_teams,
        "home_actual_score": y_home_test_np,
        "away_actual_score": y_away_test_np,
        "actual_result": _match_outcomes(y_home_test_np, y_away_test_np),
        "home_predicted_score": home_outputs,
        "away_predicted_score": away_outputs,
        "predicted_result": _match_outcomes(
            np.rint(home_outputs), np.rint(away_outputs)
        ),
    }
)

# Share of matches whose predicted outcome equals the actual outcome.
df_merged["prediction_accuracy"] = df_merged["actual_result"] == df_merged["predicted_result"]
prediction_accuracy = df_merged["prediction_accuracy"].sum() / len(df_merged)
print(f"Prediction Accuracy: {prediction_accuracy * 100}%")

# Baseline: accuracy of always predicting a home win.
correct_win_home_team = df_merged["actual_result"] == "win"
win_accuracy_home = correct_win_home_team.sum() / len(df_merged)
print(f"Win Accuracy of home team always wins strategy: {win_accuracy_home * 100}")

# Baseline: accuracy of always predicting an away win.
correct_win_away_team = df_merged["actual_result"] == "loss"
win_accuracy_away = correct_win_away_team.sum() / len(df_merged)
print(f"Win Accuracy of away team always wins strategy: {win_accuracy_away * 100}")

df_all = pd.concat([df_home, df_away])

fig = px.scatter(
    df_merged,
    x="home_actual_score",
    y="home_predicted_score",
    color="home_team",
    symbol="away_team",
    title="Teams Performance: Predicted vs Actual Scores",
    hover_data=["home_team", "away_team", "actual_result", "predicted_result", "home_predicted_score", "away_predicted_score", "home_actual_score", "away_actual_score"],
)

# Least-squares line of predicted against actual scores over both sides;
# a perfect model would give a gradient of 1.
m, b = np.polyfit(df_all["Actual Score"], df_all["Predicted Score"], 1)
trendline = m * df_all["Actual Score"] + b
fig.add_trace(
    go.Scatter(
        x=df_all["Actual Score"],
        y=trendline,
        mode="lines",
        name="Trendline",
        line={"color": "red"},
    )
)
print(f"Gradient: {m}, Target: 1")
# Best so far:
#
#   Win Accuracy: 53.84615384615385%
#   Gradient: 0.5826897630277947
#   team_embedding_dim = 1100
#   num_epochs = 1000
#   hidden_size = 500
#   lr=0.000003
#   6 layers

# Reference diagonal: where predicted equals actual.
line_x_values = np.linspace(0, 5)
line_y_values = line_x_values
fig.add_trace(
    go.Scatter(
        x=line_x_values,
        y=line_y_values,
        mode="lines",
        name="y = x",
        line={"color": "green"},
    )
)

fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode="markers"))
fig.show()

### graph of training data

In [None]:
with torch.no_grad():
    # Forward pass over the training inputs; no gradients are recorded here.
    home_outputs, away_outputs = model(
        X_train_tensor,
        X_train_tensor[:, 0],
        X_train_tensor[:, 1],
        X_train_tensor[:, 2],
    )
    home_outputs = home_outputs.squeeze().cpu().numpy()
    away_outputs = away_outputs.squeeze().cpu().numpy()

    y_home_train_np = y_home_train_tensor.cpu().numpy()
    y_away_train_np = y_away_train_tensor.cpu().numpy()

    # Turn the encoded team ids back into readable names.
    home_teams = team_label_encoder.inverse_transform(X_train["home_team"])
    away_teams = team_label_encoder.inverse_transform(X_train["away_team"])

    # One row per (team, match): the home side's view, then the away side's.
    df_home = pd.DataFrame(
        {
            "Team": home_teams,
            "Opponent": away_teams,
            "Actual Score": y_home_train_np,
            "Predicted Score": home_outputs,
        }
    )
    df_away = pd.DataFrame(
        {
            "Team": away_teams,
            "Opponent": home_teams,
            "Actual Score": y_away_train_np,
            "Predicted Score": away_outputs,
        }
    )
    df_all = pd.concat([df_home, df_away])

    fig = px.scatter(
        df_all,
        x="Actual Score",
        y="Predicted Score",
        color="Team",
        symbol="Opponent",
        title="Teams Performance: Predicted vs Actual Scores",
        labels={"Actual Score": "Actual Score", "Predicted Score": "Predicted Score"},
        hover_data=["Team", "Opponent"],
    )

    # Least-squares line of predicted against actual scores.
    m, b = np.polyfit(df_all["Actual Score"], df_all["Predicted Score"], 1)
    trendline = m * df_all["Actual Score"] + b
    fig.add_trace(
        go.Scatter(
            x=df_all["Actual Score"],
            y=trendline,
            mode="lines",
            name="Trendline",
            line=dict(color="red"),
        )
    )

    # Reference diagonal: where predicted equals actual.
    line_x_values = np.linspace(0, 8)
    line_y_values = line_x_values
    fig.add_trace(
        go.Scatter(
            x=line_x_values,
            y=line_y_values,
            mode="lines",
            name="y = x",
            line=dict(color="green"),
        )
    )

    fig.update_traces(
        marker={"size": 7, "opacity": 0.7},
        selector={"mode": "markers"},
    )
    fig.show()


# predictions for upcoming games

In [None]:
# One row per fixture that is not yet marked as finished.
extracted_data = [
    {
        "home_team": match["home"]["name"],
        "away_team": match["away"]["name"],
        "year": pd.to_datetime(match["status"]["utcTime"]).year,
        "id": match["id"],
    }
    for match in f
    if not match["status"]["finished"]
]
new_data = pd.DataFrame(extracted_data)

In [None]:
# Encode the upcoming fixtures with the ALREADY-FITTED ``team_label_encoder``
# (the same one the evaluation cells use to decode team ids). Fitting a fresh
# encoder on the upcoming home teams alone would assign different integer
# codes from those the model was trained on, and ``transform`` would raise for
# any away team that has no upcoming home fixture.
new_data["home_team"] = team_label_encoder.transform(new_data["home_team"])
new_data["away_team"] = team_label_encoder.transform(new_data["away_team"])

# NOTE(review): the year encoder is still fitted on the upcoming fixtures
# only, so its codes start again at 0. If the model was trained with a fitted
# year encoder, that encoder should be reused here as well -- confirm against
# the training cell.
year_label_encoder = LabelEncoder()
new_data["year"] = year_label_encoder.fit_transform(new_data["year"])

X_new = new_data[selected_features]

# float32 tensor on the selected device; the model call in the next cell
# passes columns 0, 1 and 2 as separate inputs.
X_new_tensor = torch.from_numpy(X_new.astype(np.float32).values).to(device)

In [None]:
def _match_outcomes(home_scores, away_scores):
    """Label each fixture "win", "loss" or "draw" from the home side's view."""
    home_scores = np.asarray(home_scores)
    away_scores = np.asarray(away_scores)
    return np.select(
        [home_scores > away_scores, home_scores < away_scores],
        ["win", "loss"],
        default="draw",
    )


def colourmap(val):
    """Return a CSS colour rule: green for "win", red for anything else.

    Not applied anywhere in this cell; kept so it stays available for styling.
    """
    if val == "win":
        color = 'green'
    else:
        color = 'red'
    return 'color: %s' % color


# Only the forward pass needs gradient tracking switched off.
with torch.no_grad():
    home_outputs, away_outputs = model(
        X_new_tensor, X_new_tensor[:, 0], X_new_tensor[:, 1], X_new_tensor[:, 2]
    )
home_outputs = home_outputs.squeeze().cpu().numpy()
away_outputs = away_outputs.squeeze().cpu().numpy()

# Turn the encoded team ids back into readable names.
home_teams = team_label_encoder.inverse_transform(new_data["home_team"])
away_teams = team_label_encoder.inverse_transform(new_data["away_team"])

df_home = pd.DataFrame({
    'Team': home_teams,
    'Opponent': away_teams,
    'Predicted Score': home_outputs
})

df_away = pd.DataFrame({
    'Team': away_teams,
    'Opponent': home_teams,
    'Predicted Score': away_outputs
})

# One row per upcoming fixture, built column-wise in a single step rather than
# by concatenating one-row frames in a loop (quadratic in the number of
# fixtures). Scores are rounded to whole goals before comparison, so
# predictions that round to the same value count as a draw.
df_merged = pd.DataFrame(
    {
        "home_team": home_teams,
        "away_team": away_teams,
        "home_predicted_score": home_outputs,
        "away_predicted_score": away_outputs,
        "predicted_result": _match_outcomes(
            np.rint(home_outputs), np.rint(away_outputs)
        ),
    }
)

# Show the first ten fixtures with the predicted scores of both sides.
display(df_merged.head(10))

# Scatter of predicted home score against predicted away score per fixture.
fig = px.scatter(
    df_merged,
    x="home_predicted_score",
    y="away_predicted_score",
    color="home_team",
    symbol="away_team",
    title="Teams Performance: Predicted Scores",
    hover_data=["home_team", "away_team"],
)

fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode="markers"))
fig.show()