# fotmob

In [129]:
import re
import urllib.parse
from logging import getLevelName, getLogger
from typing import Optional, Union, List, Dict

import requests
from cachecontrol import CacheControl

VERSION = "1.2.0"


class FotMob:
    BASE_URL = "https://www.fotmob.com/api"
    LOGGER = getLogger(__name__)

    def __init__(
        self, proxies: Optional[dict] = None, logging_level: Optional[str] = "WARNING"
    ) -> None:
        SESSION = requests.Session()
        if proxies:
            SESSION.proxies.update(proxies)
        CACHE_SESSION = CacheControl(SESSION)

        if logging_level:
            if logging_level.upper() in [
                "DEBUG",
                "INFO",
                "WARNING",
                "ERROR",
                "CRITICAL",
            ]:
                self.LOGGER.setLevel(getLevelName(logging_level.upper()))
            else:
                print(f"Logging level {logging_level} not recognized!")

        self.session = CACHE_SESSION
        self.matches_url = f"{self.BASE_URL}/matches?"
        self.leagues_url = f"{self.BASE_URL}/leagues?"
        self.teams_url = f"{self.BASE_URL}/teams?"
        self.player_url = f"{self.BASE_URL}/playerData?"
        self.match_details_url = f"{self.BASE_URL}/matchDetails?"
        self.search_url = f"{self.BASE_URL}/searchData?"
        self.tv_listing_url = f"{self.BASE_URL}/tvlisting?"
        self.tv_listings_url = f"{self.BASE_URL}/tvlistings?"
        self.fixtures_url = f"{self.BASE_URL}/fixtures?"

    def _check_date(self, date: str) -> Union[re.Match, None]:
        pattern = re.compile(r"(20\d{2})(\d{2})(\d{2})")
        return pattern.match(date)

    def _execute_query(self, url: str):
        response = self.session.get(url)
        response.raise_for_status()
        self.LOGGER.debug(response)
        return response.json()

    def get_matches_by_date(
        self, date: str, time_zone: str = "America/New_York"
    ) -> dict:
        if self._check_date(date) != None:
            url = f"{self.matches_url}date={date}"
            return self._execute_query(url)
        return {}

    def get_team(
        self,
        id: int,
        tab: str = "overview",
        type: str = "league",
        time_zone: str = "America/New_York",
    ):
        url = f"{self.teams_url}id={id}&tab={tab}&type={type}"
        return self._execute_query(url)

    def get_player(self, id: int):
        url = f"{self.player_url}id={id}"
        return self._execute_query(url)

    def get_match_details(self, match_id: int):
        url = f"{self.match_details_url}matchId={match_id}"
        return self._execute_query(url)

    def get_match_tv_listing(self, match_id: int, country_code: str = "GB"):
        url = f"{self.tv_listing_url}matchId={match_id}&countryCode={country_code}"
        return self._execute_query(url)

    def get_tv_listings_country(self, country_code: str = "GB"):
        url = f"{self.tv_listings_url}countryCode={country_code}"
        return self._execute_query(url)

    def search(self, term: str, user_language: str = "en-GB,en"):
        searchterm = urllib.parse.quote_plus(term)
        url = f"{self.search_url}term={searchterm}&userLanguage={user_language}"
        return self._execute_query(url)

    def get_fixtures(self, id: str, season: str):
        url = f"{self.fixtures_url}id={id}&season={season}"
        return self._execute_query(url)

    def get_fixtures_overview(self, league_id: str, season_start: str, season_end: str) -> List[Dict]:
        url = f"{self.leagues_url}id={league_id}&season={season_start}%2F{season_end}"
        return self._execute_query(url)["overview"]["leagueOverviewMatches"]

# data

In [130]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.expand_frame_repr', False)
# pd.options.display.max_rows = 2000
pd.options.display.max_columns = 2000

In [131]:
Fot = FotMob()
f = (
    Fot.get_fixtures_overview("47", "2023", "2024")
    + Fot.get_fixtures_overview("47", "2022", "2023")
    + Fot.get_fixtures_overview("47", "2021", "2022")
    + Fot.get_fixtures_overview("47", "2020", "2021")
)
# example data:
"""
{
    "id": "3900932",
    "pageUrl": "/matches/arsenal-vs-crystal-palace/36ytc8#3900932",
    "opponent": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "home": {
        "id": "9826",
        "name": "Crystal Palace",
        "score": 0
    },
    "away": {
        "id": "9825",
        "name": "Arsenal",
        "score": 2
    },
    "displayTournament": true,
    "lnameArr": [
        "A",
        " | undefined"
    ],
    "notStarted": false,
    "tournament": {},
    "status": {
        "utcTime": "2022-08-05T19:00:00Z",
        "finished": true,
        "started": true,
        "cancelled": false,
        "scoreStr": "0 - 2",
        "reason": {
            "short": "FT",
            "shortKey": "fulltime_short",
            "long": "Full-Time",
            "longKey": "finished"
        }
    }
}
"""



'\n{\n    "id": "3900932",\n    "pageUrl": "/matches/arsenal-vs-crystal-palace/36ytc8#3900932",\n    "opponent": {\n        "id": "9826",\n        "name": "Crystal Palace",\n        "score": 0\n    },\n    "home": {\n        "id": "9826",\n        "name": "Crystal Palace",\n        "score": 0\n    },\n    "away": {\n        "id": "9825",\n        "name": "Arsenal",\n        "score": 2\n    },\n    "displayTournament": true,\n    "lnameArr": [\n        "A",\n        " | undefined"\n    ],\n    "notStarted": false,\n    "tournament": {},\n    "status": {\n        "utcTime": "2022-08-05T19:00:00Z",\n        "finished": true,\n        "started": true,\n        "cancelled": false,\n        "scoreStr": "0 - 2",\n        "reason": {\n            "short": "FT",\n            "shortKey": "fulltime_short",\n            "long": "Full-Time",\n            "longKey": "finished"\n        }\n    }\n}\n'

In [132]:
def extract_values(data):
    result = {}
    if data is None:
        return result
    for key, value in data.items():
        if isinstance(value, dict):
            result |= extract_values(value)
        elif isinstance(value, list) and len(value) == 2:
            if value[0]: result[f"home_{data['key']}"] = value[0]
            if value[1]: result[f"away_{data['key']}"] = value[1]
        elif isinstance(value, list) and len(value) > 2:
            for i in range(len(value)):
                result |= extract_values(value[i])
        
    return result

run = True

if run:
    extracted_data = []

    for match in f:
        temp_data = {}
        if match["status"]["finished"]:
            
            match_details = Fot.get_match_details(match["id"])["content"]
            stats = match_details["stats"]
            temp_data |= extract_values(stats)
            
            temp_data |= {
                "home_score": match["home"]["score"],
                "away_score": match["away"]["score"],
                "home_team": match["home"]["name"],
                "away_team": match["away"]["name"],
                "result": 1 if match["home"]["score"] > match["away"]["score"] else 0,
                "date": pd.to_datetime(match["status"]["utcTime"]),
                "year": pd.to_datetime(match["status"]["utcTime"]).year,
                "day": pd.to_datetime(match["status"]["utcTime"]).day_of_week,
                "id": match["id"],
            }
            
            print(temp_data)
            extracted_data.append(temp_data)
        

    data = pd.DataFrame(extracted_data)
    # store the data
    data.to_csv("data.csv")
    data
else:
    data = pd.read_csv("data.csv", index_col=0)

{'home_BallPossesion': 42, 'away_BallPossesion': 58, 'home_expected_goals': '0.07', 'away_expected_goals': '1.30', 'home_total_shots': 2, 'away_total_shots': 12, 'home_ShotsOnTarget': 1, 'away_ShotsOnTarget': 5, 'away_big_chance': 1, 'away_big_chance_missed_title': 1, 'home_accurate_passes': '185 (83%)', 'away_accurate_passes': '275 (90%)', 'home_fouls': 6, 'away_fouls': 3, 'home_corners': 5, 'away_corners': 4, 'home_ShotsOffTarget': 1, 'away_ShotsOffTarget': 2, 'home_blocked_shots': 1, 'away_blocked_shots': 5, 'home_shots_inside_box': 1, 'away_shots_inside_box': 10, 'home_shots_outside_box': 1, 'away_shots_outside_box': 2, 'home_expected_goals_open_play': '0.04', 'away_expected_goals_open_play': '0.47', 'home_expected_goals_set_play': '0.02', 'away_expected_goals_set_play': '0.83', 'home_expected_goals_non_penalty': '0.07', 'away_expected_goals_non_penalty': '1.30', 'home_expected_goals_on_target': '0.00', 'away_expected_goals_on_target': '1.28', 'home_passes': 223, 'away_passes': 304

In [None]:
data["home_team_code"] = data["home_team"].astype("category").cat.codes
data["away_team_code"] = data["away_team"].astype("category").cat.codes

train = data[data["year"] < 2022]
test = data[data["year"] > 2022]


predictors = ["day", "year", "home_team_code", "away_team_code"]

data.columns

Index(['home_BallPossesion', 'away_BallPossesion', 'home_expected_goals',
       'away_expected_goals', 'home_total_shots', 'away_total_shots',
       'home_ShotsOnTarget', 'away_ShotsOnTarget', 'home_big_chance',
       'away_big_chance', 'home_big_chance_missed_title',
       'away_big_chance_missed_title', 'home_accurate_passes',
       'away_accurate_passes', 'home_fouls', 'away_fouls', 'home_corners',
       'away_corners', 'home_shots', 'away_shots', 'home_ShotsOffTarget',
       'away_ShotsOffTarget', 'home_blocked_shots', 'away_blocked_shots',
       'home_shots_woodwork', 'away_shots_woodwork', 'home_shots_inside_box',
       'away_shots_inside_box', 'home_shots_outside_box',
       'away_shots_outside_box', 'home_expected_goals_open_play',
       'away_expected_goals_open_play', 'home_expected_goals_set_play',
       'away_expected_goals_set_play', 'home_expected_goals_non_penalty',
       'away_expected_goals_non_penalty', 'home_expected_goals_on_target',
       'away_expect

# main

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"
print(f"Using device: {device}")

Using device: cpu


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=42)

rf.fit(train[predictors], train["result"])

preds = rf.predict(test[predictors])

acc = accuracy_score(test["result"], preds)
prec = precision_score(test["result"], preds)

combined = pd.DataFrame(dict(actual=test["result"], prediction=preds))
display(pd.crosstab(index=combined['actual'], columns=[combined['prediction']]))

acc, prec

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,136,46
1,93,72


(0.5994236311239193, 0.6101694915254238)

In [None]:
import re

# split home_accurate_passes e.g. 185 (83%) to home_accurate_passes and home_accurate_passes_percentage
for col in data.columns:
    if isinstance(data[col][0], str):
        if data[col][0].endswith(")"):
            for i in range(len(data)):
                print(data.loc[i, col])
                if data.loc[i, col] != "0" and isinstance(data.loc[i, col], str):
                    pattern = r'(\d+) \((\d+)%\)'
                    matches = re.findall(pattern, data.loc[i, col])[0]

                    data.loc[i, col] = float(matches[0])
                    data.loc[i, f"{col}_percentage"] = int(matches[1]) / 100.0

185 (83%)
292 (89%)
179 (78%)
264 (93%)
173 (79%)
88 (65%)
215 (90%)
106 (79%)
325 (89%)
134 (77%)
180 (79%)
198 (86%)
228 (85%)
230 (86%)
225 (84%)
279 (89%)
237 (88%)
88 (69%)
249 (91%)
255 (90%)
156 (80%)
293 (93%)
196 (86%)
165 (81%)
211 (82%)
342 (88%)
237 (86%)
76 (68%)
299 (89%)
153 (74%)
125 (73%)
209 (83%)
192 (81%)
345 (89%)
393 (92%)
254 (93%)
171 (80%)
248 (90%)
277 (87%)
149 (78%)
257 (87%)
374 (90%)
213 (85%)
237 (84%)
123 (84%)
136 (70%)
122 (81%)
84 (67%)
149 (77%)
185 (79%)
151 (77%)
82 (66%)
228 (81%)
300 (89%)
183 (86%)
232 (88%)
183 (85%)
282 (88%)
181 (82%)
111 (75%)
176 (84%)
197 (77%)
323 (89%)
229 (84%)
221 (87%)
103 (73%)
311 (93%)
86 (65%)
272 (91%)
129 (71%)
124 (72%)
168 (84%)
177 (81%)
212 (81%)
267 (91%)
224 (88%)
181 (85%)
139 (73%)
129 (75%)
236 (86%)
368 (89%)
69 (63%)
175 (81%)
213 (88%)
254 (88%)
139 (70%)
209 (83%)
82 (63%)
188 (88%)
231 (85%)
60 (65%)
314 (88%)
106 (69%)
279 (91%)
170 (82%)
266 (83%)
263 (90%)
308 (86%)
310 (89%)
166 (81%)
131 (70%)

In [None]:

teams = pd.unique(data[["home_team", "away_team"]].values.ravel("K"))

team_dfs = {}

for team in teams:
    team_data = data[(data["home_team"] == team) | (data["away_team"] == team)]
    team_data = team_data.assign(is_home=team_data["home_team"] == team)
    team_data["is_home"] = team_data["is_home"].astype(int)



    team_dfs[team] = team_data

group = team_dfs["Man City"]
group.head()

Unnamed: 0,home_BallPossesion,away_BallPossesion,home_expected_goals,away_expected_goals,home_total_shots,away_total_shots,home_ShotsOnTarget,away_ShotsOnTarget,home_big_chance,away_big_chance,home_big_chance_missed_title,away_big_chance_missed_title,home_accurate_passes,away_accurate_passes,home_fouls,away_fouls,home_corners,away_corners,home_shots,away_shots,home_ShotsOffTarget,away_ShotsOffTarget,home_blocked_shots,away_blocked_shots,home_shots_woodwork,away_shots_woodwork,home_shots_inside_box,away_shots_inside_box,home_shots_outside_box,away_shots_outside_box,home_expected_goals_open_play,away_expected_goals_open_play,home_expected_goals_set_play,away_expected_goals_set_play,home_expected_goals_non_penalty,away_expected_goals_non_penalty,home_expected_goals_on_target,away_expected_goals_on_target,home_passes,away_passes,home_own_half_passes,away_own_half_passes,home_opposition_half_passes,away_opposition_half_passes,home_long_balls_accurate,away_long_balls_accurate,home_accurate_crosses,away_accurate_crosses,home_player_throws,away_player_throws,home_Offsides,away_Offsides,home_defense,away_defense,home_tackles_succeeded,away_tackles_succeeded,home_interceptions,away_interceptions,home_shot_blocks,away_shot_blocks,home_clearances,away_clearances,home_keeper_saves,away_keeper_saves,home_duels,away_duels,home_duel_won,away_duel_won,home_ground_duels_won,away_ground_duels_won,home_aerials_won,away_aerials_won,home_dribbles_succeeded,away_dribbles_succeeded,home_discipline,away_discipline,home_yellow_cards,away_yellow_cards,home_red_cards,away_red_cards,home_score,away_score,home_team,away_team,result,date,year,day,id,home_touches_opp_box,away_touches_opp_box,home_team_code,away_team_code,home_accurate_passes_percentage,away_accurate_passes_percentage,home_long_balls_accurate_percentage,away_long_balls_accurate_percentage,home_accurate_crosses_percentage,away_accurate_crosses_percentage,home_tackles_succeeded_percentage,away_tackles_succeeded_percentage,home_ground_duels_won_percentage,away_ground_duels_won_percentage,home_aerials_won_percentage,away_aerials_won_percentage,home_dribbles_succeeded_percentage,away_dribbles_succeeded_percentage,is_home
0,42.0,58.0,0.07,1.3,2.0,12.0,0.0,5.0,0.0,1.0,0.0,0.0,185.0,275.0,6.0,3.0,5.0,4.0,,,1.0,2.0,1.0,5.0,0.0,0.0,1.0,10.0,1.0,2.0,0.04,0.47,0.02,0.83,0.07,1.3,0.0,1.28,223.0,304.0,126.0,178.0,59.0,97.0,14.0,18.0,2.0,2.0,10.0,5.0,0.0,0.0,,,3.0,6.0,3.0,2.0,5.0,1.0,10.0,4.0,4.0,0.0,,,15.0,23.0,8.0,17.0,7.0,6.0,1.0,1.0,,,0.0,0.0,1.0,0.0,0,3,Burnley,Man City,0,2023-08-11 19:00:00+00:00,2023,4,4193450,,,5,14,0.83,0.9,0.45,0.53,0.4,0.22,0.75,0.6,0.32,0.68,0.54,0.46,0.11,1.0,0
15,60.0,40.0,0.56,0.22,5.0,5.0,3.0,1.0,0.0,0.0,0.0,0.0,279.0,175.0,6.0,8.0,2.0,0.0,,,1.0,2.0,1.0,2.0,0.0,0.0,5.0,2.0,0.0,3.0,0.56,0.22,0.0,0.0,0.56,0.22,0.78,0.11,314.0,208.0,135.0,111.0,144.0,64.0,11.0,11.0,0.0,2.0,9.0,8.0,0.0,3.0,,,2.0,2.0,2.0,3.0,1.0,1.0,4.0,4.0,1.0,3.0,,,20.0,20.0,19.0,16.0,1.0,4.0,7.0,3.0,,,1.0,3.0,0.0,0.0,1,0,Man City,Newcastle,1,2023-08-19 19:00:00+00:00,2023,5,4193475,,,14,16,0.89,0.84,0.46,0.52,0.0,0.67,0.5,0.29,0.54,0.46,0.2,0.8,0.64,0.43,1
27,25.0,75.0,0.67,2.01,6.0,18.0,2.0,6.0,1.0,3.0,1.0,2.0,76.0,316.0,6.0,2.0,1.0,7.0,,,4.0,9.0,0.0,3.0,0.0,0.0,4.0,11.0,2.0,7.0,0.5,1.94,0.18,0.07,0.67,2.01,0.36,1.55,112.0,343.0,46.0,118.0,30.0,198.0,15.0,15.0,2.0,4.0,4.0,5.0,0.0,1.0,,,4.0,3.0,2.0,1.0,3.0,1.0,9.0,5.0,4.0,0.0,,,13.0,23.0,10.0,16.0,3.0,7.0,3.0,7.0,,,3.0,0.0,0.0,0.0,1,2,Sheff Utd,Man City,0,2023-08-27 13:00:00+00:00,2023,6,4193489,,,19,14,0.68,0.92,0.56,0.63,1.0,0.31,0.67,1.0,0.38,0.62,0.3,0.7,0.75,0.7,0
34,75.0,25.0,1.58,0.41,4.0,3.0,3.0,2.0,2.0,1.0,0.0,1.0,393.0,108.0,3.0,6.0,2.0,2.0,,,0.0,1.0,1.0,0.0,0.0,0.0,4.0,2.0,0.0,1.0,0.79,0.41,0.0,0.0,0.79,0.41,2.14,0.7,426.0,138.0,245.0,75.0,148.0,33.0,18.0,9.0,0.0,1.0,8.0,8.0,1.0,1.0,,,6.0,3.0,3.0,0.0,0.0,1.0,4.0,3.0,2.0,0.0,,,24.0,13.0,20.0,9.0,4.0,4.0,6.0,2.0,,,1.0,2.0,0.0,0.0,5,1,Man City,Fulham,1,2023-09-02 14:00:00+00:00,2023,5,4193498,,,14,9,0.92,0.78,0.72,0.6,0.0,0.33,0.75,0.75,0.69,0.31,0.5,0.5,0.75,0.67,1
44,27.0,73.0,0.14,1.66,3.0,12.0,1.0,8.0,0.0,4.0,0.0,2.0,123.0,370.0,4.0,5.0,2.0,3.0,,,1.0,3.0,1.0,1.0,0.0,1.0,3.0,8.0,0.0,4.0,0.06,1.54,0.07,0.13,0.14,1.66,0.32,3.96,146.0,398.0,84.0,136.0,39.0,234.0,11.0,14.0,2.0,3.0,6.0,8.0,0.0,0.0,,,4.0,3.0,12.0,4.0,1.0,1.0,11.0,3.0,5.0,1.0,,,17.0,15.0,13.0,11.0,4.0,4.0,2.0,3.0,,,2.0,1.0,0.0,0.0,1,3,West Ham,Man City,0,2023-09-16 14:00:00+00:00,2023,5,4193508,,,24,14,0.84,0.93,0.58,0.82,0.4,0.38,0.67,0.75,0.54,0.46,0.5,0.5,0.5,0.5,0


In [None]:
def rolling_averages(group, cols, new_cols):
    group = group.sort_values("date")
    rolling_stats = group[cols].rolling(3, closed='left').mean()
    group[new_cols] = rolling_stats
    group = group.dropna(subset=new_cols)
    
    return group

cols = ['home_BallPossesion', 'away_BallPossesion', 'home_expected_goals',
       'away_expected_goals', 'home_total_shots', 'away_total_shots',
       'home_ShotsOnTarget', 'away_ShotsOnTarget', 'home_big_chance',
       'away_big_chance', 'home_big_chance_missed_title',
       'away_big_chance_missed_title', 'home_accurate_passes',
       'away_accurate_passes', 'home_fouls', 'away_fouls', 'home_corners',
       'away_corners', 'home_shots', 'away_shots', 'home_ShotsOffTarget',
       'away_ShotsOffTarget', 'home_blocked_shots', 'away_blocked_shots',
       'home_shots_woodwork', 'away_shots_woodwork', 'home_shots_inside_box',
       'away_shots_inside_box', 'home_shots_outside_box',
       'away_shots_outside_box', 'home_expected_goals_open_play',
       'away_expected_goals_open_play', 'home_expected_goals_set_play',
       'away_expected_goals_set_play', 'home_expected_goals_non_penalty',
       'away_expected_goals_non_penalty', 'home_expected_goals_on_target',
       'away_expected_goals_on_target', 'home_passes', 'away_passes',
       'home_own_half_passes', 'away_own_half_passes',
       'home_opposition_half_passes', 'away_opposition_half_passes',
       'home_long_balls_accurate', 'away_long_balls_accurate',
       'home_accurate_crosses', 'away_accurate_crosses', 'home_player_throws',
       'away_player_throws', 'home_Offsides', 'away_Offsides', 'home_defense',
       'away_defense', 'home_tackles_succeeded', 'away_tackles_succeeded',
       'home_interceptions', 'away_interceptions', 'home_shot_blocks',
       'away_shot_blocks', 'home_clearances', 'away_clearances',
       'home_keeper_saves', 'away_keeper_saves', 'home_duels', 'away_duels',
       'home_duel_won', 'away_duel_won', 'home_ground_duels_won',
       'away_ground_duels_won', 'home_aerials_won', 'away_aerials_won',
       'home_dribbles_succeeded', 'away_dribbles_succeeded', 'home_discipline',
       'away_discipline', 'home_yellow_cards', 'away_yellow_cards',
       'home_red_cards', 'away_red_cards', 'home_score', 'away_score',
       'home_touches_opp_box', 'away_touches_opp_box']
new_cols = [f"{c}_rolling" for c in cols]

In [None]:
rolling_averages(group, cols, new_cols)

Unnamed: 0,home_BallPossesion,away_BallPossesion,home_expected_goals,away_expected_goals,home_total_shots,away_total_shots,home_ShotsOnTarget,away_ShotsOnTarget,home_big_chance,away_big_chance,home_big_chance_missed_title,away_big_chance_missed_title,home_accurate_passes,away_accurate_passes,home_fouls,away_fouls,home_corners,away_corners,home_shots,away_shots,home_ShotsOffTarget,away_ShotsOffTarget,home_blocked_shots,away_blocked_shots,home_shots_woodwork,away_shots_woodwork,home_shots_inside_box,away_shots_inside_box,home_shots_outside_box,away_shots_outside_box,home_expected_goals_open_play,away_expected_goals_open_play,home_expected_goals_set_play,away_expected_goals_set_play,home_expected_goals_non_penalty,away_expected_goals_non_penalty,home_expected_goals_on_target,away_expected_goals_on_target,home_passes,away_passes,home_own_half_passes,away_own_half_passes,home_opposition_half_passes,away_opposition_half_passes,home_long_balls_accurate,away_long_balls_accurate,home_accurate_crosses,away_accurate_crosses,home_player_throws,away_player_throws,home_Offsides,away_Offsides,home_defense,away_defense,home_tackles_succeeded,away_tackles_succeeded,home_interceptions,away_interceptions,home_shot_blocks,away_shot_blocks,home_clearances,away_clearances,home_keeper_saves,away_keeper_saves,home_duels,away_duels,home_duel_won,away_duel_won,home_ground_duels_won,away_ground_duels_won,home_aerials_won,away_aerials_won,home_dribbles_succeeded,away_dribbles_succeeded,home_discipline,away_discipline,home_yellow_cards,away_yellow_cards,home_red_cards,away_red_cards,home_score,away_score,home_team,away_team,result,date,year,day,id,home_touches_opp_box,away_touches_opp_box,home_team_code,away_team_code,home_accurate_passes_percentage,away_accurate_passes_percentage,home_long_balls_accurate_percentage,away_long_balls_accurate_percentage,home_accurate_crosses_percentage,away_accurate_crosses_percentage,home_tackles_succeeded_percentage,away_tackles_succeeded_percentage,home_ground_duels_won_percentage,away_ground_duels_won_percentage,home_aerials_won_percentage,away_aerials_won_percentage,home_dribbles_succeeded_percentage,away_dribbles_succeeded_percentage,is_home,home_BallPossesion_rolling,away_BallPossesion_rolling,home_expected_goals_rolling,away_expected_goals_rolling,home_total_shots_rolling,away_total_shots_rolling,home_ShotsOnTarget_rolling,away_ShotsOnTarget_rolling,home_big_chance_rolling,away_big_chance_rolling,home_big_chance_missed_title_rolling,away_big_chance_missed_title_rolling,home_accurate_passes_rolling,away_accurate_passes_rolling,home_fouls_rolling,away_fouls_rolling,home_corners_rolling,away_corners_rolling,home_shots_rolling,away_shots_rolling,home_ShotsOffTarget_rolling,away_ShotsOffTarget_rolling,home_blocked_shots_rolling,away_blocked_shots_rolling,home_shots_woodwork_rolling,away_shots_woodwork_rolling,home_shots_inside_box_rolling,away_shots_inside_box_rolling,home_shots_outside_box_rolling,away_shots_outside_box_rolling,home_expected_goals_open_play_rolling,away_expected_goals_open_play_rolling,home_expected_goals_set_play_rolling,away_expected_goals_set_play_rolling,home_expected_goals_non_penalty_rolling,away_expected_goals_non_penalty_rolling,home_expected_goals_on_target_rolling,away_expected_goals_on_target_rolling,home_passes_rolling,away_passes_rolling,home_own_half_passes_rolling,away_own_half_passes_rolling,home_opposition_half_passes_rolling,away_opposition_half_passes_rolling,home_long_balls_accurate_rolling,away_long_balls_accurate_rolling,home_accurate_crosses_rolling,away_accurate_crosses_rolling,home_player_throws_rolling,away_player_throws_rolling,home_Offsides_rolling,away_Offsides_rolling,home_defense_rolling,away_defense_rolling,home_tackles_succeeded_rolling,away_tackles_succeeded_rolling,home_interceptions_rolling,away_interceptions_rolling,home_shot_blocks_rolling,away_shot_blocks_rolling,home_clearances_rolling,away_clearances_rolling,home_keeper_saves_rolling,away_keeper_saves_rolling,home_duels_rolling,away_duels_rolling,home_duel_won_rolling,away_duel_won_rolling,home_ground_duels_won_rolling,away_ground_duels_won_rolling,home_aerials_won_rolling,away_aerials_won_rolling,home_dribbles_succeeded_rolling,away_dribbles_succeeded_rolling,home_discipline_rolling,away_discipline_rolling,home_yellow_cards_rolling,away_yellow_cards_rolling,home_red_cards_rolling,away_red_cards_rolling,home_score_rolling,away_score_rolling,home_touches_opp_box_rolling,away_touches_opp_box_rolling


### graph of test data

In [None]:
import plotly.express as px
import plotly.graph_objects as go

with torch.no_grad():
    home_outputs, away_outputs = model(
        X_test_tensor, X_test_tensor[:, 0], X_test_tensor[:, 1], X_test_tensor[:, 2]
    )
    home_outputs = home_outputs.squeeze().cpu().numpy()
    away_outputs = away_outputs.squeeze().cpu().numpy()

    y_home_test_np = y_home_test_tensor.cpu().numpy()
    y_away_test_np = y_away_test_tensor.cpu().numpy()

    home_teams = team_label_encoder.inverse_transform(X_test["home_team"])
    away_teams = team_label_encoder.inverse_transform(X_test["away_team"])

    df_home = pd.DataFrame(
        {
            "Team": home_teams,
            "Opponent": away_teams,
            "Actual Score": y_home_test_np,
            "Predicted Score": home_outputs,
        }
    )

    df_away = pd.DataFrame(
        {
            "Team": away_teams,
            "Opponent": home_teams,
            "Actual Score": y_away_test_np,
            "Predicted Score": away_outputs,
        }
    )


    df_merged = pd.DataFrame()
    for i in range(len(df_home)):
        curr_home = df_home.iloc[i]
        curr_away = df_away.iloc[i]
        final = pd.DataFrame([{
            "home_team": curr_home["Team"],
            "away_team": curr_home["Opponent"],
            "home_actual_score": curr_home["Actual Score"],
            "away_actual_score": curr_away["Actual Score"],
            "actual_result": "win" if curr_home["Actual Score"] > curr_away["Actual Score"] else "loss" if curr_home["Actual Score"] < curr_away["Actual Score"] else "draw",
            "home_predicted_score": curr_home["Predicted Score"],
            "away_predicted_score": curr_away["Predicted Score"],
            "predicted_result": "win" if round(curr_home["Predicted Score"]) > round(curr_away["Predicted Score"]) else "loss" if round(curr_home["Predicted Score"]) < round(curr_away["Predicted Score"]) else "draw",
            # "predicted_result": "win" if curr_home["Predicted Score"] > curr_away["Predicted Score"] else "loss" if curr_home["Predicted Score"] < curr_away["Predicted Score"] else "draw",
        }])
        df_merged = pd.concat([df_merged, final])

    # Calculate the accuracy for predictions
    df_merged["prediction_accuracy"] = df_merged["actual_result"] == df_merged["predicted_result"]
    prediction_accuracy = df_merged["prediction_accuracy"].sum() / len(df_merged)
    print(f"Prediction Accuracy: {prediction_accuracy * 100}%")

    # calculate the accuracy for predicting home team always wins
    correct_win_home_team = df_merged["actual_result"] == "win"
    win_accuracy_home = correct_win_home_team.sum() / len(df_merged)
    print(f"Win Accuracy of home team always wins strategy: {win_accuracy_home * 100}")

    # calculate the accuracy for predicting home team always wins
    correct_win_away_team = df_merged["actual_result"] == "loss"
    win_accuracy_away = correct_win_away_team.sum() / len(df_merged)
    print(f"Win Accuracy of away team always wins strategy: {win_accuracy_away * 100}")

    df_all = pd.concat([df_home, df_away])

    fig = px.scatter(
        df_merged,
        x="home_actual_score",
        y="home_predicted_score",
        color="home_team",
        symbol="away_team",
        title="Teams Performance: Predicted vs Actual Scores",
        hover_data=["home_team", "away_team", "actual_result", "predicted_result", "home_predicted_score", "away_predicted_score", "home_actual_score", "away_actual_score"],
    )

    m, b = np.polyfit(df_all["Actual Score"], df_all["Predicted Score"], 1)
    trendline = m * df_all["Actual Score"] + b
    fig.add_trace(
        go.Scatter(
            x=df_all["Actual Score"],
            y=trendline,
            mode="lines",
            name="Trendline",
            line={"color": "red"},
        )
    )
    print(f"Gradient: {m}, Target: 1")
    """
    Best so far:

    Win Accuracy: 53.84615384615385%
    Gradient: 0.5826897630277947
    team_embedding_dim = 1100
    num_epochs = 1000
    hidden_size = 500
    lr=0.000003
    6 layers
    """
    line_x_values = np.linspace(0, 5)
    line_y_values = line_x_values
    fig.add_trace(
        go.Scatter(
            x=line_x_values,
            y=line_y_values,
            mode="lines",
            name="y = x",
            line={"color": "green"},
        )
    )

    fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode="markers"))
    fig.show()

NameError: name 'model' is not defined

### graph of training data

In [None]:
with torch.no_grad():
    home_outputs, away_outputs = model(X_train_tensor, X_train_tensor[:, 0], X_train_tensor[:, 1], X_train_tensor[:, 2])
    home_outputs = home_outputs.squeeze().cpu().numpy()
    away_outputs = away_outputs.squeeze().cpu().numpy()

    y_home_train_np = y_home_train_tensor.cpu().numpy()
    y_away_train_np = y_away_train_tensor.cpu().numpy()

    home_teams = team_label_encoder.inverse_transform(X_train['home_team'])
    away_teams = team_label_encoder.inverse_transform(X_train['away_team'])

    df_home = pd.DataFrame({
        'Team': home_teams,
        'Opponent': away_teams,
        'Actual Score': y_home_train_np,
        'Predicted Score': home_outputs
    })

    df_away = pd.DataFrame({
        'Team': away_teams,
        'Opponent': home_teams,
        'Actual Score': y_away_train_np,
        'Predicted Score': away_outputs
    })

    df_all = pd.concat([df_home, df_away])

    fig = px.scatter(df_all, x='Actual Score', y='Predicted Score', color='Team', symbol='Opponent',
                     title='Teams Performance: Predicted vs Actual Scores',
                     labels={'Actual Score': 'Actual Score', 'Predicted Score': 'Predicted Score'},
                     hover_data=['Team', 'Opponent'])

    m, b = np.polyfit(df_all['Actual Score'], df_all['Predicted Score'], 1)
    trendline = m * df_all['Actual Score'] + b
    fig.add_trace(go.Scatter(x=df_all['Actual Score'], y=trendline, mode='lines', name='Trendline', line={"color": "red"}))

    line_x_values = np.linspace(0, 8)
    line_y_values = line_x_values
    fig.add_trace(go.Scatter(x=line_x_values, y=line_y_values, mode='lines', name='y = x', line={"color": "green"}))

    fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode='markers'))
    fig.show()


# predictions for upcoming games

In [None]:
extracted_data = []

for match in f:
    if not match["status"]["finished"]:
        extracted_data.append({
        "home_team": match["home"]["name"],
        "away_team": match["away"]["name"],
        "year" : pd.to_datetime(match["status"]["utcTime"]).year,
        "id" : match["id"]
      })
new_data = pd.DataFrame(extracted_data)

In [None]:
team_label_encoder = LabelEncoder()
year_label_encoder = LabelEncoder()

new_data["home_team"] = team_label_encoder.fit_transform(new_data["home_team"])
new_data["away_team"] = team_label_encoder.transform(new_data["away_team"])
new_data["year"] = year_label_encoder.fit_transform(new_data["year"])

X_new = new_data[selected_features]


X_new_tensor = torch.from_numpy(X_new.astype(np.float32).values).to(device)
# X_new_tensor, X_new_tensor[:, 0], X_new_tensor[:, 1], X_new_tensor[:, 2]

In [None]:
with torch.no_grad():
    home_outputs, away_outputs = model(
        X_new_tensor, X_new_tensor[:, 0], X_new_tensor[:, 1], X_new_tensor[:, 2]
    )
    home_outputs = home_outputs.squeeze().cpu().numpy()
    away_outputs = away_outputs.squeeze().cpu().numpy()

    home_teams = team_label_encoder.inverse_transform(new_data["home_team"])
    away_teams = team_label_encoder.inverse_transform(new_data["away_team"])

    df_home = pd.DataFrame({
        'Team': home_teams,
        'Opponent': away_teams,
        'Predicted Score': home_outputs
    })

    df_away = pd.DataFrame({
        'Team': away_teams,
        'Opponent': home_teams,
        'Predicted Score': away_outputs
    })

    df_merged = pd.DataFrame()
    for i in range(len(df_home)):
        curr_home = df_home.iloc[i]
        curr_away = df_away.iloc[i]
        final = pd.DataFrame([{
            "home_team": curr_home["Team"],
            "away_team": curr_home["Opponent"],
            "home_predicted_score": curr_home["Predicted Score"],
            "away_predicted_score": curr_away["Predicted Score"],
            "predicted_result": "win" if round(curr_home["Predicted Score"]) > round(curr_away["Predicted Score"]) else "loss" if round(curr_home["Predicted Score"]) < round(curr_away["Predicted Score"]) else "draw",
        }])
        df_merged = pd.concat([df_merged, final])

    # print the next few fixtures with the predicted scores of home and away teams

    def colourmap(val):
        if val == "win":
            color = 'green'
        else:
            color = 'red'
        return 'color: %s' % color

    display(df_merged.head(10))

    # plotly graph of the next few fixtures with the predicted scores of home and away teams
    fig = px.scatter(
        df_merged,
        x="home_predicted_score",
        y="away_predicted_score",
        color="home_team",
        symbol="away_team",
        title="Teams Performance: Predicted Scores",
        hover_data=["home_team", "away_team"],
    )

    fig.update_traces(marker=dict(size=7, opacity=0.7), selector=dict(mode="markers"))
    fig.show()