In [5]:
import warnings
from tqdm import tqdm

import pandas as pd
from helpers.file import parse_df_to_csv
from helpers import stats as bf

from services import MySQLService

# NOTE(review): called without a category or module filter, this suppresses
# every warning raised from here on (library deprecation notices included).
# Consider narrowing it to the specific warning that needs silencing.
warnings.filterwarnings("ignore")

In [6]:
# Run configuration for the dataset build.
league = "bundesliga"  # league name used in the query filter and the output path

# First and last season to load; the loading loop treats both ends as inclusive.
start_season, end_season = 2017, 2023

# Window size handed to bf.get_team_previous_games_stats for the "last games" stats.
n_last_games = 5

In [7]:
# Load the "matches" rows for `league`, one query per season, and stack them
# into a single fixtures frame.
mysql_service = MySQLService()
seasons = []

for season in range(start_season, end_season + 1):
    season_matches = mysql_service.get_data(
        "matches", where_clause=f"league = '{league}' AND season = {season}"
    )
    # Tag each result with the season it was queried for.
    season_matches["season"] = season
    seasons.append(season_matches)

odds_columns = ["home_odds", "away_odds", "draw_odds"]

# Rows missing any of the three odds are dropped, then the odds are cast to
# float. The index is reset *before* the drop, so the surviving rows keep
# their post-concat labels (gaps are left where rows were removed).
fixtures_df = (
    pd.concat(seasons, axis=0)
    .reset_index(drop=True)
    .dropna(subset=odds_columns)
    .astype({column: float for column in odds_columns})
)

# Result label for every match, as computed by bf.get_winner from the two scores.
fixtures_df["winner"] = fixtures_df.apply(
    lambda match: bf.get_winner(match["home_score"], match["away_score"]),
    axis=1,
)

In [8]:
# Fixture columns copied unchanged into every output record.
game_info_keys = [
    "date",
    "season",
    "home_team",
    "away_team",
    "home_odds",
    "away_odds",
    "draw_odds",
    "winner",
    "home_score",
    "away_score",
]


def build_match_record(game, fixtures, window):
    """Assemble the output record for one fixture, or return None to skip it.

    Args:
        game: one row of the fixtures frame (as yielded by ``iterrows``).
        fixtures: the full fixtures frame, forwarded to
            ``bf.get_team_previous_games_stats``.
        window: number of recent games, forwarded to the same helper.

    Returns:
        A dict holding the home-side stats, then the away-side stats, then the
        ``game_info_keys`` values of ``game`` (later keys win on a name clash),
        or None when the fixture has no home odds or either stats lookup comes
        back empty/falsy.
    """
    if pd.isnull(game["home_odds"]):
        return None

    record = {}
    # "H"/"A" are passed straight through to the stats helper; presumably they
    # select the home/away perspective -- confirm against helpers.stats.
    for team_column, side in (("home_team", "H"), ("away_team", "A")):
        side_stats = bf.get_team_previous_games_stats(
            game[team_column], game["season"], game["date"], side, window, fixtures
        )
        if not side_stats:
            # Skip the fixture as soon as one side has no stats; the away
            # lookup is not attempted when the home lookup fails.
            return None
        record.update(side_stats)

    record.update({key: game[key] for key in game_info_keys})
    return record


data_model = []

for _row_label, game in tqdm(fixtures_df.iterrows(), total=len(fixtures_df)):
    match_record = build_match_record(game, fixtures_df, n_last_games)
    if match_record is not None:
        data_model.append(match_record)

data_df = pd.DataFrame(data_model)

100%|██████████████████████████████████████████████████████████████████████████████| 2051/2051 [00:28<00:00, 71.85it/s]


In [12]:
# Preview the assembled table; pandas elides the middle rows/columns of a
# long or wide frame when rendering it.
display(data_df)

Unnamed: 0,home_points_pct,home_win_pct,home_draw_pct,home_loss_pct,home_points_pct_last_games,home_win_pct_last_games,home_draw_pct_last_games,home_loss_pct_last_games,home_home_wins_last_games,home_home_draws_last_games,...,date,season,home_team,away_team,home_odds,away_odds,draw_odds,winner,home_score,away_score
0,0.428571,0.321429,0.321429,0.357143,0.333333,0.2,0.4,0.4,0,0,...,2018-04-07,2017,Augsburg,Bayern Munich,7.13,1.45,4.61,A,1,4
1,0.466667,0.350000,0.350000,0.300000,0.333333,0.2,0.4,0.4,1,1,...,2018-02-04,2017,Augsburg,Eint Frankfurt,2.48,3.05,3.18,H,3,0
2,0.479167,0.375000,0.312500,0.312500,0.466667,0.4,0.2,0.4,1,1,...,2017-12-16,2017,Augsburg,Freiburg,1.79,4.63,3.70,D,3,3
3,0.470588,0.352941,0.352941,0.294118,0.533333,0.4,0.4,0.2,1,2,...,2018-01-13,2017,Augsburg,Hamburger SV,1.80,4.81,3.54,H,1,0
4,0.523810,0.428571,0.285714,0.285714,0.666667,0.6,0.2,0.2,1,1,...,2017-12-10,2017,Augsburg,Hertha BSC,2.13,3.69,3.28,D,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1387,0.410256,0.384615,0.076923,0.538462,0.266667,0.2,0.2,0.6,1,1,...,2023-12-09,2023,Wolfsburg,Freiburg,2.18,3.25,3.53,A,0,1
1388,0.385965,0.315789,0.210526,0.473684,0.400000,0.2,0.6,0.2,0,1,...,2024-02-04,2023,Wolfsburg,Hoffenheim,2.12,3.22,3.76,D,2,2
1389,0.388889,0.333333,0.166667,0.500000,0.333333,0.2,0.4,0.4,0,0,...,2024-01-27,2023,Wolfsburg,Köln,1.72,4.78,3.93,D,1,1
1390,0.393939,0.363636,0.090909,0.545455,0.066667,0.0,0.2,0.8,0,1,...,2023-11-25,2023,Wolfsburg,RB Leipzig,3.68,1.92,3.95,H,2,1


In [10]:
# Hand the assembled table to parse_df_to_csv (presumably writes it as CSV --
# confirm in helpers.file). The destination folder is per league and the file
# name records the season range covered.
output_dir = f"../dist/leagues/{league}/formatted_data"
output_name = "{}-{}.csv".format(start_season, end_season)

parse_df_to_csv(data_df, output_dir, output_name)