In [4]:
from raw_data.loader import get_project_root, load_stages
import os
import json

import pandas as pd

In [5]:
def process_cup_fixtures(country, cup, season_start, season_end):
    all_matches = []
    project_root = get_project_root()
    stages = load_stages(country)

    # Loop through the seasons
    for season in range(season_start, season_end):
        # Construct the path to the season's fixtures file
        season_path = os.path.join(project_root, 'raw_data', country, cup, str(season), 'fixtures_data.json')

        # Check if the path exists
        if os.path.isfile(season_path):
            # Load the JSON data from the file
            with open(season_path, 'r') as file:
                fixtures_data = json.load(file)
            for fixture in fixtures_data['response']:
                round_name = fixture['league']['round']
                stage_number = stages.get(round_name)

                # Add entry for home team
                home_winner = fixture['teams']['home'].get('winner')
                away_winner = fixture['teams']['away'].get('winner')

                # If 'winner' is not None, convert it to int, otherwise, it remains None
                home_team_win = int(home_winner) if home_winner is not None else None
                away_team_win = int(away_winner) if away_winner is not None else None

                match_data_home = {
                    'Year': season,
                    'Round': round_name,
                    'Stage': stage_number,
                    'Team_i_name': fixture['teams']['home']['name'],
                    'Team_id_i': fixture['teams']['home']['id'],
                    'Opponent_j_name': fixture['teams']['away']['name'],
                    'Opponent_j_id': fixture['teams']['away']['id'],
                    'Team_i_win': home_team_win
                }
                all_matches.append(match_data_home)

                # The same logic applies for the away team
                match_data_away = {
                    'Year': season,
                    'Round': round_name,
                    'Stage': stage_number,
                    'Team_name': fixture['teams']['away']['name'],
                    'Team_id': fixture['teams']['away']['id'],
                    'Opponent_name': fixture['teams']['home']['name'],
                    'Opponent_id': fixture['teams']['home']['id'],
                    'Team_i_win': away_team_win
                }
                all_matches.append(match_data_away)

    # Convert the all_matches list to a DataFrame
    matches_df = pd.DataFrame(all_matches)

    return matches_df


country = 'Netherlands'
cup = 'KNVB_Beker'
season_start = 2017
season_end = 2023

fixtures_df = process_cup_fixtures(country, cup, season_start, season_end)
print(fixtures_df.head())

   Year        Round  Stage Team_i_name  Team_id_i Opponent_j_name  \
0  2017        Final    6.0  AZ Alkmaar      201.0       Feyenoord   
1  2017        Final    6.0         NaN        NaN             NaN   
2  2017  Semi-finals    5.0   Feyenoord      209.0       Willem II   
3  2017  Semi-finals    5.0         NaN        NaN             NaN   
4  2017  Semi-finals    5.0  AZ Alkmaar      201.0          Twente   

   Opponent_j_id  Team_i_win  Team_name  Team_id Opponent_name  Opponent_id  
0          209.0         0.0        NaN      NaN           NaN          NaN  
1            NaN         1.0  Feyenoord    209.0    AZ Alkmaar        201.0  
2          195.0         1.0        NaN      NaN           NaN          NaN  
3            NaN         0.0  Willem II    195.0     Feyenoord        209.0  
4          415.0         1.0        NaN      NaN           NaN          NaN  


NameError: name 'load_stages' is not defined