In [12]:
from raw_data.loader import request_data, load_mappings_from_yaml, get_project_root
import os
import json


def load_standings_for_seasons(league_name, start_year, end_year, project_root=None):
    """Collect the stored league standings for a range of seasons.

    For every season from ``start_year`` through ``end_year`` (inclusive) this
    reads ``<project_root>/raw_data/<league_name>/<season>/league_data.json``
    and extracts ``response[0]['league']['standings']`` from it. One status
    line is printed per season.

    Args:
        league_name: Name of the league directory under ``raw_data``.
        start_year: First season to load.
        end_year: Last season to load (inclusive).
        project_root: Optional base directory that contains ``raw_data``.
            When omitted, the value of ``get_project_root()`` is used.

    Returns:
        dict mapping the season (as a string) to the extracted standings.
        Seasons whose file does not exist, or whose ``response`` entry is
        missing or empty, are left out. When the ``league`` or ``standings``
        key is absent the season maps to an empty list.
    """
    if project_root is None:
        project_root = get_project_root()
    league_path = os.path.join(project_root, 'raw_data', league_name)
    standings_data = {}

    for season in range(start_year, end_year + 1):  # end_year is inclusive
        season_str = str(season)
        file_path = os.path.join(league_path, season_str, 'league_data.json')
        if not os.path.exists(file_path):
            print(f"No data found for {league_name} {season}.")
            continue

        # JSON text is UTF-8; do not rely on the platform default encoding,
        # which can mis-decode non-ASCII team names on some systems.
        with open(file_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)

        if 'response' in json_data and json_data['response']:
            standings = json_data['response'][0].get('league', {}).get('standings', [])
            standings_data[season_str] = standings
            print(f"Extracted standings for {league_name} {season}.")
        else:
            print(f"'response' key missing or empty for {league_name} {season}.")

    return standings_data


# Example usage: load every Premier League season from 2010 through 2022
# (both bounds are included).
league_name = 'Premier League'
start_year, end_year = 2010, 2022
standings_data = load_standings_for_seasons(
    league_name=league_name,
    start_year=start_year,
    end_year=end_year,
)


Extracted standings for Premier League 2010.
Extracted standings for Premier League 2011.
Extracted standings for Premier League 2012.
Extracted standings for Premier League 2013.
Extracted standings for Premier League 2014.
Extracted standings for Premier League 2015.
Extracted standings for Premier League 2016.
Extracted standings for Premier League 2017.
Extracted standings for Premier League 2018.
Extracted standings for Premier League 2019.
Extracted standings for Premier League 2020.
Extracted standings for Premier League 2021.
Extracted standings for Premier League 2022.


In [13]:
# Inspect the loaded structure: a dict keyed by season string whose values are
# nested lists of per-team standings entries (see the output below).
standings_data

{'2010': [[{'rank': 1,
    'team': {'id': 33,
     'name': 'Manchester United',
     'logo': 'https://media.api-sports.io/football/teams/33.png'},
    'points': 80,
    'goalsDiff': 41,
    'group': 'Premier League',
    'form': 'WDWLW',
    'status': None,
    'description': 'UEFA Champions League',
    'all': {'played': 38,
     'win': 23,
     'draw': 11,
     'lose': 4,
     'goals': {'for': 78, 'against': 37}},
    'home': {'played': 19,
     'win': 18,
     'draw': 1,
     'lose': 0,
     'goals': {'for': 49, 'against': 12}},
    'away': {'played': 19,
     'win': 5,
     'draw': 10,
     'lose': 4,
     'goals': {'for': 29, 'against': 25}},
    'update': '2018-02-15T00:00:00+00:00'},
   {'rank': 2,
    'team': {'id': 49,
     'name': 'Chelsea',
     'logo': 'https://media.api-sports.io/football/teams/49.png'},
    'points': 71,
    'goalsDiff': 36,
    'group': 'Premier League',
    'form': 'LDLWW',
    'status': None,
    'description': 'UEFA Champions League',
    'all': {'pla

In [14]:
# Persist the collected standings as pretty-printed JSON in the current
# working directory; the file name is derived from the league name.
output_path = f"standings_{league_name}"
with open(output_path, 'w') as out_file:
    json.dump(standings_data, out_file, indent=4)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming standings_data is already loaded as shown above

# Step 2: Prepare the Data
def prepare_data(standings_data):
    """Flatten nested season standings into one row per team per season.

    Args:
        standings_data: dict mapping a season label to a list of standings
            groups, where each group is a list of per-team entries. Only the
            first group of every season is used. Seasons with no groups
            (an empty list or ``None``) are skipped instead of raising.

    Returns:
        pandas.DataFrame with the columns Season, Rank, Team, Points, Form,
        Played, Win, Draw, Lose, GoalsDiff, GoalsFor and GoalsAgainst. The
        columns are present even when there are no rows, so column lookups
        on the result stay valid.
    """
    columns = [
        'Season', 'Rank', 'Team', 'Points', 'Form', 'Played',
        'Win', 'Draw', 'Lose', 'GoalsDiff', 'GoalsFor', 'GoalsAgainst',
    ]
    rows = []
    for season, standings_list in standings_data.items():
        if not standings_list:
            # No standings were extracted for this season; indexing [0]
            # would raise IndexError, so skip it.
            continue
        for standings in standings_list[0]:  # only the first group is used
            overall = standings['all']
            rows.append({
                'Season': season,
                'Rank': standings['rank'],
                'Team': standings['team']['name'],
                'Points': standings['points'],
                'Form': standings['form'],
                'Played': overall['played'],
                'Win': overall['win'],
                'Draw': overall['draw'],
                'Lose': overall['lose'],
                'GoalsDiff': standings['goalsDiff'],
                'GoalsFor': overall['goals']['for'],
                'GoalsAgainst': overall['goals']['against'],
            })
    # Explicit columns keep the schema stable when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

# Flatten the nested standings into a single table.
df = prepare_data(standings_data)

# Quick sanity check: print the first five rows.
print(df.head(5))

# Step 3: Data Analysis and Visualization

# Example: points per season for every team that finished in the top 5.
# Pivoting gives one column per team; seasons where a team finished outside
# the top 5 become NaN.
top_teams = (
    df.loc[df['Rank'] <= 5]
    .pivot(index='Season', columns='Team', values='Points')
)
top_teams.plot.line(marker='o', figsize=(10, 6))
plt.title('Top 5 Teams Points Across Seasons')
plt.xlabel('Season')
plt.ylabel('Points')
plt.xticks(rotation=45)
plt.legend(title='Team')
plt.tight_layout()
plt.show()

# Example: league table for one season, ordered by final rank.
season = '2010'
season_df = df.loc[df['Season'].eq(season)].sort_values('Rank')
print(season_df.loc[:, [
    'Rank', 'Team', 'Points', 'GoalsDiff', 'Form', 'Played',
    'Win', 'Draw', 'Lose', 'GoalsFor', 'GoalsAgainst',
]])


In [17]:
# Display the flattened standings table (the rendered output elides the middle rows).
# NOTE(review): the saved output lists GoalsDiff before Form, which differs from
# the column order prepare_data builds, and the cell defining df shows as
# unexecuted (In [None]) — this output presumably predates the current code;
# re-run the notebook top to bottom to refresh it.
df

Unnamed: 0,Season,Rank,Team,Points,GoalsDiff,Form,Played,Win,Draw,Lose,GoalsFor,GoalsAgainst
0,2010,1,Manchester United,80,41,WDWLW,38,23,11,4,78,37
1,2010,2,Chelsea,71,36,LDLWW,38,21,8,9,69,33
2,2010,3,Manchester City,71,27,WWWLW,38,21,8,9,60,33
3,2010,4,Arsenal,68,29,DLLWL,38,19,11,8,72,43
4,2010,5,Tottenham,62,9,WWLDL,38,16,14,8,55,46
...,...,...,...,...,...,...,...,...,...,...,...,...
255,2022,16,Nottingham Forest,38,-30,DWDWL,38,9,11,18,38,68
256,2022,17,Everton,36,-23,WDLWD,38,8,12,18,34,57
257,2022,18,Leicester,34,-17,WDLLD,38,9,7,22,51,68
258,2022,19,Leeds,31,-30,LLDLL,38,7,10,21,48,78
