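This notebook demonstrates fetching data from Sportmonks and saving it to disk for use in future projects. The scope is to get all fixtures for Europe's top five leagues: a first batch covering the past 5 seasons (2017/2018 to 2021/2022) and a second batch covering the older seasons 2010/2011 to 2016/2017.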

# Imports

In [1]:
import json
from pathlib import Path

import pandas as pd

from sportmonks_class import Sportmonks
from sportmonks_config import (
    FIXTURES_EVENT_INCLUDES,
    FIXTURES_ODDS_INCLUDES,
)

# Functions

In [2]:
def save_dict(json_file, file_path):
    '''Save JSON-serialisable data to a file.

    Parameters
    ----------
    json_file : object
        The data handed to ``json.dump`` (for example a dict or a list of
        dicts).
    file_path : str or os.PathLike
        Destination file. It is opened in ``'w'`` mode, so an existing file
        is overwritten. Missing parent directories are created.

    Raises
    ------
    TypeError
        Raised by ``json.dump`` if ``json_file`` contains objects that are
        not JSON serialisable.
    '''
    target = Path(file_path)
    # Create the destination folder first: saving into a folder that does not
    # exist yet would otherwise raise FileNotFoundError and lose fetched data.
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w') as file:
        json.dump(json_file, file)

# Instantiate Sportmonks Class

In [3]:
# Client object from the local `sportmonks_class` module; every API call in
# this notebook goes through `sm`.
# NOTE(review): presumably the constructor picks up the API token from project
# config - confirm in sportmonks_class.
sm = Sportmonks()

# Fetch league ids of interest

In [5]:
# Fetch the leagues table; it is used as a DataFrame below (filtered on its
# `id` column to pick the top five leagues).
leagues_df = sm.get_leagues()

Unnamed: 0,id,active,type,legacy_id,country_id,logo_path,name,is_cup,is_friendly,current_season_id,current_round_id,current_stage_id,live_standings,coverage.predictions,coverage.topscorer_goals,coverage.topscorer_assists,coverage.topscorer_cards
0,8,True,domestic,29,462,https://cdn.sportmonks.com/images/soccer/leagu...,Premier League,False,False,19734,274676.0,77457864.0,True,True,True,True,True
1,72,True,domestic,1,38,https://cdn.sportmonks.com/images/soccer/leagu...,Eredivisie,False,False,19726,274590.0,77457832.0,True,True,True,True,True
2,82,True,domestic,4,11,https://cdn.sportmonks.com/images/soccer/leagu...,Bundesliga,False,False,19744,274912.0,77457883.0,True,True,True,True,True
3,271,True,domestic,43,320,https://cdn.sportmonks.com/images/soccer/leagu...,Superliga,False,False,19686,274241.0,77457696.0,True,True,True,True,True
4,301,True,domestic,47,17,https://cdn.sportmonks.com/images//soccer/leag...,Ligue 1,False,False,19745,274942.0,77457884.0,True,True,True,True,True
5,384,True,domestic,22,251,https://cdn.sportmonks.com/images//soccer/leag...,Serie A,False,False,19806,276005.0,77458057.0,True,True,True,True,True
6,462,True,domestic,16,20,https://cdn.sportmonks.com/images//soccer/leag...,Liga Portugal,False,False,19896,277459.0,77458262.0,True,True,True,True,True
7,501,True,domestic,66,1161,https://cdn.sportmonks.com/images/soccer/leagu...,Premiership,False,False,19735,274720.0,77457866.0,True,True,True,True,True
8,564,True,domestic,19,32,https://cdn.sportmonks.com/images/soccer/leagu...,La Liga,False,False,19799,275889.0,77458033.0,True,True,True,True,True
9,600,True,domestic,46,404,https://cdn.sportmonks.com/images/soccer/leagu...,Super Lig,False,False,19900,277531.0,77458273.0,True,True,True,True,True


In [37]:
# Sportmonks league ids: 8 Premier League, 82 Bundesliga, 301 Ligue 1,
# 564 La Liga, 384 Serie A.
is_top_5 = leagues_df['id'].isin([8, 82, 301, 564, 384])
top_5_leagues_df = leagues_df.loc[is_top_5]

In [38]:
# Map each selected league id to its display name.
top_5_dict = {
    league_id: league_name
    for league_id, league_name in zip(top_5_leagues_df['id'], top_5_leagues_df['name'])
}
top_5_dict

{8: 'Premier League',
 82: 'Bundesliga',
 301: 'Ligue 1',
 384: 'Serie A',
 564: 'La Liga'}

# Fetch seasons of interest

The first batch covered the last 5 seasons' worth of data (2017/2018 to 2021/2022); the second batch below extends this back to 2010/2011.

In [50]:
# Seasons to download, written the way they are matched against the `name`
# column of the seasons table ('YYYY/YYYY').
#   first batch:  2017/2018 through 2021/2022
#   second batch: 2010/2011 through 2016/2017  <- active
years_of_interest = [
    f'{start_year}/{start_year + 1}' for start_year in range(2010, 2017)
]

In [51]:
# Raw seasons payload; kept as-is because it is also passed to
# sm.get_season_fixtures in the fixtures loop.
seasons_json = sm.get_seasons()
# Tabular view of the same payload; the fixtures loop looks up season ids in
# it by `league_id` and `name`.
seasons_df = pd.DataFrame(seasons_json)

# Basic data to be saved

Fetch data

In [17]:
# dataframes
# Both results are written out with .to_csv in the next cell.
countries_df = sm.get_countries()
leagues_df = sm.get_leagues()

# json files
# The seasons payload is written out with save_dict (json.dump), not as CSV.
seasons_json = sm.get_seasons()

Save data

In [19]:
# Tabular reference data is written as CSV without the row index.
csv_targets = {
    'sportmonks_data/countries.csv': countries_df,
    'sportmonks_data/leagues.csv': leagues_df,
}
for csv_path, frame in csv_targets.items():
    frame.to_csv(csv_path, index=False)

# The seasons payload stays in its raw JSON form.
save_dict(seasons_json, 'sportmonks_data/seasons.json')

# Fixtures data

NB: the saving location was changed to `sportmonks_data/further_past_seasons/` for the older (second-batch) seasons.

In [67]:
# Download every fixture of each league/season pair twice - once with the
# event includes and once with the odds includes - and persist the raw JSON.
seasons_json = sm.get_seasons()
# Build the lookup table from the payload fetched in this cell so the cell
# does not depend on a `seasons_df` left over from an earlier cell.
seasons_df = pd.DataFrame(seasons_json)

# Sportmonks league ids: 8 Premier League, 82 Bundesliga, 301 Ligue 1,
# 384 Serie A, 564 La Liga (the same ids as top_5_dict.keys()).
league_ids = [8, 82, 301, 384, 564]

# Older seasons are kept apart from the files saved directly in
# sportmonks_data/. Create the folder up front so saving cannot fail after
# the API calls have already been made.
output_dir = Path('sportmonks_data/further_past_seasons')
output_dir.mkdir(parents=True, exist_ok=True)

# (file-name token, includes passed to the API, label for the progress line)
fixture_variants = [
    ('events', FIXTURES_EVENT_INCLUDES, 'Events'),
    ('odds', FIXTURES_ODDS_INCLUDES, 'Odds'),
]

# (league_id, year) pairs for which no season row was found.
skipped = []

for league_id in league_ids:
    for year in years_of_interest:
        # '2010/2011' -> '2010_2011' so the season can be used in a file name
        year_format = year.replace('/', '_')
        print(f"Persisting data for league with league_id: {league_id} and year: {year}")

        # Look up the season id for this league and season name.
        season_ids = seasons_df.loc[
            (seasons_df['league_id'] == league_id) & (seasons_df['name'] == year),
            'id',
        ]
        if season_ids.empty:
            # Report and carry on instead of dying with an IndexError from
            # .iloc[0]; the remaining leagues/seasons are still downloaded.
            skipped.append((league_id, year))
            print('No matching season found; skipping.')
            continue
        season_id = season_ids.iloc[0]

        # Save each variant right after fetching it, so the events file is
        # already on disk if the odds request fails.
        for variant, includes, label in fixture_variants:
            fixtures_json = sm.get_season_fixtures(
                season_id,
                includes_list=includes,
                seasons_json=seasons_json,
            )
            save_dict(
                fixtures_json,
                output_dir / f'fixtures_w_{variant}_{league_id}_{year_format}.json',
            )
            print(f'{label} data saved.')

if skipped:
    print(f'Skipped (league_id, year) pairs with no matching season: {skipped}')
        

Persisting data for league with league_id: 8 and year: 2010/2011
Events data saved.
Odds data saved.
Persisting data for league with league_id: 8 and year: 2011/2012
Events data saved.
Odds data saved.
Persisting data for league with league_id: 8 and year: 2012/2013
Events data saved.
Odds data saved.
Persisting data for league with league_id: 8 and year: 2013/2014
Events data saved.
Odds data saved.
Persisting data for league with league_id: 8 and year: 2014/2015
Events data saved.
Odds data saved.
Persisting data for league with league_id: 8 and year: 2015/2016
Events data saved.
Odds data saved.
Persisting data for league with league_id: 8 and year: 2016/2017
Events data saved.
Odds data saved.
Persisting data for league with league_id: 82 and year: 2010/2011
Events data saved.
Odds data saved.
Persisting data for league with league_id: 82 and year: 2011/2012
Events data saved.
Odds data saved.
Persisting data for league with league_id: 82 and year: 2012/2013
Events data saved.
Odds

# Demo loading the saved data

In [77]:
# Load the saved odds fixtures for one league/season from the first-batch folder.
league_id, year_format = 8, '2017_2018'
saved_path = f'sportmonks_data/fixtures_w_odds_{league_id}_{year_format}.json'
with open(saved_path, 'r') as saved_file:
    data = json.load(saved_file)

In [68]:
# Load the saved events fixtures for one league/season from the older-seasons folder.
league_id, year_format = 564, '2016_2017'
saved_path = f'sportmonks_data/further_past_seasons/fixtures_w_events_{league_id}_{year_format}.json'
with open(saved_path, 'r') as saved_file:
    event_data = json.load(saved_file)

In [69]:
# Inspect which fields are present on the first saved fixture record.
event_data[0].keys()

dict_keys(['id', 'league_id', 'season_id', 'stage_id', 'round_id', 'group_id', 'aggregate_id', 'venue_id', 'referee_id', 'localteam_id', 'visitorteam_id', 'winner_team_id', 'weather_report', 'commentaries', 'attendance', 'pitch', 'details', 'neutral_venue', 'winning_odds_calculated', 'formations', 'scores', 'time', 'coaches', 'standings', 'assistants', 'leg', 'colors', 'deleted', 'is_placeholder', 'localTeam', 'visitorTeam', 'substitutions', 'goals', 'cards', 'other', 'corners', 'lineup', 'bench', 'sidelined', 'stats', 'referee', 'events', 'localCoach', 'visitorCoach'])

In [84]:
# Load the saved odds fixtures for one league/season from the older-seasons folder.
league_id, year_format = 8, '2015_2016'
saved_path = f'sportmonks_data/further_past_seasons/fixtures_w_odds_{league_id}_{year_format}.json'
with open(saved_path, 'r') as saved_file:
    odds_data = json.load(saved_file)

In [85]:
# Inspect the `odds` field of the first fixture; in the saved run this
# displayed an empty list, i.e. no odds were stored for that fixture.
odds_data[0]['odds']

[]

In [None]:
# NB: the Premier League 2018/2019 season didn't work - TODO: investigate
# TODO: do some quality-control checks on the loaded data