# **Get Match Data + Odds Data**

## *General Part - (Functions + Get all fixtures IDs)*

##### Imports

In [None]:
# Imports
from sportmonks.soccer import SoccerApiV2
from datetime import date
import pandas as pd
import collections
import time 
import pandas as pd
import json
# API token passed to the client below.
# NOTE(review): left empty here — presumably it has to be filled in before running; confirm.
mytoken = ''
# Module-level API client; the functions defined later in this file use this `soccer` object.
soccer = SoccerApiV2(api_token=mytoken)
# Number of HTTP requests made
print(soccer.http_requests_made)

##### Generic Functions

In [None]:
def flatten(d, parent_key='', sep='_'):
    """This function turns a nested dictionary into a flattened (single-level) dictionary.

    Keys of nested dictionaries are joined to the key of their parent with sep, e.g.
    {'a': {'b': 1}} becomes {'a_b': 1}. Only mappings are descended into: lists and
    any other value are stored unchanged.

    d: nested dictionary (not modified)
    parent_key: prefix put in front of every key of d (used by the recursive calls)
    sep: separator placed between a parent key and a child key

    Returns a new flat dictionary.
    """
    # `collections.MutableMapping` was removed in Python 3.10; the abstract base
    # class lives in `collections.abc`. Imported locally so the function does not
    # depend on how `collections` was imported at the top of the file.
    from collections.abc import MutableMapping

    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse, carrying the accumulated key as prefix
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

In [None]:
# Short names for the bets' types (applied first) ...
abbr1 = {
    '3WayResult': '3W',
    'AsianHandicap': 'AH',
    'GoalsOver/Under': 'O/U',
    'CorrectScore': 'CS',
}
# ... and for the remaining generic terms (applied afterwards)
abbr2 = {
    'Home': 'H',
    'Away': 'A',
    'Over': 'O',
    'Under': 'U',
    'Handicap': 'Hnd',
    'Result': 'RES',
    'TotalGoals': 'TG',
}

def myReplace(text):
    """This function shortens a string by substituting every key term of abbr1, and then of abbr2, with its abbreviation.

    The order matters: abbr1 holds the longer terms (e.g. 'AsianHandicap'), which must be
    replaced before the shorter abbr2 terms they contain (e.g. 'Handicap').

    text: original string to simplify

    Returns the simplified string.
    """
    shortened = text
    for table in (abbr1, abbr2):
        for term, short in table.items():
            shortened = shortened.replace(term, short)
    return shortened

##### Common Functions (Fixture and Dynamic Data)

In [None]:
def list_fixtures_or_dynamic(fix_ids_list, fix = True):
    """This function requests from the API the data of every fixture id in fix_ids_list, controlling for the API calls limit (maximum of 2000 requests for hour) by sleeping one hour after every 1999 processed ids.

    Each response is flattened with flatten(). In case an exception occurs during a request, the function prints a warning with the exception and the position of fix_ids_list at which it occurred, records the fixture id and waits 10 seconds before moving on.

    fix_ids_list: list of fixture ids to request (must support len())
    fix: True for match static information, False for dynamic (in-time) data

    Returns a tuple (complete, exceptions): the list of flattened fixture dictionaries successfully retrieved, and the list of fixture ids whose request raised an exception.
    """
    # The includes only depend on `fix`, so they are built once instead of at every iteration
    if fix:
        include_feat = ['localTeam', 'visitorTeam', 'referee', 'localCoach', 'visitorCoach', 'round', 'stats', 'league', 'season', 'venue'] # features for fixture
    else:
        include_feat = ['substitutions', 'goals', 'cards', 'corners', 'lineup', 'bench', 'sidelined'] # features for dynamic

    complete = []
    exceptions = []
    for index, i in enumerate(fix_ids_list):
        try:
            complete.append(flatten(soccer.fixture(fixture_id=i, includes=include_feat)))
        except Exception as e:
            print('************** An exception occurred: {} **************'.format(e))
            print('************** At Index: {} **************'.format(index))
            # Keep the fixture id so that the request can be repeated later
            exceptions.append(i)
            time.sleep(10)

        # Pause before reaching the hourly limit of requests
        if (index + 1) % 1999 == 0:
            print('STOP N°: ', int((index+1) / 1999),' -------- PERC: ', round((index+1)/len(fix_ids_list)*100, 2), '%')
            print('!!! WAIT ONE HOUR !!!')
            time.sleep(3600)
            print('--- RE-START MAKING API CALLS ---')
    return complete, exceptions

In [None]:
def correct_exceptions_fix_dyn(complete_fix_list, exceptions_list, fix = True):
    """This function repeats the fixture or dynamic data requests that previously raised an exception and adds their results to the already collected data.

    complete_fix_list: list of dictionaries for the fix or dynamic data, where each dictionary contains all fixture information or dynamic events for a particular match. It is extended in place.
    exceptions_list: values to request again; each one is passed to the API as fixture_id
    fix: True for match static information, False for dynamic (in-time) data

    Returns complete_fix_list (the same list object), with one flattened dictionary appended for every element of exceptions_list.
    """
    # Sizes before the correction, used for the final consistency print
    size_before = len(complete_fix_list)
    n_failed = len(exceptions_list)
    print('Amount of data loss: ', n_failed)

    # Nothing to repeat
    if not exceptions_list:
        return complete_fix_list

    # Same includes used by the original requests
    static_feat = ['localTeam', 'visitorTeam', 'referee', 'localCoach', 'visitorCoach', 'round', 'stats', 'league', 'season', 'venue']
    dynamic_feat = ['substitutions', 'goals', 'cards', 'corners', 'lineup', 'bench', 'sidelined']
    include_feat = static_feat if fix else dynamic_feat

    # Repeat every failed request and store its flattened result
    for failed_id in exceptions_list:
        raw_fixture = soccer.fixture(fixture_id=failed_id, includes=include_feat)
        complete_fix_list.append(flatten(raw_fixture))

    # Check that every failed request has been recovered
    expected_size = size_before + n_failed
    print('No data loss - AFTER CORRECTION? ', len(complete_fix_list) == expected_size)
    return complete_fix_list

##### Odds Specific Functions

In [None]:
def odds_ordered(unordered_data):
    """This function reorganizes the unordered odds data resulting from the API in a more manageable format (list of dictionaries - where each dictionary represents a different match), excluding undesired bets' types, and giving more understandable names to bets.

    Each returned dictionary holds the match id under 'id' plus one entry per odd. The key of an odd is
    '<bet type>_<bookmaker>__' (shortened with myReplace()) followed by the odd's label and, when they are set, by its total, handicap or extra; the value is the odd's 'value'.

    unordered_data: unordered odds data (in its native API format): a list of dictionaries with a 'match_id' key and an 'odds_infos' key. A match whose 'odds_infos' is missing, None or empty produces a dictionary with only the 'id'.

    Returns the list of ordered dictionaries, one per element of unordered_data and in the same order.
    """
    # Not interested in all types of bets
    bets_to_exclude = ['Goalscorer', 'Multi Scorers', 'Team Goalscorer', 'Player to be Booked', 'Player to be Sent Off']
    final_list = []
    for fixture in unordered_data:
        # One dictionary per match, starting from its id
        d = {'id': fixture['match_id']}
        # Loop only in odds_infos value (tolerating matches without odds)
        for bet in fixture.get('odds_infos') or []:
            # Skip the whole bet when its type is excluded; otherwise its odds would be
            # stored under the name of the previously processed bet type
            if bet['name'] in bets_to_exclude:
                continue
            odd_name = bet['name'].replace(' ', '')
            # Loop for each bookmaker
            for bookmaker in bet['bookmaker']:
                bookmaker_name = bookmaker['name'].replace(' ', '')
                # Key prefix combining bet type and bookmaker, simplified with myReplace();
                # it is the same for all the odds of this bookmaker
                book_info = myReplace(odd_name + '_' + bookmaker_name + '__')

                # Loop for each possible bet
                for last_dict in bookmaker['odds']:
                    # Label and value are the two fundamental values
                    label = last_dict['label']
                    value = last_dict['value']

                    # Total, handicap and extra are optional: look them up by KEY
                    # (None when the key is absent or its value is null)
                    total = last_dict.get('total')
                    handicap = last_dict.get('handicap')
                    extra = last_dict.get('extra')

                    # Filling the dictionary considering the existence of different bets' types
                    details = [handicap, total, extra]
                    present = [x for x in details if x is not None]
                    if len(present) <= 1:
                        # Plain bet, or bet characterised by a single extra detail
                        suffix = ''.join('_' + str(x) for x in present)
                    else:
                        # Several details: always write handicap, total and extra (in this order)
                        suffix = '_' + '_'.join(str(x) for x in details)
                    d[book_info + str(label) + suffix] = value
        # Append bets dictionary to list
        final_list.append(d)
    return final_list

In [None]:
def list_all_odds(fix_ids_list):
    """This function requests from the API the pre-match odds of every fixture id in fix_ids_list, controlling for the API calls limit (maximum of 2000 requests for hour) by sleeping one hour after every 1999 processed ids.

    For every id a dictionary {'match_id': id, 'odds_infos': odds} is stored. In case an exception occurs during the request, the function prints a warning with the exception and the position of fix_ids_list at which it occurred, stores that position under the 'exception' key instead of the odds, and waits 10 seconds.

    fix_ids_list: list of fixture ids to request (must support len())

    Returns a tuple (complete, exceptions_index): the list of dictionaries (one per id, in order) and the list of positions whose request raised an exception.
    """
    collected, failed_positions = [], []
    for position, fixture_id in enumerate(fix_ids_list):
        entry = {'match_id': fixture_id}
        try:
            entry['odds_infos'] = soccer.pre_match_odds(fixture_id=fixture_id)
        except Exception as err:
            print('************** An exception occurred: {} **************'.format(err))
            print('************** At Index: {} **************'.format(position))
            # Mark the entry so that it can be found and requested again later
            entry['exception'] = position
            failed_positions.append(position)
            time.sleep(10)
        # The entry is stored whether or not the request succeeded
        collected.append(entry)

        # Pause before reaching the hourly limit of requests
        processed = position + 1
        if processed % 1999 == 0:
            print('STOP N°: ', int(processed / 1999),' -------- PERC: ', round(processed/len(fix_ids_list)*100, 2), '%')
            print('!!! WAIT ONE HOUR !!!')
            time.sleep(3600)
    return collected, failed_positions

In [None]:
def check_exceptions_odds(complete_odds_list):
    """This function reports the presence of exceptions or other errors in a list of odds dictionaries, where each dictionary contains all odds for a particular match.

    Every key different from 'match_id' and 'odds_infos' counts as an error: the dictionary holding it is printed (once per unexpected key) and the total number of errors is printed at the end.

    complete_odds_list: list of dictionaries for the odds data to check
    """
    expected_keys = ('match_id', 'odds_infos')
    print('\nCheck for Exceptions and Errors: ')
    exceptions_count = 0
    for match_odds in complete_odds_list:
        for key in match_odds:
            if key in expected_keys:
                continue
            # Unexpected key: show the whole entry
            print(match_odds)
            exceptions_count += 1
    print('N. of detected errors: ', exceptions_count)

In [None]:
def correct_exceptions_odds(complete_odds_list, exceptions_list):
    """This function corrects the exceptions generated when making odds data requests to the API, by requesting again the odds of the matches that failed.

    complete_odds_list: list of dictionaries for the odds data, where each dictionary contains all odds for a particular match. It is modified in place.
    exceptions_list: list of exceptions' indexes (in complete_odds_list)

    Returns complete_odds_list (the same list object). If a repeated request raises, the exception propagates and the affected entry keeps its 'exception' marker.
    """
    # Check for exceptions before
    check_exceptions_odds(complete_odds_list)
    # In case there are no exceptions no necessity to correct and a 2nd check
    if not exceptions_list:
        return complete_odds_list

    # Handle exceptions
    for e in exceptions_list:
        entry = complete_odds_list[e]
        # Make a new request to the API to get only the data with an exception
        entry['odds_infos'] = soccer.pre_match_odds(fixture_id=entry['match_id'])
        # Delete the exception marker only once the new request has succeeded, so that
        # an entry that fails again is still recognisable as incomplete
        entry.pop('exception', None)
    # Check for exceptions after
    check_exceptions_odds(complete_odds_list)
    return complete_odds_list

In [None]:
def store_inJSON(complete_odds_list, file_path):
    """This function reorders complete_odds_list with odds_ordered() and stores the result as a JSON file. It also provides a quick check of the process.

    The file is written only when the ordered data has as many elements as complete_odds_list; otherwise an error message is printed and nothing is stored.

    complete_odds_list: list of dictionaries for the odds data, where each dictionary contains all odds for a particular match.
    file_path: path where to store data as JSON file
    """
    # Ordering Data
    final_odds = odds_ordered(complete_odds_list)

    # After a check, store data as a JSON file
    if len(final_odds) == len(complete_odds_list):
        # The with-statement closes the file, no explicit close() is needed
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(final_odds, f, ensure_ascii=False, indent=4)
        print('Correctly Stored!!!')
    else:
        print('Error occurred in the ordering process - Data not stored!')

##### Get All Fixtures IDs for each League & Cup

In [None]:
# Final date of the window used for the fixtures requests to the API
end_day = '2022-04-22'
# Containers for the fixture data returned by the API, kept separate for cups and leagues
cups_fix_dict, leagues_fix_dict = {}, {}

In [None]:
# Each competition is requested with the same start date, the same end date (end_day) and its own
# league id; the result is stored under a different name in the cups or in the leagues dictionary.
# The rows are listed (and requested) in the order in which they have to be stored.
_fixture_requests = [
    (cups_fix_dict, 'cl_fixtures', 2),        # CHAMPIONS LEAGUE
    (cups_fix_dict, 'el_fixtures', 5),        # EUROPA LEAGUE
    (leagues_fix_dict, 'pl_fixtures', 8),     # PREMIER LEAGUE
    (cups_fix_dict, 'fa_fixtures', 24),       # FA CUP
    (leagues_fix_dict, 'bu_fixtures', 82),    # BUNDESLIGA
    (leagues_fix_dict, 'l1_fixtures', 301),   # LIGUE 1
    (leagues_fix_dict, 'sa_fixtures', 384),   # SERIE A
    (cups_fix_dict, 'ci_fixtures', 390),      # COPPA ITALIA
    (leagues_fix_dict, 'll_fixtures', 564),   # LA LIGA
    (cups_fix_dict, 'dr_fixtures', 570),      # COPA DEL REY
]
for _target, _name, _league_id in _fixture_requests:
    _target[_name] = soccer.fixtures(start_date = '2011-08-01', end_date = end_day, league_ids = _league_id)

In [None]:
# Gather the ids of all the fixtures, competition after competition
# Fixture ids of the cups
cups_all_fixtures = [fix['id'] for cupvalues in cups_fix_dict.values() for fix in cupvalues]
# Fixture ids of the leagues
leagues_all_fixtures = [fix['id'] for leaguevalues in leagues_fix_dict.values() for fix in leaguevalues]

# Check the length of both lists
print('N. of fixtures observed for CUPS: ', len(cups_all_fixtures))
print('N. of fixtures observed for LEAGUES: ', len(leagues_all_fixtures))

## *Match Data* 

Leagues Fixtures

In [None]:
# Wait one hour before starting this batch of requests.
# NOTE(review): presumably this lets the hourly API request limit reset after the previous calls — confirm.
time.sleep(3600)
# Get fixtures data for leagues games (fix=True: static match information);
# the second returned value lists the fixture ids whose request raised an exception
leagues_complete, exceptions_leagues_complete = list_fixtures_or_dynamic(leagues_all_fixtures, fix=True)

In [None]:
# Correction of exceptions (if necessary): the failed fixture requests are repeated
# and their results appended to the collected data
leagues_complete = correct_exceptions_fix_dyn(complete_fix_list=leagues_complete, exceptions_list=exceptions_leagues_complete)

# Store leagues data as CSV (the DataFrame index is not written)
leagues = pd.DataFrame(leagues_complete)
leagues.to_csv("../../Data/From_Collection/Match&Odds/leagues_static.csv", encoding='utf-8', index=False)

Cups Fixtures

In [None]:
# Wait one hour before starting this batch of requests.
# NOTE(review): presumably this lets the hourly API request limit reset after the previous calls — confirm.
time.sleep(3600) 
# Get fixtures data for cups games (fix=True: static match information);
# the second returned value lists the fixture ids whose request raised an exception
cups_complete, exceptions_cups_complete = list_fixtures_or_dynamic(cups_all_fixtures, fix=True)

In [None]:
# Correction of exceptions (if necessary): the failed fixture requests are repeated
# and their results appended to the collected data
cups_complete = correct_exceptions_fix_dyn(complete_fix_list=cups_complete, exceptions_list=exceptions_cups_complete)

# Store cups data as CSV (the DataFrame index is not written)
cups = pd.DataFrame(cups_complete)
cups.to_csv("../../Data/From_Collection/Match&Odds/cups_static.csv", encoding='utf-8', index=False)

## *Odds Data* 

Leagues Data

In [None]:
# Wait one hour before starting this batch of requests.
# NOTE(review): presumably this lets the hourly API request limit reset after the previous calls — confirm.
time.sleep(3600)
# Acquire pre-match odds from API for the leagues fixtures; the second returned value
# lists the positions (in leagues_all_fixtures) whose request raised an exception
leagues_complete_odds, exceptions_leagues_odds = list_all_odds(leagues_all_fixtures)

In [None]:
# Handle exceptions from previous step: the odds of the failed positions are requested again
leagues_complete_odds = correct_exceptions_odds(complete_odds_list=leagues_complete_odds, exceptions_list=exceptions_leagues_odds)
# Store data: the odds are reordered by store_inJSON before being written as a JSON file
store_inJSON(complete_odds_list=leagues_complete_odds, file_path='../../Data/From_Collection/Match&Odds/leagues_odds.json')

Cups Data

In [None]:
# Wait one hour before starting this batch of requests.
# NOTE(review): presumably this lets the hourly API request limit reset after the previous calls — confirm.
time.sleep(3600)
# Acquire pre-match odds from API for the cups fixtures; the second returned value
# lists the positions (in cups_all_fixtures) whose request raised an exception
cups_complete_odds, exceptions_cups_odds = list_all_odds(cups_all_fixtures) 

In [None]:
# Handle exceptions from previous step: the odds of the failed positions are requested again
cups_complete_odds = correct_exceptions_odds(complete_odds_list=cups_complete_odds, exceptions_list=exceptions_cups_odds)
# Store data: the odds are reordered by store_inJSON before being written as a JSON file
store_inJSON(complete_odds_list=cups_complete_odds, file_path='../../Data/From_Collection/Match&Odds/cups_odds.json')

## *More Features Data*

Leagues More

In [None]:
# Wait one hour before starting this batch of requests.
# NOTE(review): presumably this lets the hourly API request limit reset after the previous calls — confirm.
time.sleep(3600)
# Get dynamic (in-time) data for leagues games (fix=False); the second returned value
# lists the fixture ids whose request raised an exception
leagues_more, exceptions_leagues_more = list_fixtures_or_dynamic(leagues_all_fixtures, fix=False)

In [None]:
# Correction of exceptions (if necessary): the failed dynamic data requests are repeated
# and their results appended to the collected data
leagues_more = correct_exceptions_fix_dyn(complete_fix_list=leagues_more, exceptions_list=exceptions_leagues_more, fix=False)

# Store leagues dynamic data as CSV (this rebinds the `leagues` name to a new DataFrame)
leagues = pd.DataFrame(leagues_more)
leagues.to_csv("../../Data/From_Collection/Match&Odds/leagues_more.csv", encoding='utf-8', index=False)

Cups More

In [None]:
# Wait one hour before starting this batch of requests.
# NOTE(review): presumably this lets the hourly API request limit reset after the previous calls — confirm.
time.sleep(3600)
# Get dynamic (in-time) data for cups games (fix=False); the second returned value
# lists the fixture ids whose request raised an exception
cups_more, exceptions_cups_more = list_fixtures_or_dynamic(cups_all_fixtures, fix=False)

In [None]:
# Correction of exceptions (if necessary): the failed dynamic data requests are repeated
# and their results appended to the collected data
cups_more = correct_exceptions_fix_dyn(complete_fix_list=cups_more, exceptions_list=exceptions_cups_more, fix=False)

# Store cups dynamic data as CSV (this rebinds the `cups` name to a new DataFrame)
cups = pd.DataFrame(cups_more)
cups.to_csv("../../Data/From_Collection/Match&Odds/cups_more.csv", encoding='utf-8', index=False)