# Data gathering for all analysis

#### Fetching data and creating folder structure where data is stored
#### Three APIs are used to fetch data for the teams: the first returns all match and player details; the second returns the goals, assists and cards awarded to each player; the third returns further player information (saves, clean sheets, penalties scored/missed/saved, own goals conceded, etc.) in addition to the card and goal details for all players.

In [1]:
# Import statements
import json, glob, os, re, requests, time
from datetime import date, datetime as datetime
import sys
from unidecode import unidecode

In [2]:
# Crowdscore developer console API key, read from the environment
crowdscore_api_key = os.getenv("crowdscore_api_key")
# Only report whether a key was found: printing the key itself would store
# the secret in the notebook's saved output.
print('Crowdscore API key loaded:', bool(crowdscore_api_key))

# Defining variables for later use
crowdscore_epl_competition_id = 2   # competition id sent to the Crowdscores endpoints
league_id = 0                       # placeholder until the league lookup has run
team_ids = []                       # team ids collected from the team lookups
players = []

# [cell output redacted: this line exposed the Crowdscores API key in plain text; the key should be revoked and rotated]


In [3]:
# Names of the APIs to fetch data from, in the order they are processed
apis = [
    'search_all_leagues',
    'lookup_all_teams',
    'lookup_all_players',
    'eventsseason',
    'teams',
    'seasons',
    'rounds',
    'playerstats',
    'fpl',
]
# Paths of the per-API data folders; starts empty
data_folders_directory = list()

In [4]:
def create_directory_for_data(api):
    """Create (if needed) and return the folder that stores the data of *api*.

    The folder is ``data/<api>`` below a base directory. The base is
    ``os.path.dirname('__file__')``; because the argument is the literal
    string ``'__file__'`` this evaluates to ``''``, so the resulting path is
    relative to the current working directory.
    """
    base_dir = os.path.dirname('__file__')
    return create_subfolders_for_data(base_dir, 'data', api)

def create_subfolders_for_data(data_folder, data, api):
    """Ensure the directory ``data_folder/data/api`` exists and return its path.

    Parameters:
        data_folder: base directory the structure is created under.
        data: name of the intermediate folder.
        api: name of the innermost folder (one per API).

    Returns:
        The joined path, whether the directory was just created or already
        existed.

    Raises:
        FileExistsError: if the path exists but is not a directory.
    """
    directory = os.path.join(data_folder, data, api)
    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(directory, exist_ok=True)
    return directory
    
def write_to_json_file(file_path, json_data):
    """Serialise *json_data* as JSON (2-space indent) into *file_path*.

    The file is opened in write mode, so existing content is replaced.
    """
    serialised = json.dumps(json_data, indent=2)
    with open(file_path, 'w') as target:
        target.write(serialised)
        
# Create one data folder per API and remember each folder's path
for api in apis:
    data_folders_directory.append(create_directory_for_data(api))

In [5]:
def fetch_response_from_api(api, team_id, player_id, season, game_week, timeout=30):
    """Send the GET request that belongs to *api* and return the response.

    The endpoints take different query parameters, so the URL and parameters
    are selected per API name and a single request is sent at the end.

    Parameters:
        api: one of 'search_all_leagues', 'lookup_all_teams',
            'lookup_all_players', 'eventsseason', 'teams', 'seasons',
            'rounds', 'playerstats' or 'fpl'.
        team_id: team id, used by 'lookup_all_players' and 'playerstats'.
        player_id: not used by any endpoint; kept for the existing call sites.
        season: season value, used by 'eventsseason' and 'playerstats'.
        game_week: round id, used by 'playerstats'.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the run forever.

    The module-level names ``league_id``, ``crowdscore_api_key`` and
    ``crowdscore_epl_competition_id`` are read at call time by the branches
    that need them.

    Returns:
        The ``requests`` response object, or None when *api* is not one of
        the names listed above.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the timeout expires.
    """
    # Endpoints without query parameters leave the payload as None
    payload = None

    if api == 'search_all_leagues':
        url = 'http://www.thesportsdb.com/api/v1/json/1/search_all_leagues.php?s=soccer'

    elif api == 'lookup_all_teams':
        url = 'http://www.thesportsdb.com/api/v1/json/1/lookup_all_teams.php'
        payload = {'id': league_id}

    elif api == 'lookup_all_players':
        url = 'http://www.thesportsdb.com/api/v1/json/1/lookup_all_players.php'
        payload = {'id': team_id}

    elif api == 'eventsseason':
        url = 'http://www.thesportsdb.com/api/v1/json/1/eventsseason.php'
        payload = {'id': league_id, 's': season}

    elif api == 'teams':
        url = 'https://api.crowdscores.com/v1/teams'
        payload = {'api_key': crowdscore_api_key, 'competition_ids': crowdscore_epl_competition_id}

    elif api == 'seasons':
        url = 'https://api.crowdscores.com/v1/seasons'
        payload = {'api_key': crowdscore_api_key}

    elif api == 'rounds':
        url = 'https://api.crowdscores.com/v1/rounds'
        payload = {'api_key': crowdscore_api_key, 'competition_ids': crowdscore_epl_competition_id}

    elif api == 'playerstats':
        url = 'https://api.crowdscores.com/v1/playerstats'
        payload = {
            'api_key': crowdscore_api_key,
            'competition_ids': crowdscore_epl_competition_id,
            'team_ids': team_id,
            'round_ids': game_week,
            'season_ids': season,
        }

    elif api == 'fpl':
        url = 'https://fantasy.premierleague.com/drf/bootstrap-static'

    else:
        # Unknown API name: nothing to request
        return None

    return requests.get(url, params=payload, timeout=timeout)
    
#Returns response object

In [6]:
def process_response_from_service(response, api, file_path):
    """Parse an API response, store it as JSON at *file_path* and return it.

    For 'lookup_all_players' and 'playerstats' the same file is written by
    several calls, so records are accumulated: entries whose id ('idPlayer'
    or 'dbid' respectively) is already in the file are skipped and the rest
    are appended. For every other API the freshly parsed response replaces
    the file content.

    Parameters:
        response: response object exposing ``status_code`` and ``text``.
        api: name of the API the response came from.
        file_path: path of the JSON file to write.

    Returns:
        The data written to the file, or 0 when the status code is not 200.
    """
    if response.status_code != 200:
        #Show error messages in case an API fails
        print('Request Failed for archives with status code',response.status_code)
        return 0

    # Only the text before the first '<br />' is parsed as JSON
    # NOTE(review): presumably the service can append HTML after the JSON body - confirm
    res = json.loads(response.text.split('<br />')[0])

    if api == 'lookup_all_players':
        # A missing or null 'player' entry is treated as "no players"
        incoming = res.get('player') or []
        id_key = 'idPlayer'
    elif api == 'playerstats':
        incoming = res
        id_key = 'dbid'
    else:
        # Not accumulated across calls: always store the fresh response
        # instead of writing back whatever an earlier run left in the file
        write_to_json_file(file_path, res)
        return res

    if os.path.exists(file_path):
        with open(file_path) as existing_file:
            records = json.load(existing_file)
        # Build the id set once so the duplicate check is one lookup per record
        known_ids = {record[id_key] for record in records}
        records.extend(record for record in incoming if record[id_key] not in known_ids)
    elif api == 'lookup_all_players':
        records = list(incoming)
    else:
        # No file yet: store the parsed response as received
        records = incoming

    #Write output to json format
    write_to_json_file(file_path, records)
    return records

In [7]:
def _season_start_year(today):
    """Return the calendar year in which the football season containing *today* started."""
    # Before August the running season is the one that started the previous year
    return today.year if today.month >= 8 else today.year - 1


# Fetch and store the data of every API, one API per data folder.
# Order matters: ids found by the earlier APIs (league_id, team_ids, season,
# game_week) are inputs of the later ones.
# NOTE(review): process_response_from_service returns 0 on a failed request;
# the branches that index into the result will then raise - confirm whether
# that should abort the run.
for data_folder_dir in data_folders_directory:
    # The folder is named after its API; take the last path component rather
    # than slicing off a fixed-length 'data/' prefix
    api = os.path.basename(data_folder_dir)

    #Generate name of response files
    file_name = api+'_response'
    file_path = os.path.join(data_folder_dir, file_name + '.json')

    # Add time delay between 2 api calls to fetch response without interruption
    time.sleep(1)

    # Fetch response for each API
    if api == 'search_all_leagues':
        response = fetch_response_from_api(api, 0, 0, 0, 0)
        # save all league details from response into json file
        resp_in_json = process_response_from_service(response, api, file_path)
        # Find the league id for English Premier League and store in a list so that in future more leagues can be analyzed
        league_id = [league['idLeague'] for league in resp_in_json['countrys'] if league['strCountry'] and league['strLeague']=='English Premier League']

    elif api == 'lookup_all_teams':
        response = fetch_response_from_api(api, 0, 0, 0, 0)
        # save details of all teams returned for the league in json file
        resp_in_json = process_response_from_service(response, api, file_path)
        # Find the team id for each team and store in a list
        team_ids = [team['idTeam'] for team in resp_in_json['teams']]

    elif api == 'lookup_all_players':
        # For each team, according to team id, fetch players details
        for team_id in team_ids:
            response = fetch_response_from_api(api, team_id, 0, 0, 0)
            # add the players details from response to the shared json file
            resp_in_json = process_response_from_service(response, api, file_path)

    elif api == 'eventsseason':
        # Season as two 2-digit years, e.g. '1718'; zero-padded so that
        # years 00-09 keep their leading zero
        start_year = _season_start_year(datetime.now())
        season = '{:02d}{:02d}'.format(start_year % 100, (start_year + 1) % 100)
        response = fetch_response_from_api(api, 0, 0, season, 0)
        # save the season's events from response into json file
        resp_in_json = process_response_from_service(response, api, file_path)

    elif api == 'teams':
        response = fetch_response_from_api(api, 0, 0, 0, 0)
        # save the details of each team from response into json file
        resp_in_json = process_response_from_service(response, api, file_path)
        # Find team id and store in a list
        team_ids = [team['dbid'] for team in resp_in_json]

    elif api == 'seasons':
        response = fetch_response_from_api(api, 0, 0, 0, 0)
        # save the seasons from response into json file
        resp_in_json = process_response_from_service(response, api, file_path)
        # Find the id for current season from response; seasons are named like '2017/2018'
        start_year = _season_start_year(datetime.now())
        season = '{}/{}'.format(start_year, start_year + 1)
        season = [years['dbid'] for years in resp_in_json if years['name']==season]

    elif api == 'rounds':
        response = fetch_response_from_api(api, 0, 0, 0, 0)
        # save the rounds from response into json file
        resp_in_json = process_response_from_service(response, api, file_path)
        # id of the first round returned, needed as an input parameter for getting player stats
        game_week = resp_in_json[0]['dbid']

    elif api == 'playerstats':
        for team_id in team_ids:
            response = fetch_response_from_api(api, team_id, 0, season, game_week)
            # Insert the team id before the extension to get a separate file for each team
            path = os.path.splitext(file_path)[0]+str(team_id)+'.json'
            # save the player stats of this team into its own json file
            resp_in_json = process_response_from_service(response, api, path)

    elif api == 'fpl':
        response = fetch_response_from_api(api, 0, 0, 0, 0)
        # save the whole response into json file
        resp_in_json = process_response_from_service(response, api, file_path)
        

In [140]:
# u = "H\u00e5vard Nordtveit"
# unidecode(u)