# Data Gathering
Notebook for gathering player data from the Fantasy Premier League (FPL) API.

## Packages

In [1]:
import os 
import requests
import pandas as pd
import logging
# import pyyaml
import sys
import time 
import requests
from datetime import datetime

In [2]:
# Root folder of the FPL project, read from the FPL_path environment variable.
try:
    path_to_files = os.environ['FPL_path']
except KeyError:
    # Same exception type as a plain lookup, but the message says what to fix.
    raise KeyError("Environment variable 'FPL_path' is not set") from None

# Add path_to_files to the interpreter's module search directories
# (presumably so the API_call import in a later cell resolves - confirm).
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path.
if path_to_files not in sys.path:
    sys.path.append(path_to_files)

In [3]:
# Log file for this run: <path_to_files>/Logs/data_gathering_log_<ddmmYYYY>.log
# os.path.join (instead of string concatenation) gives the right path whether or
# not FPL_path ends with a path separator, and matches how the export path is
# built at the end of the notebook.
log_file = os.path.join(
    path_to_files,
    'Logs/data_gathering_log_{}.log'.format(datetime.now().strftime("%d%m%Y")),
)

logging.basicConfig(filename=log_file,
                    filemode='w',  # 'w' truncates an existing log file with the same date stamp
                    format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
                    level=logging.INFO)

## Importing Classes

In [4]:
from API_call import GeneralAPICall, DetailedAPICall

## Data Gathering - API call

In [5]:
# These endpoints were originally kept in a config file; they are hard-coded here
# because yaml and pyyaml were not working in VSCode.

# Passed to GeneralAPICall in the general API call cell.
general_url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
# Passed to DetailedAPICall in the detailed API call cell.
detailed_url = 'https://fantasy.premierleague.com/api/element-summary/'

In [6]:
# General API Call
#
# Exceptions handled for the API call:
#   Timeout          - the request has timed out
#   TooManyRedirects - too many redirects; try a different URL
#   RequestException - ambiguous exception that occurred while handling the request
#
# More exceptions (or custom exceptions) can be added if required.
#
# Every failure is logged at ERROR level and then re-raised. Later cells need
# `total_players`, so swallowing the error here would only resurface as an
# unrelated NameError further down the notebook.
try:
    api_call = GeneralAPICall(general_url)
    total_players = api_call.total_players()
    logging.info('General API call for total players successfully complete')
except requests.exceptions.Timeout:
    logging.error('General API call for total players timed out')
    raise
except requests.exceptions.TooManyRedirects:
    logging.error('General API call for total players URL incorrect')
    raise
except requests.exceptions.RequestException as e:
    # Catch-all for any other requests failure; the error text goes into the log.
    logging.error(f'General API call for total players ambigous error. Research further. \n Error: {e}')
    raise

In [7]:
# Detailed API Call
#
# Builds the list of player ids from `total_players`, then fetches the detailed
# data for those ids into `detailed_player_df`.
#
# Every failure is logged at ERROR level and then re-raised. Later cells need
# `detailed_player_df`, so swallowing the error here would only resurface as an
# unrelated NameError further down the notebook.
try:
    detailed_api_call = DetailedAPICall(detailed_url)
    id_list = detailed_api_call.player_ids(total_players)
    detailed_player_df = detailed_api_call.detailed_total_players(id_list)
    logging.info('Detailed API call for complete')
except requests.exceptions.Timeout:
    logging.error('Detailed API call for ids and detailed player information timed out')
    raise
except requests.exceptions.TooManyRedirects:
    logging.error('Detailed API call for ids and detailed player information incorrect')
    raise
except requests.exceptions.RequestException as e:
    # Catch-all for any other requests failure; the error text goes into the log.
    logging.error(f'Detailed API call for ids and detailed player information  ambigous error. Research further. \n Error: {e}')
    raise

Runtime:  44.25422263145447


## Data Checks

In [8]:
# Remove pandas' display limits so the previews below show every column
# (and every row of whatever is displayed) instead of a truncated view.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [9]:
# (rows, columns) of the table returned by the general API call.
total_players.shape

(661, 88)

In [10]:
# Preview the first rows of the general player table.
total_players.head()

Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,ep_next,ep_this,event_points,first_name,form,id,in_dreamteam,news,news_added,now_cost,photo,points_per_game,second_name,selected_by_percent,special,squad_number,status,team,team_code,total_points,transfers_in,transfers_in_event,transfers_out,transfers_out_event,value_form,value_season,web_name,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,influence_rank,influence_rank_type,creativity_rank,creativity_rank_type,threat_rank,threat_rank_type,ict_index_rank,ict_index_rank_type,corners_and_indirect_freekicks_order,corners_and_indirect_freekicks_text,direct_freekicks_order,direct_freekicks_text,penalties_order,penalties_text,expected_goals_per_90,saves_per_90,expected_assists_per_90,expected_goal_involvements_per_90,expected_goals_conceded_per_90,goals_conceded_per_90,now_cost_rank,now_cost_rank_type,form_rank,form_rank_type,points_per_game_rank,points_per_game_rank_type,selected_rank,selected_rank_type,starts_per_90,clean_sheets_per_90
0,100.0,100.0,58822,0,0,-3,3,0,2,0.7,0.2,0,Cédric,0.2,1,False,,2022-09-16T13:30:06.551108Z,42,58822.jpg,1.0,Alves Soares,0.1,False,,a,1,3,1,5588,102,25385,211,0.0,0.2,Cédric,27,0,0,0,0,0,0,0,0,0,0,0,2,2.4,1.6,0.0,0.4,0,0.0,0.0,0.0,0.0,421,158,383,140,467,189,425,158,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,539,157,354,121,367,134,401,149,0.0,0.0
1,,,84450,0,0,1,-1,1,3,5.3,4.8,3,Granit,4.8,3,False,,,51,84450.jpg,4.7,Xhaka,3.9,False,,a,1,3,61,662560,11972,415472,20088,0.9,12.0,Xhaka,1151,3,3,6,11,0,0,0,2,0,0,7,250,291.8,278.3,228.0,79.8,0,0.0,0.0,0.0,0.0,47,19,32,25,57,35,30,21,,,3.0,,,,0.0,0.0,0.0,0.0,0.0,0.86,170,99,38,14,33,16,82,27,0.0,0.46916
2,100.0,100.0,153256,0,0,-3,3,1,3,0.7,0.2,1,Mohamed,0.2,4,False,,2022-08-30T11:30:06.278675Z,42,153256.jpg,1.5,Elneny,0.7,False,,a,1,3,3,137118,8400,197534,4083,0.0,0.7,Elneny,93,0,0,0,1,0,0,0,0,0,0,0,18,4.2,5.3,0.0,1.0,0,0.0,0.0,0.0,0.0,409,179,359,170,473,183,406,181,,,,,,,0.0,0.0,0.0,0.0,0.0,0.97,546,282,357,166,319,140,217,68,0.0,0.0
3,,,156074,0,0,-3,3,0,2,1.0,0.5,1,Rob,0.5,5,False,,,42,156074.jpg,1.0,Holding,0.1,False,,a,1,3,4,5492,119,15601,200,0.1,1.0,Holding,12,0,0,0,0,0,0,0,0,0,0,0,14,2.4,0.1,0.0,0.2,0,0.0,0.0,0.0,0.0,423,160,422,158,474,194,435,161,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,547,163,312,103,370,137,420,156,0.0,0.0
4,100.0,100.0,167199,0,0,-2,2,0,3,4.7,4.2,3,Thomas,4.2,6,False,,2022-09-25T09:00:06.484502Z,48,167199.jpg,4.0,Partey,0.5,False,,a,1,3,40,74028,2793,92312,2215,0.9,8.3,Partey,869,2,0,6,6,0,0,0,0,0,0,4,182,208.6,128.1,111.0,44.8,0,0.0,0.0,0.0,0.0,101,42,116,83,144,91,112,70,,,,,,,0.0,0.0,0.0,0.0,0.0,0.62,261,159,57,24,62,29,243,78,0.0,0.6214


In [11]:
# Compact summary (index range, column count, dtype counts, memory usage);
# verbose=False leaves out the per-column listing.
total_players.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 661 entries, 0 to 660
Columns: 88 entries, chance_of_playing_next_round to clean_sheets_per_90
dtypes: bool(2), float64(13), int64(47), object(26)
memory usage: 445.5+ KB


In [12]:
# (rows, columns) of the table returned by the detailed API call.
detailed_player_df.shape

(8491, 36)

In [13]:
# Preview the first rows of the detailed player table.
detailed_player_df.head()

Unnamed: 0,element,fixture,opponent_team,total_points,was_home,kickoff_time,team_h_score,team_a_score,round,minutes,goals_scored,assists,clean_sheets,goals_conceded,own_goals,penalties_saved,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index,starts,expected_goals,expected_assists,expected_goal_involvements,expected_goals_conceded,value,transfers_balance,selected,transfers_in,transfers_out
0,1,1,7,0,False,2022-08-05T19:00:00Z,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,45,0,23970,0,0
1,1,11,10,0,True,2022-08-13T14:00:00Z,4,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,44,-5169,24193,1361,6530
2,1,21,3,0,False,2022-08-20T16:30:00Z,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,44,-4337,20960,879,5216
3,1,31,9,0,True,2022-08-27T16:30:00Z,2,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,43,-2988,18825,577,3565
4,1,41,2,0,True,2022-08-31T18:30:00Z,2,1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,43,-1611,17790,405,2016


In [14]:
# Compact summary (index range, column count, dtype counts, memory usage);
# verbose=False leaves out the per-column listing.
# NOTE(review): the recorded output reports the index as "0 to 0" across all
# rows, which suggests the index is not unique (possibly frames combined
# without resetting the index) - confirm in API_call. The CSV export below
# writes index=False, so the exported file is unaffected either way.
detailed_player_df.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8491 entries, 0 to 0
Columns: 36 entries, element to transfers_out
dtypes: bool(1), int64(26), object(9)
memory usage: 2.3+ MB


## Exporting to CSV

In [15]:
# Write the detailed player table to
# <path_to_files>/Datasets/detailed_player_dataset_<ddmmYYYY>.csv
export_stamp = datetime.now().strftime("%d%m%Y")
export_name = "Datasets/detailed_player_dataset_{}.csv".format(export_stamp)
path_to_data = os.path.join(path_to_files, export_name)

# index=False: the DataFrame index is not written to the file.
detailed_player_df.to_csv(path_to_data, index=False)
logging.info('Dataset exported to csv')