# FPL API Request

This notebook requests the past-season history of every player from the Fantasy Premier League (FPL) API, and saves the result to a table in a BigQuery dataset.


In [None]:
!pip install google-cloud-bigquery

import requests
import pandas as pd
from google.cloud import bigquery

With the libraries we've installed and loaded, we can request the data we want from the FPL API.

In [1]:
# Download the past-season history of every FPL player.
#
# Produces:
#   players          - DataFrame built from the "elements" list of the
#                      bootstrap-static response (one row per player).
#   all_history      - DataFrame of every player's "history_past" rows, with
#                      an added "id" column holding the player's element id.
#   all_history_dict - the same rows as a list of dictionaries.

# Seconds to wait on any single request; without a timeout a stalled
# connection would block this cell indefinitely.
REQUEST_TIMEOUT = 30

# A single session reuses its pooled connection across the many per-player
# requests instead of opening a new one each time.
with requests.Session() as session:
    # Get all player IDs.
    players_url = "https://fantasy.premierleague.com/api/bootstrap-static/"
    response = session.get(players_url, timeout=REQUEST_TIMEOUT)
    # Fail here with a clear HTTP error rather than later with a confusing
    # JSON-decoding or KeyError on an error page.
    response.raise_for_status()
    data = response.json()
    players = pd.DataFrame(data["elements"])

    # Request historic data for each player.
    player_histories = []
    for element_id in players['id']:
        url = f'https://fantasy.premierleague.com/api/element-summary/{element_id}/'
        r = session.get(url, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        player_history = pd.DataFrame(r.json()['history_past'])
        # Tag each row with the player it belongs to.
        player_history['id'] = element_id
        player_histories.append(player_history)

# Concatenate once at the end: concatenating inside the loop re-copies the
# accumulated frame on every iteration.
all_history = pd.concat(player_histories)

# Reformat dataframe as list of dictionaries so BQ can read it.
all_history_dict = all_history.to_dict('records')

NameError: name 'requests' is not defined

Now let's set up a BigQuery table to hold the data, and allow us to process it easily.

In [None]:
# Create the BigQuery client, dataset and table that will hold the data.
#
# Produces:
#   client      - BigQuery client authenticated with the service account key.
#   dataset_ref - reference to the `fpl_historic_data` dataset.
#   table_ref   - reference to the `raw_historic_data` table in that dataset.
#   table       - the table as returned by `create_table`.

# Set up BigQuery client using service account credentials.
# `google.cloud.bigquery` has no `Credentials` class; the client's own
# factory method loads the key file and accepts the project directly.
client = bigquery.Client.from_service_account_json(
    'path/to/service_account_key.json',
    project='bf-fpl-pred-080723',
)

# Create dataset.
dataset_id = 'fpl_historic_data'
table_id = 'raw_historic_data'

# `client.dataset()` is deprecated; build the reference explicitly instead.
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
dataset = bigquery.Dataset(dataset_ref)
dataset.location = 'EU'
# exists_ok=True makes this cell safe to re-run.
dataset = client.create_dataset(dataset, exists_ok=True)

# Create the table if it doesn't exist.
schema = [
    bigquery.SchemaField('season_name', 'STRING'),
    bigquery.SchemaField('element_code', 'INTEGER'),
    bigquery.SchemaField('start_cost', 'INTEGER'),
    bigquery.SchemaField('end_cost', 'INTEGER'),
    bigquery.SchemaField('total_points', 'INTEGER'),
    bigquery.SchemaField('minutes', 'INTEGER'),
    bigquery.SchemaField('goals_scored', 'INTEGER'),
    bigquery.SchemaField('assists', 'INTEGER'),
    bigquery.SchemaField('clean_sheets', 'INTEGER'),
    bigquery.SchemaField('goals_conceded', 'INTEGER'),
    bigquery.SchemaField('own_goals', 'INTEGER'),
    bigquery.SchemaField('penalties_saved', 'INTEGER'),
    bigquery.SchemaField('penalties_missed', 'INTEGER'),
    bigquery.SchemaField('yellow_cards', 'INTEGER'),
    bigquery.SchemaField('red_cards', 'INTEGER'),
    bigquery.SchemaField('saves', 'INTEGER'),
    bigquery.SchemaField('bonus', 'INTEGER'),
    bigquery.SchemaField('bps', 'INTEGER'),
    bigquery.SchemaField('influence', 'FLOAT'),
    bigquery.SchemaField('creativity', 'FLOAT'),
    bigquery.SchemaField('threat', 'FLOAT'),
    bigquery.SchemaField('ict_index', 'FLOAT'),
    bigquery.SchemaField('starts', 'INTEGER'),
    bigquery.SchemaField('expected_goals', 'FLOAT'),
    bigquery.SchemaField('expected_assists', 'FLOAT'),
    bigquery.SchemaField('expected_goal_involvements', 'FLOAT'),
    bigquery.SchemaField('expected_goals_conceded', 'FLOAT'),
    bigquery.SchemaField('id', 'INTEGER'),
]

table_ref = dataset_ref.table(table_id)
table = bigquery.Table(table_ref, schema=schema)
table = client.create_table(table, exists_ok=True)

In [None]:
# Insert data into the table, overwriting any existing data.
# Relies on names created by earlier cells: `client`, `table_ref`,
# `table_id`, `dataset_id` and the `all_history_dict` list of row dicts.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
# The records built from the API response are named `all_history_dict`;
# no variable called `data_dict` is ever defined.
job = client.load_table_from_json(all_history_dict, table_ref, job_config=job_config)
# Block until the load job finishes; raises if the job failed.
job.result()

print(f'Data uploaded to BigQuery table {table_id} in dataset {dataset_id}')