### Collect data and save to pickle

#### Goal:  To save all data in a file that I think will be easier to use.  I hope to structure the pickles like a SQL database

In [1]:
# Python imports
import requests

# 3rd party imports
import pandas as pd

# Custom imports
import api_utils as api
from definitions import PICKLE_DIR, RAPID_API_KEY

In [2]:
# NOTE: the name `json` shadows the standard-library module of the same name;
# it is kept because later cells read the FPL payload through it.
json = api.get_json_from_fpl_api('https://fantasy.premierleague.com/api/bootstrap-static/')

_RAPID_API_HOST = "api-football-beta.p.rapidapi.com"
_RAPID_API_TIMEOUT_SECONDS = 30


def _fetch_rapid_api_json(endpoint):
    """Return the decoded JSON body of a RapidAPI football endpoint.

    Args:
        endpoint: Path segment appended to the host, e.g. ``"fixtures"``.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    response = requests.get(
        f"https://{_RAPID_API_HOST}/{endpoint}",
        headers={
            'x-rapidapi-host': _RAPID_API_HOST,
            'x-rapidapi-key': RAPID_API_KEY,
        },
        params={"season": "2022", "league": "39"},
        # Without a timeout, requests may wait on an unresponsive server forever.
        timeout=_RAPID_API_TIMEOUT_SECONDS,
    )
    # Fail here with a clear HTTP error instead of a KeyError in a later cell.
    response.raise_for_status()
    return response.json()


fixtures_json = _fetch_rapid_api_json("fixtures")
team_info_json = _fetch_rapid_api_json("teams")

ConnectionError: HTTPSConnectionPool(host='fantasy.premierleague.com', port=443): Max retries exceeded with url: /api/bootstrap-static/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7f825f091ac0>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))

In [None]:
# Map each team name to its API team id and home-venue details.
team_info_data = {
    entry['team']['name']: {
        'api_team_id': entry['team']['id'],
        'venue_id': entry['venue']['id'],
        'venue_name': entry['venue']['name'],
    }
    for entry in team_info_json['response']
}

# A dict of dicts yields one column per team; transpose so each team is a row.
rapid_api_team_info_df = pd.DataFrame(team_info_data).T
# rapid_api_team_info_df

Unnamed: 0,api_team_id,venue_id,venue_name
Manchester United,33,556,Old Trafford
Newcastle,34,562,St. James' Park
Bournemouth,35,504,Vitality Stadium
Fulham,36,535,Craven Cottage
Wolves,39,600,Molineux Stadium
Liverpool,40,550,Anfield
Southampton,41,585,St. Mary's Stadium
Arsenal,42,494,Emirates Stadium
Everton,45,8560,Goodison Park
Leicester,46,547,King Power Stadium


In [None]:
# Exploratory: show the top-level keys of the first fixture record.
fixtures_json['response'][0].keys()

dict_keys(['fixture', 'league', 'teams', 'goals', 'score'])

In [None]:
# Flatten every fixture record into one row of fields, keyed by the fixture id.
fixture_data = {
    match['fixture']['id']: {
        'timestamp': match['fixture']['timestamp'],
        'venue': match['fixture']['venue']['id'],
        'api_home_team_id': match['teams']['home']['id'],
        'api_away_team_id': match['teams']['away']['id'],
        'home_score': match['score']['fulltime']['home'],
        'away_score': match['score']['fulltime']['away'],
    }
    for match in fixtures_json['response']
}


In [None]:
# Build the fixtures table: the dict keys (fixture ids) start out as columns,
# so transpose to get one row per fixture, then let pandas choose the dtypes
# and name the index before pickling.
rapid_api_fixture_df = (
    pd.DataFrame(fixture_data)
    .infer_objects()
    .T
    .convert_dtypes()
    .rename_axis('fixture_id')
)
rapid_api_fixture_df.to_pickle(f"{PICKLE_DIR}/fixtures.pickle")

In [None]:
# Exploratory: list the top-level sections of the FPL bootstrap-static payload.
json.keys()

dict_keys(['events', 'game_settings', 'phases', 'teams', 'total_players', 'elements', 'element_stats', 'element_types'])

### Save gameweek info.

In [None]:
# One row per entry in the payload's 'events' list.
events_df = pd.DataFrame(json['events'])

# Keep only the deadline and data-checked columns, indexed by the event id.
gameweek_df = events_df.set_index('id')[['deadline_time_epoch', 'data_checked']]

gameweek_df.to_pickle(f"{PICKLE_DIR}/gameweek.pickle")

### Save transfer stats

In [None]:
# Per-event selection/captaincy columns, indexed by the event id.
transfer_stat_columns = [
    'most_selected',
    'most_transferred_in',
    'most_captained',
    'most_vice_captained',
]
transfer_stats_by_week_df = events_df.set_index('id')[transfer_stat_columns]
transfer_stats_by_week_df.to_pickle(f"{PICKLE_DIR}/transfer_stats_by_week.pickle")

#### Neither game_settings nor phases is relevant for data insight.

### Save team information

In [None]:
# One row per entry in the payload's 'teams' list; the id column is renamed so
# it cannot be confused with the other API's team id.
teams_df = pd.DataFrame(json['teams'])
teams_df.rename(columns={'id': 'fpl_team_id'}, inplace=True)

team_info_df = teams_df[['fpl_team_id', 'name']].set_index('fpl_team_id')

# Translate the FPL short names to the names used as the index of
# rapid_api_team_info_df. The result is assigned back to the column: calling
# replace(..., inplace=True) on a column selection is a chained in-place
# operation that pandas warns about and that does nothing under Copy-on-Write.
team_info_df['name'] = team_info_df['name'].replace(
    {"Man Utd": "Manchester United", "Man City": "Manchester City", "Spurs": "Tottenham"}
)

# NOTE(review): merge defaults to an inner join, so any team whose name has no
# match in rapid_api_team_info_df is silently dropped - confirm all teams survive.
team_info_df = team_info_df.merge(right=rapid_api_team_info_df, left_on='name', right_index=True)
team_info_df.to_pickle(f"{PICKLE_DIR}/team_info.pickle")

### Save team strength.

In [None]:
# Strength ratings per team (overall plus home/away splits), indexed by FPL team id.
strength_columns = [
    'strength',
    'strength_overall_home', 'strength_attack_home', 'strength_defence_home',
    'strength_overall_away', 'strength_attack_away', 'strength_defence_away',
]
team_strength_df = teams_df.set_index('fpl_team_id')[strength_columns]

team_strength_df.to_pickle(f"{PICKLE_DIR}/team_strength.pickle")

In [None]:
# Team name and short name, indexed by FPL team id.
# NOTE(review): this writes to the same team_info.pickle path as the
# "Save team information" cell earlier in the notebook and therefore replaces
# that file with a frame that has different columns - confirm which is intended.
team_info_df = teams_df.set_index('fpl_team_id')[['name', 'short_name']]

team_info_df.to_pickle(f"{PICKLE_DIR}/team_info.pickle")

### total_players not relevant

In [None]:
# One row per entry in the payload's 'elements' list, indexed once by its id
# so both tables below share the same index.
elements_df = pd.DataFrame(json['elements'])
elements_by_id = elements_df.set_index('id')

# Identity columns for each element.
player_info_columns = [
    'team',                                    # reference info
    'first_name', 'second_name', 'web_name',   # name
    'element_type', 'squad_number', 'photo',   # additional
]
pl_player_info_df = elements_by_id[player_info_columns]

# Match statistics for each element.
player_stat_columns = [
    'minutes', 'yellow_cards', 'red_cards', 'penalties_missed', 'penalties_saved',  # general
    'goals_scored', 'assists',                                                      # attacking
    'clean_sheets', 'goals_conceded', 'saves', 'own_goals',                         # defensive
]
pl_player_ingame_stats_df = elements_by_id[player_stat_columns]

pl_player_info_df.to_pickle(f"{PICKLE_DIR}/pl_player_info.pickle")
pl_player_ingame_stats_df.to_pickle(f"{PICKLE_DIR}/pl_player_ingame_stats.pickle")

### Can also extract totals from ```json['elements']```, e.g. total_points.  I think it would be better to arrange info by gameweek and calculate totals. 

In [None]:
# Table built from the payload's 'element_stats' list, indexed by the 'name' column.
element_stats_df = pd.DataFrame(json['element_stats']).set_index('name')

element_stats_df.to_pickle(f"{PICKLE_DIR}/element_stats.pickle")

In [None]:
# One row per entry in the payload's 'element_types' list. 'id' stays a regular
# column here because the selection below needs it; the former bare
# `element_types_df.set_index('id')` call discarded its result and was removed.
element_types_df = pd.DataFrame(json['element_types'])

# Keep only the naming columns, indexed by the element-type id.
element_info_df = element_types_df[[
  'id', 'plural_name', 'plural_name_short', 'singular_name'
]].set_index('id')

element_info_df.to_pickle(f"{PICKLE_DIR}/element_info.pickle")