# World Football Data - Exploration and Clean

In this project, we will perform fundamental analysis on the European soccer leagues.

This notebook contains the data exploration and cleanup.

In [None]:
# imports
import panel as pn
pn.extension('plotly')
import plotly.express as px
import pandas as pd
import hvplot.pandas
import matplotlib.pyplot as plt
import calendar
import os
import requests
import json
from pathlib import Path
from dotenv import load_dotenv
from sqlalchemy import create_engine

## Create SQL Connection to DB

In [None]:
# Create a connection to the database.
# The connection URL (which embeds the password) is read from the environment
# instead of being written into the notebook. Define EURO_SOCCER_DB_URL in a
# local, untracked .env file, for example:
#   EURO_SOCCER_DB_URL=postgresql://<user>:<password>@localhost:5432/euro_soccer_db
# NOTE(review): the password that used to be hard-coded in this cell should be
# treated as compromised and rotated.
load_dotenv()
db_url = os.getenv("EURO_SOCCER_DB_URL")
if not db_url:
    # Fail early with a clear message rather than passing None to SQLAlchemy
    raise RuntimeError("EURO_SOCCER_DB_URL is not set; add it to your .env file")
engine = create_engine(db_url)

## Create API Connection to re-use for all requests 

In [None]:
# Api Credentials for request authorisation.
# The RapidAPI key is a secret, so it is read from the environment rather than
# being written into the notebook. Define RAPIDAPI_KEY in a local, untracked
# .env file.
# NOTE(review): the key that used to be hard-coded in this cell should be
# treated as compromised and regenerated.
load_dotenv()
rapidapi_key = os.getenv("RAPIDAPI_KEY")
if not rapidapi_key:
    # Fail early with a clear message instead of sending unauthenticated requests
    raise RuntimeError("RAPIDAPI_KEY is not set; add it to your .env file")

# Headers re-used by every request in this notebook
api_connection = {
    'x-rapidapi-host': "api-football-v1.p.rapidapi.com",
    'x-rapidapi-key': rapidapi_key,
}

# Below is just exploration code for data gathering, cleaning and testing. Any final code for visualisations goes into the visual_data_analysis workbook

### Football API - Get data for the English Premier League Seasons

In [None]:
# Football API (v2) endpoint listing the seasons available for league id 524
epl_url = (
    "https://api-football-v1.p.rapidapi.com"
    "/v2/leagues/seasonsAvailable/524"
)

# Send the authenticated GET request for the available seasons
epl_response = requests.get(epl_url, headers=api_connection)

In [None]:
# Inspect the top-level keys of the decoded JSON payload
epl_response.json().keys()

In [None]:
# Inspect the keys nested under the top-level 'api' key of the payload
epl_response.json()['api'].keys()

In [None]:
# Pull the value stored under api -> leagues out of the decoded payload.
# NOTE(review): named `_dict`, but the actual container type is not visible
# in this notebook - confirm.
leagues_dict = epl_response.json()['api']['leagues']

# Build a DataFrame from that payload and display it as the cell output
leagues_df = pd.DataFrame.from_dict(leagues_dict)
leagues_df


### Football API - Get data for the English Premier League Seasons for Top Scorers

In [None]:
# Football API (v3) endpoint for top scorers; the league and season are
# passed separately as query parameters when the request is made
top_scorers_url = "https://api-football-v1.p.rapidapi.com/v3/players/topscorers"

In [None]:
# Query parameters: league id and season
top_scorers_querystring = dict(league="39", season="2020")

# Send the authenticated GET request for the top scorers
top_scorers_response = requests.get(
    top_scorers_url,
    headers=api_connection,
    params=top_scorers_querystring,
)

In [None]:
# Inspect the top-level keys of the decoded top-scorers payload
top_scorers_response.json().keys()

In [None]:
# Extract the value stored under the top-level 'response' key of the payload.
# NOTE(review): despite the `_dict` suffix, a later cell iterates this as a
# sequence of per-player entries - confirm the type.
top_scorers_dict = top_scorers_response.json()['response']

In [None]:
# Display the raw top-scorers payload to see its nested structure
top_scorers_dict

In [None]:
# Collect each scorer's first name, last name and rating into column lists
# that can be turned into a DataFrame
top_players = {'firstname': [], 'lastname': [], 'rating': []}

for scorer in top_scorers_dict:
    # Names live under the 'player' key of each entry
    top_players['firstname'].append(scorer['player']['firstname'])
    top_players['lastname'].append(scorer['player']['lastname'])
    # The rating is read from the first element of the entry's 'statistics' list
    first_stats = scorer['statistics'][0]
    top_players['rating'].append(first_stats['games']['rating'])

In [None]:
# Tabulate the extracted names and ratings (one column per key of top_players)
pd.DataFrame(top_players)

### Football API - Get line ups for fixture 215662

In [None]:
# Football API (v3) endpoint that serves fixture line-ups
lineups_url = "https://api-football-v1.p.rapidapi.com/v3/fixtures/lineups"

# Select the fixture by id through the query string
lineups_fixture_215662_querystring = dict(fixture="215662")

# Send the authenticated GET request for the line-ups of that fixture
lineups_fixture_215662_response = requests.get(
    lineups_url,
    headers=api_connection,
    params=lineups_fixture_215662_querystring,
)

In [None]:
# Inspect the top-level keys of the decoded line-ups payload
lineups_fixture_215662_response.json().keys()

In [None]:
# Extract the value stored under the top-level 'response' key of the payload.
# NOTE(review): named `_dict`, but later cells pass it to pd.json_normalize
# with a record_path, which suggests a list of records - confirm.
lineups_fixture_215662_dict = lineups_fixture_215662_response.json()['response']

In [None]:
# Display the raw line-ups payload to see its nested structure
lineups_fixture_215662_dict

In [None]:
# Flatten the line-ups payload into a table for display; nested dict keys
# become dot-separated column names
pd.json_normalize(lineups_fixture_215662_dict)

In [None]:
# Expand the list stored under each record's 'startXI' key into one row per entry
pd.json_normalize(lineups_fixture_215662_dict, record_path=['startXI'])

In [None]:
# Expand the list stored under each record's 'substitutes' key into one row per entry
pd.json_normalize(lineups_fixture_215662_dict, record_path=['substitutes'])

### Football API - Get all epl fixtures for 2020

In [None]:
# Football API (v3) endpoint for fixtures
fixtures_url = "https://api-football-v1.p.rapidapi.com/v3/fixtures"

# Query parameters: league id and season
fixtures_2020_querystring = dict(league="39", season="2020")

# Send the authenticated GET request for the fixtures
fixtures_2020_response = requests.get(
    fixtures_url,
    headers=api_connection,
    params=fixtures_2020_querystring,
)

In [None]:
# Inspect the top-level keys of the decoded fixtures payload
fixtures_2020_response.json().keys()

In [None]:
# Display the response object itself (its repr shows the HTTP status code)
fixtures_2020_response

In [None]:
# Extract the value stored under the top-level 'response' key of the payload.
# NOTE(review): named `_dict`; the actual container type is not visible in
# this notebook - confirm.
fixtures_2020_dict = fixtures_2020_response.json()['response']

In [None]:
# Flatten the nested fixture records into a DataFrame. Nested keys become
# dot-separated column names (e.g. 'teams.home.name', 'goals.home'), which is
# the naming the following cells rely on when selecting columns.
# This cell previously repeated the league-seasons DataFrame code, leaving
# epl_2020_fixtures_df undefined for the cells below.
epl_2020_fixtures_df = pd.json_normalize(fixtures_2020_dict)

In [None]:
# Remove pandas' limit on displayed columns so wide frames are shown in full
pd.set_option('display.max_columns', None)
# Preview the first rows of the 2020 EPL fixtures frame
epl_2020_fixtures_df.head()

In [None]:
# Narrow the fixtures frame to venue city, team names, goals and the
# home-winner flag for display
epl_2020_fixtures_df[['fixture.venue.city','teams.home.name','teams.away.name','goals.home','goals.away', 'teams.home.winner']]

### Football API - This Section Returns the Yearly League IDs for the Top 4 Leagues Worldwide
#### Get All Leagues from the Top 4 countries: England, France, Italy and Spain

In [None]:
# Football API (v2) league-search endpoints, one per country
england_leagues_url = (
    "https://api-football-v1.p.rapidapi.com/v2/leagues/search/england"
)
france_leagues_url = (
    "https://api-football-v1.p.rapidapi.com/v2/leagues/search/france"
)
italy_leagues_url = (
    "https://api-football-v1.p.rapidapi.com/v2/leagues/search/italy"
)
spain_leagues_url = (
    "https://api-football-v1.p.rapidapi.com/v2/leagues/search/spain"
)

# Send one authenticated GET request per country, in the same order as above
england_leagues_response = requests.get(england_leagues_url, headers=api_connection)
france_leagues_response = requests.get(france_leagues_url, headers=api_connection)
italy_leagues_response = requests.get(italy_leagues_url, headers=api_connection)
spain_leagues_response = requests.get(spain_leagues_url, headers=api_connection)

In [None]:
# Inspect the top-level keys of the decoded England league-search payload
england_leagues_response.json().keys()

In [None]:
# Extract the value stored under api -> leagues from each country's payload.
# NOTE(review): named `_dict`; the actual container type is not visible in
# this notebook - confirm.
england_leagues_dict = england_leagues_response.json()['api']['leagues']
france_leagues_dict = france_leagues_response.json()['api']['leagues']
italy_leagues_dict = italy_leagues_response.json()['api']['leagues']
spain_leagues_dict = spain_leagues_response.json()['api']['leagues']

In [None]:
# Flatten each country's league records into its own DataFrame
england_leagues_df = pd.json_normalize(england_leagues_dict)
france_leagues_df = pd.json_normalize(france_leagues_dict)
italy_leagues_df = pd.json_normalize(italy_leagues_dict)
spain_leagues_df = pd.json_normalize(spain_leagues_dict)

#### Get Just the top level league for each country for all years 

In [None]:
# Build one season-ordered DataFrame per top-flight league.
# Each country's frame is filtered on the exact league name, then sorted by
# the 'season' column.

# England - Premier League
epl_df = england_leagues_df[england_leagues_df['name'] == 'Premier League']
epl_leagueid_df = epl_df.sort_values(by='season')

# France - Ligue 1
ligue1_df = france_leagues_df[france_leagues_df['name'] == 'Ligue 1']
ligue1_leagueid_df = ligue1_df.sort_values(by='season')

# Italy - Serie A
seriea_df = italy_leagues_df[italy_leagues_df['name'] == 'Serie A']
seriea_leagueid_df = seriea_df.sort_values(by='season')

# Spain - La Liga
laliga_df = spain_leagues_df[spain_leagues_df['name'] == 'La Liga']
laliga_leagueid_df = laliga_df.sort_values(by='season')

### The League IDs in the data frames below can now be used in api queries to return data for specific leagues/seasons

In [None]:
# Show the last two rows of the season-sorted EPL frame (the two highest
# 'season' values) to read off their league IDs
epl_leagueid_df.tail(2)

In [None]:
# Show the last two rows of the season-sorted Ligue 1 frame (the two highest
# 'season' values) to read off their league IDs
ligue1_leagueid_df.tail(2)

In [None]:
# Show the last two rows of the season-sorted Serie A frame (the two highest
# 'season' values) to read off their league IDs
seriea_leagueid_df.tail(2)

In [None]:
# Show the last two rows of the season-sorted La Liga frame (the two highest
# 'season' values) to read off their league IDs
laliga_leagueid_df.tail(2)

### Football API - Using the League IDs we now have, return the Teams in each league for the year 2021

In [None]:
# Football API (v2) endpoints returning the teams of a league; the trailing
# number in each URL is the league id for that league's season.
# NOTE(review): the ids are hard-coded - presumably read off the league-id
# frames built earlier in this notebook; verify before re-using.
# The EPL URL previously started with a stray space inside the string literal.
epl_2021_teams_url = "https://api-football-v1.p.rapidapi.com/v2/teams/league/3456"
ligue1_2021_teams_url = "https://api-football-v1.p.rapidapi.com/v2/teams/league/3506"
seriea_2021_teams_url = "https://api-football-v1.p.rapidapi.com/v2/teams/league/3576"
laliga_2021_teams_url = "https://api-football-v1.p.rapidapi.com/v2/teams/league/3513"

# Get the teams of each league from the API
epl_2021_teams_response = requests.request("GET", epl_2021_teams_url, headers=api_connection)
ligue1_2021_teams_response = requests.request("GET", ligue1_2021_teams_url, headers=api_connection)
seriea_2021_teams_response = requests.request("GET", seriea_2021_teams_url, headers=api_connection)
laliga_2021_teams_response = requests.request("GET", laliga_2021_teams_url, headers=api_connection)

In [None]:
# Inspect the top-level keys of the decoded EPL teams payload
epl_2021_teams_response.json().keys()

In [None]:
# Display the value stored under api -> teams of the EPL teams payload
epl_2021_teams_response.json()['api']['teams']

In [None]:
# Extract the value stored under api -> teams from each league's JSON response.
# NOTE(review): named `_dict`; the actual container type is not visible in
# this notebook - confirm.
epl_2021_teams_dict = epl_2021_teams_response.json()['api']['teams']
ligue1_2021_teams_dict = ligue1_2021_teams_response.json()['api']['teams']
seriea_2021_teams_dict = seriea_2021_teams_response.json()['api']['teams']
laliga_2021_teams_dict = laliga_2021_teams_response.json()['api']['teams']

In [None]:
# Flatten each league's team records into its own DataFrame
epl_2021_teams_df = pd.json_normalize(epl_2021_teams_dict)
ligue1_2021_teams_df = pd.json_normalize(ligue1_2021_teams_dict)
seriea_2021_teams_df = pd.json_normalize(seriea_2021_teams_dict)
laliga_2021_teams_df = pd.json_normalize(laliga_2021_teams_dict)

### The team_ids, team codes, venue names can now be used in api queries for players or matches

In [113]:
# Preview the first rows (head() defaults to five) of the EPL teams for the 2021 season
epl_2021_teams_df.head()

Unnamed: 0,team_id,name,code,logo,country,is_national,founded,venue_name,venue_surface,venue_address,venue_city,venue_capacity
0,33,Manchester United,MUN,https://media.api-sports.io/football/teams/33.png,England,False,1878,Old Trafford,grass,Sir Matt Busby Way,Manchester,76212
1,34,Newcastle,,https://media.api-sports.io/football/teams/34.png,England,False,1892,St. James' Park,grass,St. James&apos; Street,Newcastle upon Tyne,52389
2,38,Watford,,https://media.api-sports.io/football/teams/38.png,England,False,1881,Vicarage Road,grass,Vicarage Road,Watford,22200
3,39,Wolves,,https://media.api-sports.io/football/teams/39.png,England,False,1877,Molineux Stadium,grass,Waterloo Road,"Wolverhampton, West Midlands",32050
4,40,Liverpool,,https://media.api-sports.io/football/teams/40.png,England,False,1892,Anfield,grass,Anfield Road,Liverpool,55212


In [114]:
# Preview the first rows (head() defaults to five) of the Ligue 1 teams for the 2021 season
ligue1_2021_teams_df.head()

Unnamed: 0,team_id,name,code,logo,country,is_national,founded,venue_name,venue_surface,venue_address,venue_city,venue_capacity
0,77,Angers,ANG,https://media.api-sports.io/football/teams/77.png,France,False,1919,Stade Raymond-Kopa,grass,"73, boulevard Pierre de Coubertin",Angers,17835
1,78,Bordeaux,BOR,https://media.api-sports.io/football/teams/78.png,France,False,1881,Stade Matmut-Atlantique,grass,Cours Jules Ladoumegue,Bordeaux,42115
2,79,Lille,LIL,https://media.api-sports.io/football/teams/79.png,France,False,1944,Stade Pierre-Mauroy,grass,"261, Boulevard de Tournai, l&apos;Hôtel de Ville",Villeneuve d&apos;Ascq,50083
3,80,Lyon,LYO,https://media.api-sports.io/football/teams/80.png,France,False,1950,Groupama Stadium,grass,Chemin du Montout,Décines-Charpieu,61556
4,81,Marseille,OLM,https://media.api-sports.io/football/teams/81.png,France,False,1899,Orange Vélodrome,grass,"3, boulevard Michelet",Marseille,67394


In [115]:
# Preview the first rows (head() defaults to five) of the Serie A teams for the 2021 season
seriea_2021_teams_df.head()

Unnamed: 0,team_id,name,code,logo,country,is_national,founded,venue_name,venue_surface,venue_address,venue_city,venue_capacity
0,487,Lazio,,https://media.api-sports.io/football/teams/487...,Italy,False,1900,Stadio Olimpico,grass,"Viale dei Gladiatori, 2 / Via del Foro Italico",Roma,68530
1,488,Sassuolo,,https://media.api-sports.io/football/teams/488...,Italy,False,1922,MAPEI Stadium - Città del Tricolore,grass,"Piazza Azzuri d&apos;Italia, 1",Reggio nell&apos;Emilia,23717
2,489,AC Milan,,https://media.api-sports.io/football/teams/489...,Italy,False,1899,Stadio Giuseppe Meazza,grass,Via Piccolomini 5,Milano,80018
3,490,Cagliari,,https://media.api-sports.io/football/teams/490...,Italy,False,1920,Unipol Domus,grass,Via Raimondo Carta Raspi,Cagliari,16416
4,492,Napoli,,https://media.api-sports.io/football/teams/492...,Italy,False,1904,Stadio Diego Armando Maradona,grass,Pizzale Vincenzo Tecchio,Napoli,60240


In [116]:
# Preview the first rows (head() defaults to five) of the La Liga teams for the 2021 season
laliga_2021_teams_df.head()

Unnamed: 0,team_id,name,code,logo,country,is_national,founded,venue_name,venue_surface,venue_address,venue_city,venue_capacity
0,529,Barcelona,,https://media.api-sports.io/football/teams/529...,Spain,False,1899,Camp Nou,grass,Carrer d&apos;Arístides Maillol,Barcelona,99787
1,530,Atletico Madrid,,https://media.api-sports.io/football/teams/530...,Spain,False,1903,Estadio Wanda Metropolitano,grass,Rosas,Madrid,68032
2,531,Athletic Club,,https://media.api-sports.io/football/teams/531...,Spain,False,1898,San Mamés Barria,grass,Rafael Moreno Pitxitxi Kalea,Bilbao,53289
3,532,Valencia,,https://media.api-sports.io/football/teams/532...,Spain,False,1919,Estadio de Mestalla,grass,Avenida de Suecia,Valencia,55000
4,533,Villarreal,,https://media.api-sports.io/football/teams/533...,Spain,False,1923,Estadio de la Cerámica,grass,Plaza Labrador,Villarreal,24500
