In [1]:
# import required libraries

import json
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests as req
import seaborn as sns

In [2]:

# Load the API key for the sports API from a local secret file and build the
# request headers that are sent with every API call in this notebook.

def load_api_key(path):
    """Return the API key stored on the first line of ``path``.

    Surrounding whitespace is stripped, including the trailing newline that
    ``readline`` keeps, so the key can be used directly as an HTTP header value.

    Args:
        path: Location of the text file holding the key.

    Returns:
        The key as a string, or an empty string when the file cannot be read
        (the error arguments are printed in that case).
    """
    try:
        # `with` closes the file on every path, including on error.
        with open(path, 'r') as secret_file:
            return secret_file.readline().strip()
    except (OSError, UnicodeError) as ex:
        print(ex.args)
        return ""


filename = "secret.txt"
api_key = load_api_key(filename)

headers = {
    'x-rapidapi-host': "v3.football.api-sports.io",
    'x-apisports-key': api_key
}

# Never echo the real key: cell outputs are saved and shared with the notebook.
print({**headers, 'x-apisports-key': '<redacted>' if api_key else '<missing>'})

{'x-rapidapi-host': 'v3.football.api-sports.io', 'x-apisports-key': '<redacted>'}


In [3]:

def get_data(endpoint_url, timeout=30):
    """Send a GET request to the sports API and return the response on success.

    The request is sent with the notebook-level ``headers`` dictionary.

    Args:
        endpoint_url: Full URL of the endpoint, including its query string.
        timeout: Seconds to wait for the server before the request is aborted.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The response object when the status code is 200; otherwise ``None``
        (after printing the status code).
    """
    response = req.get(endpoint_url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response

    print(f"API Error: Unable to fetch data from api. Status code: {response.status_code}")
    return None

In [4]:
def save_data_as_json(data, file_path):
    """Serialize ``data`` as JSON into ``file_path`` and print a confirmation.

    Missing parent directories are created first, so the notebook also works
    on a fresh checkout where the data folders do not exist yet.

    Args:
        data: Any JSON-serializable object (as accepted by ``json.dump``).
        file_path: Destination path of the JSON file; overwritten if present.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:  # empty when file_path has no directory component
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file)
    print(f'JSON Data is successfully saved to {file_path}')


In [5]:
def read_data_from_file(file_path):
    """Return the JSON content stored at ``file_path``, or ``None`` if absent.

    Used as a cache lookup: when the file does not exist a message is printed
    and ``None`` is returned so the caller can fetch the data instead.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None

    with open(file_path, 'r') as cached_file:
        return json.load(cached_file)

#### Data analysis of English Premier League, Season 2022-2023

English Premier League ( ID - 39 )

Season - 2022

##### Get all teams from EPL 2022-2023

In [6]:

# URL of the endpoint that lists the teams of league 39 in season 2022
teams_url = "https://v3.football.api-sports.io/teams?league=39&season=2022"
teams_ai_url = teams_url  # original (misspelled) name, kept as an alias

# file path to save/load api-data
team_api_data_filepath = './data/api-response-store/teams_api_response.json'

# Try the local cache first so the API is only called when necessary.
teams_api_response = read_data_from_file(team_api_data_filepath)

if teams_api_response:
    print('successfully loaded teams data from file')
else:
    print('Unable to load data from file. Fetching data from API')
    response = get_data(teams_url)
    # Keep the parsed JSON body, not the response object, so that
    # `teams_api_response` has the same shape whether it came from the cache
    # or from the API (later cells index it like a dict).
    teams_api_response = response.json() if response else None
    if teams_api_response:
        save_data_as_json(teams_api_response, team_api_data_filepath)

successfully loaded teams data from file


In [7]:
# Keep only the list of team records from the API payload and store it
# separately from the raw API response.
teams_filepath = "./data/teams_epl_2022.json"
teams_epl_2022 = teams_api_response["response"]
save_data_as_json(teams_epl_2022, teams_filepath)

# Sanity check: number of teams, followed by the first team record.
print(len(teams_epl_2022), teams_epl_2022[0], sep="\n")

JSON Data is successfully saved to ./data/teams_epl_2022.json
20
{'team': {'id': 33, 'name': 'Manchester United', 'code': 'MUN', 'country': 'England', 'founded': 1878, 'national': False, 'logo': 'https://media.api-sports.io/football/teams/33.png'}, 'venue': {'id': 556, 'name': 'Old Trafford', 'address': 'Sir Matt Busby Way', 'city': 'Manchester', 'capacity': 76212, 'surface': 'grass', 'image': 'https://media.api-sports.io/football/venues/556.png'}}


#### Get players Data for each team

- Create a function with a `while` loop to handle pagination, in case a team has more than 20 players.
- Create a solution to handle rate limiting by the API provider.
- Fetch the rate-limiting data from the API response headers.


In [26]:

# Download (or load from the local cache) the player statistics of every team.
#
# The players endpoint is paginated, so each team is requested page by page
# until the page count reported in the response's "paging" section is reached.
# Every page is cached on disk; the rate-limit counters are only consulted
# when a page really has to be requested from the API.

league_id = 39  # league English Premier league
season = 2022  # season 2022-2023

players = []

# Remaining request budget; refreshed from the response headers after each call.
request_per_minute_remaining = 10
current_request_per_day_remaining = 100
daily_limit_reached = False

for team in teams_epl_2022:
    team_id = team["team"]["id"]  # fetch id of each team
    current_page = 1  # for api pagination, advanced after every processed page
    total_page = 1  # for api pagination, updated from every api response

    while current_page <= total_page:
        api_response_filepath = f'./data/api-response-store/players_epl_2022_{team_id}_{current_page}.json'

        # try to load this page from the local cache
        parsed_response = read_data_from_file(api_response_filepath)

        if parsed_response:
            print(f'successfully loaded data from file:  players_epl_2022_{team_id}_{current_page}.json')
        else:
            print('Unable to load data from file. Fetching data from API')

            if current_request_per_day_remaining <= 0:
                print("Error: API requests per day limit reached")
                daily_limit_reached = True
                break

            if request_per_minute_remaining <= 0:
                print("Waiting for one minute: Request per minute limit reached")
                time.sleep(60)
                print("Continuing execution")

            # Build the URL inside the loop so it carries the *current* page;
            # building it once per team would request page 1 over and over.
            players_api_url = (
                "https://v3.football.api-sports.io/players"
                f"?season={season}&league={league_id}&team={team_id}&page={current_page}"
            )
            response = get_data(players_api_url)
            if not response:
                # get_data already printed the status code; there is no payload
                # to read paging information from, so give up on this team.
                print(f'Skipping remaining pages of team {team_id}: request failed')
                break

            # rate limit data update (keep the previous value if a header is missing)
            request_per_minute_remaining = int(
                response.headers.get("X-RateLimit-Remaining", request_per_minute_remaining)
            )
            print(f'request_per_minute_remaining: {request_per_minute_remaining}')
            current_request_per_day_remaining = int(
                response.headers.get("x-ratelimit-requests-remaining", current_request_per_day_remaining)
            )
            print(f'request_per_day_remaining: {current_request_per_day_remaining}')

            parsed_response = response.json()

            # NOTE(review): the API appears to report some failures (e.g. rate
            # limiting) with HTTP 200 and a non-empty "errors" field - confirm
            # against the API docs. Such a payload must not be cached.
            if parsed_response.get("errors"):
                print(f'API reported errors for team {team_id}, page {current_page}: {parsed_response["errors"]}')
                break

            save_data_as_json(parsed_response, api_response_filepath)

        # pagination info
        total_page = int(parsed_response["paging"]["total"])

        # update players data
        players.extend(parsed_response["response"])

        current_page += 1

    if daily_limit_reached:
        # No further request can succeed today; stop instead of repeating the
        # same error for every remaining team.
        break


#  Save players data
print(f'Total players: {len(players)}')
players_filepath = "./data/players_epl_2022.json"
save_data_as_json(players, players_filepath)

successfully loaded data from file:  players_epl_2022_33_1.json
successfully loaded data from file:  players_epl_2022_33_2.json
successfully loaded data from file:  players_epl_2022_33_3.json
successfully loaded data from file:  players_epl_2022_34_1.json
successfully loaded data from file:  players_epl_2022_34_2.json
successfully loaded data from file:  players_epl_2022_35_1.json
successfully loaded data from file:  players_epl_2022_35_2.json
successfully loaded data from file:  players_epl_2022_35_3.json
successfully loaded data from file:  players_epl_2022_36_1.json
successfully loaded data from file:  players_epl_2022_36_2.json
successfully loaded data from file:  players_epl_2022_36_3.json
successfully loaded data from file:  players_epl_2022_39_1.json
successfully loaded data from file:  players_epl_2022_39_2.json
successfully loaded data from file:  players_epl_2022_39_3.json
successfully loaded data from file:  players_epl_2022_40_1.json
successfully loaded data from file:  pla

#### Convert JSON Data to CSV

In [27]:
# Flatten each nested player record from the API into one flat row per player.

# (output column, key inside player["player"])
PLAYER_COLUMNS = (
    ("first_name", "firstname"),
    ("last_name", "lastname"),
    ("age", "age"),
    ("nationality", "nationality"),
    ("height_cm", "height"),
    ("weight_kg", "weight"),
)

# (output column, section inside the statistics entry, key inside that section)
STATS_COLUMNS = (
    ("team_name", "team", "name"),
    ("league_name", "league", "name"),
    ("season", "league", "season"),
    ("game_appearances", "games", "appearences"),
    ("game_lineups", "games", "lineups"),
    ("game_minutes", "games", "minutes"),
    ("game_number", "games", "number"),
    ("game_position", "games", "position"),
    ("game_rating", "games", "rating"),
    ("game_captain", "games", "captain"),
    ("shots_total", "shots", "total"),
    ("shots_on", "shots", "on"),
    ("goals_total", "goals", "total"),
    ("goals_conceded", "goals", "conceded"),
    ("goals_assists", "goals", "assists"),
    ("goals_saves", "goals", "saves"),
    ("passes_total", "passes", "total"),
    ("passes_key", "passes", "key"),
    ("passes_accuracy", "passes", "accuracy"),
    ("tackles_total", "tackles", "total"),
    ("tackles_blocks", "tackles", "blocks"),
    ("tackles_interceptions", "tackles", "interceptions"),
    ("duels_total", "duels", "total"),
    ("duels_won", "duels", "won"),
    ("dribble_attempts", "dribbles", "attempts"),
    ("dribble_success", "dribbles", "success"),
    ("dribble_past", "dribbles", "past"),
    ("fouls_drawn", "fouls", "drawn"),
    ("fouls_committed", "fouls", "committed"),
    ("cards_yelow", "cards", "yellow"),
    ("cards_yellowred", "cards", "yellowred"),
    ("cards_red", "cards", "red"),
    ("penalty_won", "penalty", "won"),
    ("penalty_committed", "penalty", "commited"),
    ("penalty_scored", "penalty", "scored"),
    ("penalty_missed", "penalty", "missed"),
    ("penalty_saved", "penalty", "saved"),
)


def flatten_player(player):
    """Return one flat dict built from a nested player record.

    Personal details are read from ``player["player"]`` and the statistics
    from the first entry of ``player["statistics"]``.
    """
    profile = player["player"]
    first_stats = player["statistics"][0]

    row = {column: profile[key] for column, key in PLAYER_COLUMNS}
    row.update(
        (column, first_stats[section][key])
        for column, section, key in STATS_COLUMNS
    )
    return row


players_data = [flatten_player(player) for player in players]

df = pd.DataFrame(players_data)
df.head(15)

Unnamed: 0,first_name,last_name,age,nationality,height_cm,weight_kg,team_name,league_name,season,game_appearances,...,fouls_drawn,fouls_committed,cards_yelow,cards_yellowred,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved
0,Lee,Grant,39.0,England,193 cm,83 kg,Manchester United,Premier League,2022,,...,,,,,,,,,,
1,Tyler,Fredricson,17.0,England,,,Manchester United,Premier League,2022,,...,,,,,,,,,,
2,Björn Bryan,Hardley,20.0,Netherlands,189 cm,,Manchester United,Premier League,2022,,...,,,,,,,,,,
3,Sam,Murray,18.0,England,,,Manchester United,Premier League,2022,,...,,,,,,,,,,
4,David,de Gea Quintana,33.0,Spain,192 cm,76 kg,Manchester United,Premier League,2022,38.0,...,5.0,,0.0,0.0,0.0,,,0.0,0.0,1.0
5,Phil Anthony,Jones,31.0,England,180 cm,71 kg,Manchester United,Premier League,2022,0.0,...,,,0.0,0.0,0.0,,,,,
6,Rhys Joseph,Wright Bennett,21.0,England,,,Manchester United,Premier League,2022,0.0,...,,,0.0,0.0,0.0,,,0.0,0.0,
7,Charlie William Henry,Savage,21.0,Wales,182 cm,74 kg,Manchester United,Premier League,2022,0.0,...,,,0.0,0.0,0.0,,,,,
8,Alex,Nicolao Telles,32.0,Brazil,181 cm,71 kg,Manchester United,Premier League,2022,0.0,...,,,0.0,0.0,0.0,,,,,
9,Marcel,Sabitzer,30.0,Austria,178 cm,76 kg,Manchester United,Premier League,2022,11.0,...,10.0,8.0,1.0,0.0,0.0,,,0.0,0.0,


#### Save the data as CSV

In [28]:
# Preview the first five rows of the flattened player table.
df.head(5)

Unnamed: 0,first_name,last_name,age,nationality,height_cm,weight_kg,team_name,league_name,season,game_appearances,...,fouls_drawn,fouls_committed,cards_yelow,cards_yellowred,cards_red,penalty_won,penalty_committed,penalty_scored,penalty_missed,penalty_saved
0,Lee,Grant,39.0,England,193 cm,83 kg,Manchester United,Premier League,2022,,...,,,,,,,,,,
1,Tyler,Fredricson,17.0,England,,,Manchester United,Premier League,2022,,...,,,,,,,,,,
2,Björn Bryan,Hardley,20.0,Netherlands,189 cm,,Manchester United,Premier League,2022,,...,,,,,,,,,,
3,Sam,Murray,18.0,England,,,Manchester United,Premier League,2022,,...,,,,,,,,,,
4,David,de Gea Quintana,33.0,Spain,192 cm,76 kg,Manchester United,Premier League,2022,38.0,...,5.0,,0.0,0.0,0.0,,,0.0,0.0,1.0


In [29]:
# (number of rows, number of columns) of the flattened player table.
df.shape

(1120, 43)

In [31]:
# Export the flattened player statistics as CSV.
# index=False keeps the row index out of the file; otherwise it is written as
# a nameless first column that appears as "Unnamed: 0" when the CSV is read back.
filepath = "./data/players-stats-epl-2022.csv"
df.to_csv(filepath, index=False)