In [2]:
import requests
import json
import time
import pandas as pd
import mysql.connector
from IPython.display import clear_output
from datetime import datetime
from config import api_football_key, conn_host, conn_database, conn_user, conn_password

In [3]:
league_id = 78 # Choose the league id
start_season = 2015
end_season = 2023

now = time.time()

In [30]:
headers = {
        'X-RapidAPI-Key': api_football_key,
        'X-RapidAPI-Host': 'api-football-v1.p.rapidapi.com'
    }

def connect_to_db():
    return mysql.connector.connect(host=conn_host, 
                                     database=conn_database,
                                     user=conn_user,
                                     password=conn_password)

def execute_query(query, read_only = True):
    resp = None
    try:
        db = connect_to_db()
        if read_only:
            resp = pd.read_sql_query(query, db)
        else:
            mycursor = db.cursor()
            mycursor.execute(query)

            db.commit()
        db.close()
    except Exception as e:
        print(e)
    return resp

def execute_multiple_queries(queries):
    try:
        db = connect_to_db()
        mycursor = db.cursor()
        for query in queries:
            mycursor.execute(query)

        db.commit()
        db.close()
    except Exception as e:
        print(e)

In [4]:
# Table creation queries
execute_query("CREATE TABLE IF NOT EXISTS leagues (id INT NOT NULL, name VARCHAR(50) NOT NULL, PRIMARY KEY (id))", False)
execute_query("CREATE TABLE IF NOT EXISTS teams (id INT NOT NULL, name VARCHAR(50) NOT NULL, PRIMARY KEY (id))", False)
execute_query("CREATE TABLE IF NOT EXISTS matches (id INT NOT NULL, date DATETIME NOT NULL, league_id INT NOT NULL, season INT NOT NULL, home_id INT NOT NULL, away_id INT NOT NULL, home_score INT NULL, away_score INT NULL, PRIMARY KEY (id), FOREIGN KEY(home_id) REFERENCES teams (id), FOREIGN KEY(away_id) REFERENCES teams (id), FOREIGN KEY(league_id) REFERENCES leagues (id))", False)

In [5]:
def get_league_season_fixtures(season):
    response = requests.get(f"https://api-football-v1.p.rapidapi.com/v3/fixtures?league={league_id}&season={season}", headers=headers)
    response_parsed = json.loads(response.text)
    return [fixture for fixture in response_parsed['response'] if fixture['fixture']['timestamp'] < now]

In [6]:
def add_match_info_to_db(fixture):
    fixture_id, league_id, league_name, fixture_date, season, home_id, home_name, away_id, away_name, home_score, away_score = fixture['fixture']['id'], fixture['league']['id'], f"{fixture['league']['name']} ({fixture['league']['country']})", fixture['fixture']['timestamp'], fixture['league']['season'], fixture['teams']['home']['id'], fixture['teams']['home']['name'], fixture['teams']['away']['id'], fixture['teams']['away']['name'], fixture['goals']['home'], fixture['goals']['away']
    fixture_date_converted = datetime.fromtimestamp(fixture_date).strftime('%Y-%m-%d %H:%M:%S')
    execute_query(f"INSERT IGNORE INTO leagues (id, name) VALUES ({league_id}, '{league_name}')", False)
    execute_query(f"INSERT IGNORE INTO teams (id, name) VALUES ({home_id}, '{home_name}')", False)
    execute_query(f"INSERT IGNORE INTO teams (id, name) VALUES ({away_id}, '{away_name}')", False)
    execute_query(f"INSERT IGNORE INTO matches (id, date, league_id, season, home_id, away_id, home_score, away_score) VALUES ({fixture_id}, '{fixture_date_converted}', {league_id}, {season}, {home_id}, {away_id}, '{home_score}', '{away_score}')", False)
#     print(f"{fixture['teams']['home']['name']} {fixture['goals']['home']} x {fixture['goals']['away']} {fixture['teams']['away']['name']}")

In [14]:
for season in range(start_season, end_season):
    fixtures = get_league_season_fixtures(season)
    for index, fixture in enumerate(fixtures):
        clear_output(wait=True)
        print(f"Loading fixtures for the {season} season: {index}/{len(fixtures)}")
        add_match_info_to_db(fixture)

Loading fixtures for the 2022 season: 118/119


In [5]:
fixtures_df = execute_query(f"SELECT m.id, m.date, m.season, l.name AS league, ht.name as home_team, at.name as away_team, m.home_score, m.away_score FROM matches AS m INNER JOIN teams AS ht ON (m.home_id = ht.id) INNER JOIN teams AS at ON (m.away_id = at.id) INNER JOIN leagues AS l ON (m.league_id = l.id) WHERE (l.id = {league_id} AND m.season >= {start_season} and m.season < {end_season}) ORDER BY m.date DESC")



In [6]:
fixtures_df.head()

Unnamed: 0,id,date,season,league,home_team,away_team,home_score,away_score
0,862168,2022-05-23 15:30:00,2021,Bundesliga 1 (Germany),Hamburger SV,Hertha Berlin,0,2
1,862167,2022-05-19 15:30:00,2021,Bundesliga 1 (Germany),Hertha Berlin,Hamburger SV,0,1
2,719646,2022-05-14 10:30:00,2021,Bundesliga 1 (Germany),Dortmund,Hertha Berlin,2,1
3,719647,2022-05-14 10:30:00,2021,Bundesliga 1 (Germany),Wolfsburg,Bayern Munich,2,2
4,719648,2022-05-14 10:30:00,2021,Bundesliga 1 (Germany),Bayer Leverkusen,Freiburg,2,1


In [27]:
execute_query("ALTER TABLE matches " +
              "ADD home_shots_on_goal INT NULL," +
              "ADD home_shots_off_goal INT NULL," +
              "ADD home_total_shots INT NULL," +
              "ADD home_blocked_shots INT NULL," +
              "ADD home_shots_inside_box INT NULL," +
              "ADD home_shots_outside_box INT NULL," +
              "ADD home_fouls INT NULL," +
              "ADD home_corners INT NULL," +
              "ADD home_offsides INT NULL," +
              "ADD home_possession FLOAT NULL," +
              "ADD home_yellow_cards INT NULL," +
              "ADD home_red_cards INT NULL," +
              "ADD home_saves INT NULL," +
              "ADD home_total_passes INT NULL," +
              "ADD home_passes_accurate INT NULL," +
              "ADD home_passes_pct FLOAT NULL," + 
              "ADD away_shots_on_goal INT NULL," +
              "ADD away_shots_off_goal INT NULL," +
              "ADD away_total_shots INT NULL," +
              "ADD away_blocked_shots INT NULL," +
              "ADD away_shots_inside_box INT NULL," +
              "ADD away_shots_outside_box INT NULL," +
              "ADD away_fouls INT NULL," +
              "ADD away_corners INT NULL," +
              "ADD away_offsides INT NULL," +
              "ADD away_possession FLOAT NULL," +
              "ADD away_yellow_cards INT NULL," +
              "ADD away_red_cards INT NULL," +
              "ADD away_saves INT NULL," +
              "ADD away_total_passes INT NULL," +
              "ADD away_passes_accurate INT NULL," +
              "ADD away_passes_pct FLOAT NULL;")

'NoneType' object is not iterable


In [28]:
def get_league_season_fixtures(fixture_id):
    response = requests.get(f"https://api-football-v1.p.rapidapi.com/v3/fixtures/statistics?fixture={fixture_id}", headers=headers)
    response_parsed = json.loads(response.text)
    return response_parsed['response']

def update_match_stats(fixture_id, home_stats, away_stats):
    home_update_query = f"SET home_shots_on_goal = {home_stats[0] or 0}, home_shots_off_goal = {home_stats[1] or 0}, home_total_shots = {home_stats[2] or 0}, home_blocked_shots = {home_stats[3] or 0}, home_shots_inside_box = {home_stats[4] or 0}, home_shots_outside_box = {home_stats[5] or 0}, home_fouls = {home_stats[6] or 0}, home_corners = {home_stats[7] or 0}, home_offsides = {home_stats[8] or 0}, home_possession = {home_stats[9].replace('%', '')}, home_yellow_cards = {home_stats[10] or 0}, home_red_cards = {home_stats[11] or 0}, home_saves = {home_stats[12] or 0}, home_total_passes = {home_stats[13] or 0}, home_passes_accurate = {home_stats[14] or 0}, home_passes_pct = {home_stats[15].replace('%', '')}"
    away_update_query = f"away_shots_on_goal = {away_stats[0] or 0}, away_shots_off_goal = {away_stats[1] or 0}, away_total_shots = {away_stats[2] or 0}, away_blocked_shots = {away_stats[3] or 0}, away_shots_inside_box = {away_stats[4] or 0}, away_shots_outside_box = {away_stats[5] or 0}, away_fouls = {away_stats[6] or 0}, away_corners = {away_stats[7] or 0}, away_offsides = {away_stats[8] or 0}, away_possession = {away_stats[9].replace('%', '')}, away_yellow_cards = {away_stats[10] or 0}, away_red_cards = {away_stats[11] or 0}, away_saves = {away_stats[12] or 0}, away_total_passes = {away_stats[13] or 0}, away_passes_accurate = {away_stats[14] or 0}, away_passes_pct = {away_stats[15].replace('%', '')}"
    update_query = f"UPDATE matches {home_update_query}, {away_update_query} WHERE id = {fixture_id}"
    return update_query

In [None]:
def get_fixtures_statistics():
    update_queries = []

    for index, g in fixtures_df.iterrows():
        clear_output(wait=True)
        print(f"{index}/{len(fixtures_df.index)}")
        stats = get_league_season_fixtures(g['id'])
        home_stats = [s['value'] for s in stats[0]['statistics']]
        away_stats = [s['value'] for s in stats[1]['statistics']]
        query = update_match_stats(g['id'], home_stats, away_stats)
        update_queries.append(query)
        
    execute_multiple_queries(update_queries)

In [33]:
# Makes one request per fixture, so uses way more of your quota of requests to the Football-Api
allow_get_fixture_statistics = False

if allow_get_fixture_statistics: get_fixtures_statistics()

2149/2150
