In [1]:
import http.client
import json
import pandas as pd
import requests
import os
import pickle
import time

In [2]:
# api_management inherits from HTTPSConnection (ordinary subclassing, not a metaclass).
class api_management(http.client.HTTPSConnection):
    """Small client for the football API served at v3.football.api-sports.io.

    The instance is itself the HTTPS connection; every public method issues a
    GET request on it and returns the decoded ``"response"`` part of the JSON
    payload (or something derived from it).
    """

    # Host used both for the connection and for the 'x-rapidapi-host' header.
    API_HOST = "v3.football.api-sports.io"

    def __init__(self, api_key=None):
        """Create the connection object and the request headers.

        Parameters
        ----------
        api_key : str, optional
            Key sent in the ``x-rapidapi-key`` header. When omitted, the
            ``API_FOOTBALL_KEY`` environment variable is used, falling back to
            the placeholder that was previously hard-coded here.
        """
        http.client.HTTPSConnection.__init__(self, self.API_HOST)

        if api_key is None:
            # Keep the secret out of the notebook; the placeholder preserves
            # the behaviour of calling api_management() with nothing configured.
            api_key = os.environ.get("API_FOOTBALL_KEY", "**********")

        self.headers = {
            'x-rapidapi-host': self.API_HOST,
            'x-rapidapi-key': api_key,
        }

    def _fetch(self, path):
        """GET ``path`` and return the ``"response"`` entry of the JSON body."""
        self.request("GET", path, headers=self.headers)
        res = self.getresponse()
        data = res.read()
        return json.loads(data.decode("utf-8"))["response"]

    def quota_info(self):
        """Query ``/status``, print the quota figures and return what remains.

        Side effects: sets ``self.used``, ``self.quota`` and ``self.remains``.

        Returns
        -------
        The daily limit minus the current request count, as reported by the API.
        """
        status = self._fetch("/status")

        # Requests consumed so far and the daily limit.
        self.used = status["requests"]["current"]
        self.quota = status["requests"]["limit_day"]

        self.remains = self.quota - self.used
        print("used : {}, quota : {}, remains : {}".format(self.used, self.quota, self.remains))
        return self.remains

    def get_leagues(self):
        """Return league seasons that cover both fixture events and statistics.

        One row per (league, season). Only competitions whose ``league.type``
        is ``"League"`` are kept, and only seasons where both coverage flags
        are true (``Coverage == 2``). Calls ``quota_info()`` before returning.
        """
        league_res = self._fetch("/leagues")

        # One row per season, carrying the league/country identifiers along.
        league_df = pd.json_normalize(
            league_res,
            record_path="seasons",
            meta=[["league", "id"], ["league", "name"], ["country", "name"], ["league", "type"]],
        )

        coverage_cols = [
            "coverage.fixtures.events",
            "coverage.fixtures.statistics_fixtures",
        ]

        # .copy() so the new column is written to an independent frame rather
        # than to a view of league_df (avoids SettingWithCopyWarning).
        only_leagues = league_df.loc[
            league_df["league.type"] == "League",
            ["year"] + coverage_cols + ["league.name", "league.id", "country.name"],
        ].copy()

        # Number of coverage flags that are true for the season (0, 1 or 2).
        only_leagues["Coverage"] = only_leagues[coverage_cols].eq(True).sum(axis=1)
        leagues_acc = only_leagues[only_leagues["Coverage"] == 2]

        self.quota_info()

        return leagues_acc

    def get_fixtures(self, season, league):
        """Return the list of fixtures for the given season and league id.

        Calls ``quota_info()`` after the download, so the quota line is printed
        once per call.
        """
        fixture_res = self._fetch("/fixtures?season={}&league={}".format(season, league))
        self.quota_info()

        return fixture_res

    def fix_stat(self, fixture):
        """Return the statistics entries of one fixture, reshaped for pandas.

        In every returned entry the ``"statistics"`` list of
        ``{"type": ..., "value": ...}`` items is replaced by a flat
        ``{type: value}`` dict, and ``"fixture"`` is set to ``{"id": fixture}``.
        The original author notes the API returns one entry per playing team.
        """
        stat_res = self._fetch("/fixtures/statistics?fixture={}".format(fixture))

        for entry in stat_res:
            # Flatten so json_normalize yields one column per statistic type.
            entry["statistics"] = {item["type"]: item["value"] for item in entry["statistics"]}
            # Tag the entry with the fixture it belongs to.
            entry["fixture"] = {"id": fixture}

        return stat_res

    def fix_events(self, fixture):
        """Return the events of one fixture, each tagged with the fixture id.

        Every event dict gets ``"fixture"`` set to ``{"id": fixture}``.
        """
        event_res = self._fetch("/fixtures/events?fixture={}".format(fixture))

        for event in event_res:
            event["fixture"] = {"id": fixture}
        return event_res

## Data Download Flow

In [3]:
# League ids retained after the exploratory data analysis (EDA).
selected_leagues = [
    61, 144, 71, 39, 78, 135, 88, 94, 140, 62, 203, 197,
    79, 188, 218, 119, 40, 41, 42, 98, 72, 141, 136, 103,
    89, 113, 169, 207, 210, 235, 292, 307, 106, 265, 323, 475,
]

In [4]:
api= api_management()

In [5]:
# Directory where every CSV of this notebook is read from / written to.
# Set the FOOTBALL_DATA_DIR environment variable to relocate it; otherwise the
# original author's local folder is used.
data_dir = os.environ.get("FOOTBALL_DATA_DIR") or "C:\\Users\\Arda\\Downloads\\football_master\\Documantation\\Datas\\"
if not data_dir.endswith(("\\", "/")):
    # File names are appended with "{}name.csv".format(data_dir), so the value
    # must end with a path separator.
    data_dir = os.path.join(data_dir, "")

In [6]:
# Ask the API for the fully covered league seasons and cache them as CSV.
leagues = api.get_leagues()
leagues.to_csv(
    "{}leagues.csv".format(data_dir),
    index=False,
)

used : 0, quota : 7500, remains : 7500


In [7]:
#Because we download all data that we need, we can import league dataframe from our local pc.
leagues

Unnamed: 0,year,coverage.fixtures.events,coverage.fixtures.statistics_fixtures,league.name,league.id,country.name,Coverage
13,2015,True,True,Ligue 1,61,France,2
14,2016,True,True,Ligue 1,61,France,2
15,2017,True,True,Ligue 1,61,France,2
16,2018,True,True,Ligue 1,61,France,2
17,2019,True,True,Ligue 1,61,France,2
...,...,...,...,...,...,...,...
4930,2024,True,True,Paulista - A1,475,Brazil,2
4945,2020,True,True,Canadian Premier League,479,Canada,2
4948,2023,True,True,Canadian Premier League,479,Canada,2
5962,2023,True,True,Copa de la Liga Profesional,1032,Argentina,2


## Fixtures

In [8]:
#We will download the fixtures of each selected league and season.
#We must stay within the allowed API call rate of 450 calls per 60 seconds.

In [9]:
# Minimum time budget per API call, in seconds per call: each download is
# timed, and the loops below pause when a call returns faster than this.
call_rate = 60 / 300
call_rate

0.2

In [10]:
# Load the fixtures cached by a previous run, if the file exists.
try:
    saved_fixtures_df = pd.read_csv("{}saved_fixtures.csv".format(data_dir))
except FileNotFoundError:
    # No cache yet. Fall back to an empty frame (not a list) that carries the
    # columns the following cells index, so they do not fail on the lookup.
    # Any other read/parse error is deliberately allowed to surface.
    saved_fixtures_df = pd.DataFrame(
        columns=["fixture.id", "fixture.status.long", "league.id", "league.season"]
    )
    


In [11]:
saved_fixtures_df

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
0,185545,"Fredy Fautrel, France",UTC,2015-08-07T18:30:00+00:00,1438972200,1.438972e+09,1.438976e+09,,Stade Pierre Mauroy,Lille,...,0.0,1.0,0.0,0.0,0.0,1.0,,,,
1,185546,"Wilfried Bien, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Stade de la Beaujoire,Nantes,...,1.0,0.0,0.0,0.0,1.0,0.0,,,,
2,185547,"Hakim Ben el Salem Hadj, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,660.0,Stade de la Mosson,Montpellier,...,0.0,2.0,0.0,1.0,0.0,2.0,,,,
3,185548,"Lionel Jaffredo, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Allianz Rivera,Nice,...,1.0,2.0,1.0,0.0,1.0,2.0,,,,
4,185549,"Frank Schneider, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,667.0,Orange Velodrome,Marseille,...,0.0,1.0,0.0,1.0,0.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91823,1146819,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,5664.0,Estádio Dr. Jorge Ismael de Biasi,"Novo Horizonte, São Paulo",...,,,,,,,,,,
91824,1146820,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,258.0,Allianz Parque,"São Paulo, São Paulo",...,,,,,,,,,,
91825,1146821,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,7045.0,Estádio Bruno José Daniel,"Santo André, São Paulo",...,,,,,,,,,,
91826,1146822,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,10494.0,Estádio Urbano Caldeira,"Santos, São Paulo",...,,,,,,,,,,


In [12]:
# Restrict the cached fixtures to the leagues chosen for this study.
saved_fixtures_df = saved_fixtures_df.loc[
    lambda fixtures: fixtures["league.id"].isin(selected_leagues)
]

In [13]:
saved_fixtures_df.columns

Index(['fixture.id', 'fixture.referee', 'fixture.timezone', 'fixture.date',
       'fixture.timestamp', 'fixture.periods.first', 'fixture.periods.second',
       'fixture.venue.id', 'fixture.venue.name', 'fixture.venue.city',
       'fixture.status.long', 'fixture.status.short', 'fixture.status.elapsed',
       'league.id', 'league.name', 'league.country', 'league.logo',
       'league.season', 'league.round', 'teams.home.id', 'teams.home.name',
       'teams.home.winner', 'teams.away.id', 'teams.away.name',
       'teams.away.winner', 'goals.home', 'goals.away', 'score.halftime.home',
       'score.halftime.away', 'score.fulltime.home', 'score.fulltime.away',
       'score.extratime.home', 'score.extratime.away', 'score.penalty.home',
       'score.penalty.away'],
      dtype='object')

In [14]:
# Latest cached season per league.
max_fixtures = (
    saved_fixtures_df
    .groupby("league.id", as_index=False)["league.season"]
    .max()
    .rename(columns={"league.season": "max_season"})
)
max_fixtures

Unnamed: 0,league.id,max_season
0,39,2023
1,40,2023
2,41,2023
3,42,2023
4,61,2023
5,62,2023
6,71,2023
7,72,2023
8,78,2023
9,79,2023


In [15]:
# Attach each league's latest cached season to the league/season table.
last_seasons = leagues.merge(max_fixtures)

# Seasons at or after the latest cached one still have to be (re)downloaded.
last_seasons_v1 = (
    last_seasons
    .loc[lambda seasons: seasons["max_season"] <= seasons["year"]]
    .sort_values(["league.id", "year"])
    .drop(columns="max_season")
)
last_seasons_v1

Unnamed: 0,year,coverage.fixtures.events,coverage.fixtures.statistics_fixtures,league.name,league.id,country.name,Coverage
36,2023,True,True,Premier League,39,England,2
157,2023,True,True,Championship,40,England,2
166,2023,True,True,League One,41,England,2
173,2023,True,True,League Two,42,England,2
8,2023,True,True,Ligue 1,61,France,2
92,2023,True,True,Ligue 2,62,France,2
26,2023,True,True,Serie A,71,Brazil,2
189,2023,True,True,Serie B,72,Brazil,2
45,2023,True,True,Bundesliga,78,Germany,2
119,2023,True,True,2. Bundesliga,79,Germany,2


In [16]:
# Cached fixtures from seasons strictly before each league's latest cached
# season; these are kept as they are and not downloaded again.
prior_saved_fixtures = (
    saved_fixtures_df
    .merge(max_fixtures)
    .loc[lambda fixtures: fixtures["league.season"] < fixtures["max_season"]]
)
prior_saved_fixtures

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away,max_season
0,185545,"Fredy Fautrel, France",UTC,2015-08-07T18:30:00+00:00,1438972200,1.438972e+09,1.438976e+09,,Stade Pierre Mauroy,Lille,...,1.0,0.0,0.0,0.0,1.0,,,,,2023
1,185546,"Wilfried Bien, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Stade de la Beaujoire,Nantes,...,0.0,0.0,0.0,1.0,0.0,,,,,2023
2,185547,"Hakim Ben el Salem Hadj, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,660.0,Stade de la Mosson,Montpellier,...,2.0,0.0,1.0,0.0,2.0,,,,,2023
3,185548,"Lionel Jaffredo, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Allianz Rivera,Nice,...,2.0,1.0,0.0,1.0,2.0,,,,,2023
4,185549,"Frank Schneider, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,667.0,Orange Velodrome,Marseille,...,1.0,0.0,1.0,0.0,1.0,,,,,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91727,1014795,Flavio Rodrigues De Souza,UTC,2023-03-19T19:00:00+00:00,1679252400,1.679252e+09,1.679256e+09,258.0,Allianz Parque,"São Paulo, São Paulo",...,0.0,0.0,0.0,1.0,0.0,,,,,2024
91728,1014796,Raphael Claus,UTC,2023-03-21T00:00:00+00:00,1679356800,1.679357e+09,1.679360e+09,10494.0,Estádio Urbano Caldeira,"Santos, São Paulo",...,1.0,0.0,1.0,1.0,1.0,0.0,0.0,4.0,2.0,2024
91729,1017173,Edina Alves Batista,UTC,2023-04-02T19:00:00+00:00,1680462000,1.680462e+09,1.680466e+09,255.0,Arena Barueri,"Barueri, São Paulo",...,1.0,1.0,0.0,2.0,1.0,,,,,2024
91730,1017174,Raphael Claus,UTC,2023-04-09T19:00:00+00:00,1681066800,1.681067e+09,1.681070e+09,258.0,Allianz Parque,"São Paulo, São Paulo",...,0.0,3.0,0.0,4.0,0.0,,,,,2024


In [17]:

# Download the fixtures of every league/season selected in last_seasons_v1.
saved_fixtures_download = []
# Leave a safety margin of 10 calls below the remaining daily quota.
quota = api.quota_info() - 10

for i, (season, league) in enumerate(
    zip(last_seasons_v1["year"], last_seasons_v1["league.id"])
):
    # Stop before exceeding the quota budget, as the stats/events loops do.
    if i >= quota:
        print("quota budget reached, stopping at", i)
        break

    current = time.time()

    # Append the downloaded fixtures in place (no list re-allocation per call).
    saved_fixtures_download.extend(api.get_fixtures(season, league))
    diff = time.time() - current
    if diff < call_rate:
        # Wait only for the rest of the per-call time budget.
        time.sleep(call_rate - diff)
    print(quota, league, season, i)




used : 0, quota : 7500, remains : 7500
used : 0, quota : 7500, remains : 7500
7490 39 2023 0
used : 0, quota : 7500, remains : 7500
7490 40 2023 1
used : 0, quota : 7500, remains : 7500
7490 41 2023 2
used : 0, quota : 7500, remains : 7500
7490 42 2023 3
used : 0, quota : 7500, remains : 7500
7490 61 2023 4
used : 0, quota : 7500, remains : 7500
7490 62 2023 5
used : 0, quota : 7500, remains : 7500
7490 71 2023 6
used : 0, quota : 7500, remains : 7500
7490 72 2023 7
used : 0, quota : 7500, remains : 7500
7490 78 2023 8
used : 0, quota : 7500, remains : 7500
7490 79 2023 9
used : 0, quota : 7500, remains : 7500
7490 88 2023 10
used : 0, quota : 7500, remains : 7500
7490 89 2023 11
used : 0, quota : 7500, remains : 7500
7490 94 2023 12
used : 0, quota : 7500, remains : 7500
7490 98 2023 13
used : 0, quota : 7500, remains : 7500
7490 103 2023 14
used : 0, quota : 7500, remains : 7500
7490 106 2023 15
used : 0, quota : 7500, remains : 7500
7490 113 2023 16
used : 0, quota : 7500, remains :

In [18]:
# Flatten the freshly downloaded fixtures into a dataframe and discard the
# image-URL columns. errors="ignore" keeps the cell working when nothing was
# downloaded (the normalized frame is then empty and has no such columns).
post_saved_fixtures = pd.json_normalize(saved_fixtures_download).drop(
    ['teams.home.logo', 'teams.away.logo', 'league.flag'],
    axis=1,
    errors="ignore",
)


In [19]:
prior_saved_fixtures

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away,max_season
0,185545,"Fredy Fautrel, France",UTC,2015-08-07T18:30:00+00:00,1438972200,1.438972e+09,1.438976e+09,,Stade Pierre Mauroy,Lille,...,1.0,0.0,0.0,0.0,1.0,,,,,2023
1,185546,"Wilfried Bien, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Stade de la Beaujoire,Nantes,...,0.0,0.0,0.0,1.0,0.0,,,,,2023
2,185547,"Hakim Ben el Salem Hadj, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,660.0,Stade de la Mosson,Montpellier,...,2.0,0.0,1.0,0.0,2.0,,,,,2023
3,185548,"Lionel Jaffredo, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Allianz Rivera,Nice,...,2.0,1.0,0.0,1.0,2.0,,,,,2023
4,185549,"Frank Schneider, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,667.0,Orange Velodrome,Marseille,...,1.0,0.0,1.0,0.0,1.0,,,,,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91727,1014795,Flavio Rodrigues De Souza,UTC,2023-03-19T19:00:00+00:00,1679252400,1.679252e+09,1.679256e+09,258.0,Allianz Parque,"São Paulo, São Paulo",...,0.0,0.0,0.0,1.0,0.0,,,,,2024
91728,1014796,Raphael Claus,UTC,2023-03-21T00:00:00+00:00,1679356800,1.679357e+09,1.679360e+09,10494.0,Estádio Urbano Caldeira,"Santos, São Paulo",...,1.0,0.0,1.0,1.0,1.0,0.0,0.0,4.0,2.0,2024
91729,1017173,Edina Alves Batista,UTC,2023-04-02T19:00:00+00:00,1680462000,1.680462e+09,1.680466e+09,255.0,Arena Barueri,"Barueri, São Paulo",...,1.0,1.0,0.0,2.0,1.0,,,,,2024
91730,1017174,Raphael Claus,UTC,2023-04-09T19:00:00+00:00,1681066800,1.681067e+09,1.681070e+09,258.0,Allianz Parque,"São Paulo, São Paulo",...,0.0,3.0,0.0,4.0,0.0,,,,,2024


In [20]:
post_saved_fixtures

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
0,1035037,"Craig Pawson, England",UTC,2023-08-11T19:00:00+00:00,1691780400,1.691780e+09,1.691784e+09,512.0,Turf Moor,Burnley,...,0.0,3.0,0.0,2.0,0.0,3.0,,,,
1,1035038,"Michael Oliver, England",UTC,2023-08-12T11:30:00+00:00,1691839800,1.691840e+09,1.691843e+09,494.0,Emirates Stadium,London,...,2.0,1.0,2.0,0.0,2.0,1.0,,,,
2,1035039,"Peter Bankes, England",UTC,2023-08-12T14:00:00+00:00,1691848800,1.691849e+09,1.691852e+09,504.0,Vitality Stadium,"Bournemouth, Dorset",...,1.0,1.0,0.0,0.0,1.0,1.0,,,,
3,1035040,"David Coote, England",UTC,2023-08-12T14:00:00+00:00,1691848800,1.691849e+09,1.691852e+09,508.0,The American Express Community Stadium,"Falmer, East Sussex",...,4.0,1.0,1.0,0.0,4.0,1.0,,,,
4,1035041,"Stuart Attwell, England",UTC,2023-08-12T14:00:00+00:00,1691848800,1.691849e+09,1.691852e+09,8560.0,Goodison Park,Liverpool,...,0.0,1.0,0.0,0.0,0.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10871,1146819,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,5664.0,Estádio Dr. Jorge Ismael de Biasi,"Novo Horizonte, São Paulo",...,,,,,,,,,,
10872,1146820,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,258.0,Allianz Parque,"São Paulo, São Paulo",...,,,,,,,,,,
10873,1146821,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,7045.0,Estádio Bruno José Daniel,"Santo André, São Paulo",...,,,,,,,,,,
10874,1146822,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,10494.0,Estádio Urbano Caldeira,"Santos, São Paulo",...,,,,,,,,,,


In [21]:
# Stack the untouched older seasons on top of the fresh download and remove
# the helper column that only the older part carries.
saved_fixtures_df = (
    pd.concat([prior_saved_fixtures, post_saved_fixtures])
    .drop(columns="max_season")
)
saved_fixtures_df

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
0,185545,"Fredy Fautrel, France",UTC,2015-08-07T18:30:00+00:00,1438972200,1.438972e+09,1.438976e+09,,Stade Pierre Mauroy,Lille,...,0.0,1.0,0.0,0.0,0.0,1.0,,,,
1,185546,"Wilfried Bien, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Stade de la Beaujoire,Nantes,...,1.0,0.0,0.0,0.0,1.0,0.0,,,,
2,185547,"Hakim Ben el Salem Hadj, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,660.0,Stade de la Mosson,Montpellier,...,0.0,2.0,0.0,1.0,0.0,2.0,,,,
3,185548,"Lionel Jaffredo, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Allianz Rivera,Nice,...,1.0,2.0,1.0,0.0,1.0,2.0,,,,
4,185549,"Frank Schneider, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,667.0,Orange Velodrome,Marseille,...,0.0,1.0,0.0,1.0,0.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10871,1146819,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,5664.0,Estádio Dr. Jorge Ismael de Biasi,"Novo Horizonte, São Paulo",...,,,,,,,,,,
10872,1146820,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,258.0,Allianz Parque,"São Paulo, São Paulo",...,,,,,,,,,,
10873,1146821,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,7045.0,Estádio Bruno José Daniel,"Santo André, São Paulo",...,,,,,,,,,,
10874,1146822,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,10494.0,Estádio Urbano Caldeira,"Santos, São Paulo",...,,,,,,,,,,


In [22]:
# Write the merged fixture table back to the cache file read at the top of
# this section.
saved_fixtures_df.to_csv(
    "{}saved_fixtures.csv".format(data_dir),
    index=False,
)

## Fixture Stats

In [23]:
#In this section we will download the statistics of the selected fixtures.
#There are two limits: API calls per minute (450/min) and the daily API call quota (7500, as printed by quota_info()).

In [24]:
saved_fixtures_df

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
0,185545,"Fredy Fautrel, France",UTC,2015-08-07T18:30:00+00:00,1438972200,1.438972e+09,1.438976e+09,,Stade Pierre Mauroy,Lille,...,0.0,1.0,0.0,0.0,0.0,1.0,,,,
1,185546,"Wilfried Bien, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Stade de la Beaujoire,Nantes,...,1.0,0.0,0.0,0.0,1.0,0.0,,,,
2,185547,"Hakim Ben el Salem Hadj, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,660.0,Stade de la Mosson,Montpellier,...,0.0,2.0,0.0,1.0,0.0,2.0,,,,
3,185548,"Lionel Jaffredo, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Allianz Rivera,Nice,...,1.0,2.0,1.0,0.0,1.0,2.0,,,,
4,185549,"Frank Schneider, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,667.0,Orange Velodrome,Marseille,...,0.0,1.0,0.0,1.0,0.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10871,1146819,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,5664.0,Estádio Dr. Jorge Ismael de Biasi,"Novo Horizonte, São Paulo",...,,,,,,,,,,
10872,1146820,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,258.0,Allianz Parque,"São Paulo, São Paulo",...,,,,,,,,,,
10873,1146821,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,7045.0,Estádio Bruno José Daniel,"Santo André, São Paulo",...,,,,,,,,,,
10874,1146822,,UTC,2024-03-10T21:00:00+00:00,1710104400,,,10494.0,Estádio Urbano Caldeira,"Santos, São Paulo",...,,,,,,,,,,


In [25]:
saved_fixtures_df.columns

Index(['fixture.id', 'fixture.referee', 'fixture.timezone', 'fixture.date',
       'fixture.timestamp', 'fixture.periods.first', 'fixture.periods.second',
       'fixture.venue.id', 'fixture.venue.name', 'fixture.venue.city',
       'fixture.status.long', 'fixture.status.short', 'fixture.status.elapsed',
       'league.id', 'league.name', 'league.country', 'league.logo',
       'league.season', 'league.round', 'teams.home.id', 'teams.home.name',
       'teams.home.winner', 'teams.away.id', 'teams.away.name',
       'teams.away.winner', 'goals.home', 'goals.away', 'score.halftime.home',
       'score.halftime.away', 'score.fulltime.home', 'score.fulltime.away',
       'score.extratime.home', 'score.extratime.away', 'score.penalty.home',
       'score.penalty.away'],
      dtype='object')

In [26]:
saved_fixtures_df["fixture.status.long"].unique()

array(['Match Finished', 'Match Cancelled', 'Match Postponed',
       'Technical loss', 'Not Started', 'Time to be defined'],
      dtype=object)

In [27]:
# Status labels treated as "the match has a final result".
finished_matches = [
    'Match Finished',
    'Technical loss',
]
finished_matches

['Match Finished', 'Technical loss']

In [28]:
# Finished fixtures from season 2022 onwards: the ones whose stats and events
# are downloaded below.
relevant_saved_fixtures = saved_fixtures_df.loc[
    lambda fixtures: fixtures["fixture.status.long"].isin(finished_matches)
    & (fixtures["league.season"] >= 2022)
]
relevant_saved_fixtures

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
2664,871470,F. Letexier,UTC,2022-08-06T15:00:00+00:00,1659798000,1.659798e+09,1.659802e+09,681.0,Stade de la Meinau,Strasbourg,...,1.0,2.0,0.0,1.0,1.0,2.0,,,,
2665,871471,B. Dechepy,UTC,2022-08-07T15:05:00+00:00,1659884700,1.659885e+09,1.659888e+09,680.0,Roazhon Park,Rennes,...,0.0,1.0,0.0,0.0,0.0,1.0,,,,
2666,871472,J. Stinat,UTC,2022-08-06T19:00:00+00:00,1659812400,1.659812e+09,1.659816e+09,644.0,Stade Gabriel Montpied,Clermont-Ferrand,...,0.0,5.0,0.0,3.0,0.0,5.0,,,,
2667,871473,J. Hamel,UTC,2022-08-07T11:00:00+00:00,1659870000,1.659870e+09,1.659874e+09,682.0,Stadium de Toulouse,Toulouse,...,1.0,1.0,1.0,0.0,1.0,1.0,,,,
2668,871474,B. Bastien,UTC,2022-08-05T19:00:00+00:00,1659726000,1.659726e+09,1.659730e+09,666.0,Groupama Stadium,Décines-Charpieu,...,2.0,1.0,2.0,1.0,2.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10831,1146779,Guilherme Nunes de Santana,UTC,2024-02-13T00:00:00+00:00,1707782400,1.707782e+09,1.707786e+09,5664.0,Estádio Dr. Jorge Ismael de Biasi,"Novo Horizonte, São Paulo",...,2.0,0.0,0.0,0.0,2.0,0.0,,,,
10832,1146780,Fabiano Monteiro dos Santos,UTC,2024-02-10T21:00:00+00:00,1707598800,1.707599e+09,1.707602e+09,261.0,Estádio Moisés Lucarelli,"Campinas, São Paulo",...,2.0,0.0,0.0,0.0,2.0,0.0,,,,
10833,1146781,Douglas Marques das Flores,UTC,2024-02-10T23:15:00+00:00,1707606900,1.707607e+09,1.707610e+09,220.0,Estádio Nabi Abi Chedid,"Bragança Paulista, São Paulo",...,1.0,1.0,0.0,0.0,1.0,1.0,,,,
10834,1146782,Lucas Canetto Bellote,UTC,2024-02-12T22:00:00+00:00,1707775200,1.707775e+09,1.707779e+09,7045.0,Estádio Bruno José Daniel,"Santo André, São Paulo",...,1.0,1.0,0.0,0.0,1.0,1.0,,,,


In [29]:
call_rate=60/300

In [30]:
quota=api.quota_info()-10

used : 0, quota : 7500, remains : 7500


In [31]:

# Load the fixture statistics cached by a previous run, if the file exists.
try:
    fixture_stat = pd.read_csv("{}fixture_stat.csv".format(data_dir))
except FileNotFoundError:
    # No cache yet. An empty frame with a "fixture.id" column (instead of a
    # list) keeps the later column lookup and pd.concat working. Other
    # read/parse errors are allowed to surface.
    fixture_stat = pd.DataFrame(columns=["fixture.id"])


In [32]:
full_stats=fixture_stat
full_stats

Unnamed: 0,team.id,team.name,team.logo,statistics.Shots on Goal,statistics.Shots off Goal,statistics.Total Shots,statistics.Blocked Shots,statistics.Shots insidebox,statistics.Shots outsidebox,statistics.Fouls,...,statistics.expected_goals,statistics.Assists,statistics.Counter Attacks,statistics.Cross Attacks,statistics.Free Kicks,statistics.Goals,statistics.Goal Attempts,statistics.Substitutions,statistics.Throwins,statistics.Medical Treatment
0,79,Lille,https://media-2.api-sports.io/football/teams/7...,2.0,4.0,12.0,6.0,4.0,8.0,,...,,,,,,,,,,
1,85,Paris Saint Germain,https://media-1.api-sports.io/football/teams/8...,3.0,2.0,7.0,2.0,6.0,1.0,,...,,,,,,,,,,
2,83,Nantes,https://media-1.api-sports.io/football/teams/8...,4.0,2.0,9.0,3.0,4.0,6.0,,...,,,,,,,,,,
3,90,Guingamp,https://media-2.api-sports.io/football/teams/9...,4.0,4.0,10.0,2.0,5.0,5.0,,...,,,,,,,,,,
4,82,Montpellier,https://media-2.api-sports.io/football/teams/8...,4.0,7.0,11.0,,9.0,2.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144684,1011,FC ST. Gallen,https://media.api-sports.io/football/teams/101...,1.0,10.0,15.0,4.0,9.0,6.0,9.0,...,,,,,,,,,,
144685,6653,Yverdon Sport,https://media.api-sports.io/football/teams/665...,4.0,3.0,8.0,1.0,6.0,2.0,16.0,...,,,,,,,,,,
144686,2184,Servette FC,https://media.api-sports.io/football/teams/218...,3.0,4.0,9.0,2.0,7.0,2.0,12.0,...,,,,,,,,,,
144687,3476,Jamshedpur,https://media.api-sports.io/football/teams/347...,5.0,7.0,15.0,3.0,10.0,5.0,16.0,...,,,,,,,,,,


In [33]:
downloaded_stat_fixs=list(full_stats["fixture.id"].unique()) 

In [34]:
# Fixture ids whose statistics are already cached ...
downloaded_stat_fixs = list(full_stats["fixture.id"].unique())
# ... and the relevant fixture ids that are still missing, in ascending order.
not_downloaded_stat_fixs = list(
    relevant_saved_fixtures["fixture.id"]
    .loc[lambda ids: ~ids.isin(downloaded_stat_fixs)]
    .sort_values()
    .unique()
)
len(not_downloaded_stat_fixs)

80

In [35]:
# Leave a safety margin of 10 calls below the remaining daily quota.
quota = api.quota_info() - 10

fixture_json = []

iterasyon = 0

# Download the statistics of every fixture that is not cached yet.
for i in not_downloaded_stat_fixs:

    # Only call the API while the quota budget lasts.
    if iterasyon < quota:

        current = time.time()
        # Append in place instead of rebuilding the list on every call.
        fixture_json.extend(api.fix_stat(i))
        diff = time.time() - current

        # Throttle when the call was FASTER than the per-call budget (the
        # original compared with ">" and therefore never slowed fast calls).
        if diff < call_rate:
            time.sleep(call_rate - diff)

    print(iterasyon)
    iterasyon += 1
post_fixture_stat_df = pd.json_normalize(fixture_json)

used : 0, quota : 7500, remains : 7500
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79


In [36]:
#Previous and downloaded fixture stats joined
fixture_stat_df=pd.concat([full_stats,post_fixture_stat_df])

fixture_stat_df

Unnamed: 0,team.id,team.name,team.logo,statistics.Shots on Goal,statistics.Shots off Goal,statistics.Total Shots,statistics.Blocked Shots,statistics.Shots insidebox,statistics.Shots outsidebox,statistics.Fouls,...,statistics.expected_goals,statistics.Assists,statistics.Counter Attacks,statistics.Cross Attacks,statistics.Free Kicks,statistics.Goals,statistics.Goal Attempts,statistics.Substitutions,statistics.Throwins,statistics.Medical Treatment
0,79,Lille,https://media-2.api-sports.io/football/teams/7...,2.0,4.0,12.0,6.0,4.0,8.0,,...,,,,,,,,,,
1,85,Paris Saint Germain,https://media-1.api-sports.io/football/teams/8...,3.0,2.0,7.0,2.0,6.0,1.0,,...,,,,,,,,,,
2,83,Nantes,https://media-1.api-sports.io/football/teams/8...,4.0,2.0,9.0,3.0,4.0,6.0,,...,,,,,,,,,,
3,90,Guingamp,https://media-2.api-sports.io/football/teams/9...,4.0,4.0,10.0,2.0,5.0,5.0,,...,,,,,,,,,,
4,82,Montpellier,https://media-2.api-sports.io/football/teams/8...,4.0,7.0,11.0,,9.0,2.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,138,Guarani Campinas,https://media.api-sports.io/football/teams/138...,3.0,3.0,8.0,2.0,2.0,6.0,10.0,...,,,,,,,,,,
26,10003,Santo André,https://media.api-sports.io/football/teams/100...,5.0,3.0,9.0,1.0,7.0,2.0,18.0,...,,,,,,,,,,
27,121,Palmeiras,https://media.api-sports.io/football/teams/121...,5.0,8.0,18.0,5.0,12.0,6.0,13.0,...,,,,,,,,,,
28,3477,Kerala Blasters,https://media.api-sports.io/football/teams/347...,3.0,6.0,11.0,2.0,7.0,4.0,10.0,...,,,,,,,,,,


In [37]:
#Saving new fixture statistics dataframe
fixture_stat_df.to_csv("{}fixture_stat.csv".format(data_dir),index=False)

In [38]:
fixture_stat_df.columns

Index(['team.id', 'team.name', 'team.logo', 'statistics.Shots on Goal',
       'statistics.Shots off Goal', 'statistics.Total Shots',
       'statistics.Blocked Shots', 'statistics.Shots insidebox',
       'statistics.Shots outsidebox', 'statistics.Fouls',
       'statistics.Corner Kicks', 'statistics.Offsides',
       'statistics.Ball Possession', 'statistics.Yellow Cards',
       'statistics.Red Cards', 'statistics.Goalkeeper Saves',
       'statistics.Total passes', 'statistics.Passes accurate',
       'statistics.Passes %', 'fixture.id', 'statistics.expected_goals',
       'statistics.Assists', 'statistics.Counter Attacks',
       'statistics.Cross Attacks', 'statistics.Free Kicks', 'statistics.Goals',
       'statistics.Goal Attempts', 'statistics.Substitutions',
       'statistics.Throwins', 'statistics.Medical Treatment'],
      dtype='object')

In [39]:
downloaded=list(fixture_stat_df["fixture.id"].unique())
len(downloaded)

72365

In [40]:

# Sanity check: finished fixtures for which statistics are now cached.
check = saved_fixtures_df.loc[
    lambda fixtures: fixtures["fixture.id"].isin(downloaded)
    & (fixtures["fixture.status.long"] == 'Match Finished')
]
check

Unnamed: 0,fixture.id,fixture.referee,fixture.timezone,fixture.date,fixture.timestamp,fixture.periods.first,fixture.periods.second,fixture.venue.id,fixture.venue.name,fixture.venue.city,...,goals.home,goals.away,score.halftime.home,score.halftime.away,score.fulltime.home,score.fulltime.away,score.extratime.home,score.extratime.away,score.penalty.home,score.penalty.away
0,185545,"Fredy Fautrel, France",UTC,2015-08-07T18:30:00+00:00,1438972200,1.438972e+09,1.438976e+09,,Stade Pierre Mauroy,Lille,...,0.0,1.0,0.0,0.0,0.0,1.0,,,,
1,185546,"Wilfried Bien, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Stade de la Beaujoire,Nantes,...,1.0,0.0,0.0,0.0,1.0,0.0,,,,
2,185547,"Hakim Ben el Salem Hadj, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,660.0,Stade de la Mosson,Montpellier,...,0.0,2.0,0.0,1.0,0.0,2.0,,,,
3,185548,"Lionel Jaffredo, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,,Allianz Rivera,Nice,...,1.0,2.0,1.0,0.0,1.0,2.0,,,,
4,185549,"Frank Schneider, France",UTC,2015-08-08T19:00:00+00:00,1439060400,1.439060e+09,1.439064e+09,667.0,Orange Velodrome,Marseille,...,0.0,1.0,0.0,1.0,0.0,1.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10831,1146779,Guilherme Nunes de Santana,UTC,2024-02-13T00:00:00+00:00,1707782400,1.707782e+09,1.707786e+09,5664.0,Estádio Dr. Jorge Ismael de Biasi,"Novo Horizonte, São Paulo",...,2.0,0.0,0.0,0.0,2.0,0.0,,,,
10832,1146780,Fabiano Monteiro dos Santos,UTC,2024-02-10T21:00:00+00:00,1707598800,1.707599e+09,1.707602e+09,261.0,Estádio Moisés Lucarelli,"Campinas, São Paulo",...,2.0,0.0,0.0,0.0,2.0,0.0,,,,
10833,1146781,Douglas Marques das Flores,UTC,2024-02-10T23:15:00+00:00,1707606900,1.707607e+09,1.707610e+09,220.0,Estádio Nabi Abi Chedid,"Bragança Paulista, São Paulo",...,1.0,1.0,0.0,0.0,1.0,1.0,,,,
10834,1146782,Lucas Canetto Bellote,UTC,2024-02-12T22:00:00+00:00,1707775200,1.707775e+09,1.707779e+09,7045.0,Estádio Bruno José Daniel,"Santo André, São Paulo",...,1.0,1.0,0.0,0.0,1.0,1.0,,,,


### FIXTURE EVENT

In [47]:
# Load the fixture events cached by a previous run, if the file exists.
try:
    fixture_event = pd.read_csv("{}fixture_event_df.csv".format(data_dir))
except FileNotFoundError:
    # No cache yet. An empty frame with a "fixture.id" column (instead of a
    # list) keeps the later column lookup and pd.concat working. Other
    # read/parse errors are allowed to surface.
    fixture_event = pd.DataFrame(columns=["fixture.id"])
   

In [48]:
prior_fixture_event=fixture_event

In [49]:
# Fixture ids whose events are already cached ...
downloaded_stat_events = list(prior_fixture_event["fixture.id"].unique())
# ... and the relevant fixture ids still missing events, in ascending order.
not_downloaded_stat_events = list(
    relevant_saved_fixtures["fixture.id"]
    .loc[lambda ids: ~ids.isin(downloaded_stat_events)]
    .sort_values()
    .unique()
)
len(not_downloaded_stat_events)

51

In [50]:

# Leave a safety margin of 10 calls below the remaining daily quota.
quota = api.quota_info() - 10

iterasyon = 0
post_event_json = []

# Download the events of every fixture that is not cached yet.
for i in not_downloaded_stat_events:

    # Only call the API while the quota budget lasts.
    if iterasyon < quota:

        current = time.time()
        # Append in place instead of rebuilding the list on every call.
        post_event_json.extend(api.fix_events(i))
        diff = time.time() - current

        if diff < call_rate:
            # Wait only for the rest of the per-call time budget.
            time.sleep(call_rate - diff)

    print(iterasyon, i)
    iterasyon += 1

# errors="ignore": when no event was downloaded the normalized frame is empty
# and has no "team.logo" column to drop.
post_fixture_event_df = pd.json_normalize(post_event_json).drop(
    "team.logo", axis=1, errors="ignore"
)

used : 0, quota : 7500, remains : 7500
0 828579
1 884549
2 884555
3 884560
4 884563
5 884568
6 884573
7 884577
8 884580
9 884586
10 884591
11 884593
12 884599
13 884609
14 884610
15 884614
16 884623
17 884627
18 884629
19 884634
20 884641
21 884645
22 884653
23 884654
24 884656
25 884659
26 884669
27 884672
28 884675
29 884677
30 927042
31 927044
32 927045
33 927047
34 927053
35 927059
36 1035405
37 1035984
38 1038182
39 1044749
40 1044750
41 1045468
42 1046113
43 1052487
44 1063656
45 1074840
46 1074846
47 1074847
48 1146779
49 1146782
50 1168373


In [51]:
#Joining previous and downloaded fixture events
fixture_event_df=pd.concat([prior_fixture_event,post_fixture_event_df])

In [52]:
#Saving new fixture event dataframe
fixture_event_df.to_csv("{}fixture_event_df.csv".format(data_dir),index=False)