In [1]:
import pandas as pd
import json
import requests

# Tablas

### Races

In [2]:
# Race calendar: one row per Grand Prix for the seasons 2010 through 2022.
# Each output column is paired with the function that pulls its value out of
# a single race entry of the API response.
race_columns = {
    'year': lambda race: int(race['season']),
    'round': lambda race: int(race['round']),
    'raceName': lambda race: race['raceName'],
    'circuitId': lambda race: race['Circuit']['circuitId'],
    'lat': lambda race: float(race['Circuit']['Location']['lat']),
    'long': lambda race: float(race['Circuit']['Location']['long']),
    'country': lambda race: race['Circuit']['Location']['country'],
    'date': lambda race: race['date'],
    'url': lambda race: race['url'],
}
# Dictionary of lists (one list per column) that accumulates the extracted values.
races = {column: [] for column in race_columns}
for year in range(2010, 2023):  # the season is part of the URL, so request year by year
    url = 'http://ergast.com/api/f1/{}.json?limit=1000'
    response = requests.get(url.format(year))
    content = json.loads(response.content)
    for race in content['MRData']['RaceTable']['Races']:
        for column, extract in race_columns.items():
            races[column].append(extract(race))

# Build the table and convert the date strings to a datetime column.
df_races = pd.DataFrame(races)
df_races['date'] = df_races['date'].astype('datetime64[ns]')
df_races.to_csv('Races.csv', index=False)

### Seasons

In [3]:
# `?limit=1000` lifts the default page size; without it only the first 30
# seasons would be returned.
url = 'http://ergast.com/api/f1/seasons.json?limit=1000'
response = requests.get(url)
content = json.loads(response.content)

# One entry per season in the response; split it into the two output columns.
season_entries = content['MRData']['SeasonTable']['Seasons']
season = {
    'year': [int(entry['season']) for entry in season_entries],
    'url': [entry['url'] for entry in season_entries],
}

df_Season = pd.DataFrame(season)
df_Season.to_csv('Seasons.csv', index=False)  # export as CSV

### Drivers

In [4]:
# Drivers table: one row per (season, driver) for the seasons 2010 through 2022.
# Dictionary of lists (one list per column) that accumulates the extracted values.
drivers = {'year': [], "DriverId": [], "Number": [], "code": [], "forename": [],
           "surname": [], "dob": [], "nationality": [], "url": []}
for year in range(2010, 2023):  # the season is part of the URL, so request year by year
    url = 'http://ergast.com/api/f1/{}/drivers.json?limit=1000'
    response = requests.get(url.format(year))
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
    response.raise_for_status()
    content = json.loads(response.content)
    driver_table = content['MRData']['DriverTable']
    season_year = int(driver_table['season'])
    for driver in driver_table['Drivers']:
        drivers["year"].append(season_year)
        # "permanentNumber" is optional in the response. Only the expected failures
        # (missing or non-numeric value) fall back to "", so interrupts and genuine
        # errors are no longer swallowed by a bare `except`.
        try:
            number = int(driver.get("permanentNumber"))
        except (TypeError, ValueError):
            number = ""
        drivers["Number"].append(number)
        drivers["DriverId"].append(driver["driverId"])
        # "code" is optional as well; keep "" as the placeholder for a missing code.
        drivers["code"].append(driver.get("code", ""))
        drivers["forename"].append(driver["givenName"])
        drivers["surname"].append(driver["familyName"])
        drivers["dob"].append(driver["dateOfBirth"])
        drivers["nationality"].append(driver["nationality"])
        drivers["url"].append(driver["url"])

df_drivers = pd.DataFrame(drivers)
# Convert the date-of-birth strings to a datetime column.
df_drivers['dob'] = df_drivers['dob'].astype('datetime64[ns]')
df_drivers.to_csv('Drivers.csv', index=False)  # export as CSV

### Constructors

In [5]:
# Constructors table: one row per (season, constructor) for 2010 through 2022.
# NOTE(review): the column key "constuctorId" (sic) is misspelled, but it is the
# header written to Constructors.csv, so it is kept as-is here.
constructors = {"year": [], "constuctorId": [], "name": [], "nationality": [], "url": []}
# Output column -> key of the same value inside one constructor entry of the response.
source_key_for = {
    "constuctorId": "constructorId",
    "name": "name",
    "nationality": "nationality",
    "url": "url",
}
for year in range(2010, 2023):  # the season is part of the URL, so request year by year
    url = 'http://ergast.com/api/f1/{}/constructors.json?limit=1000'
    response = requests.get(url.format(year))
    content = json.loads(response.content)
    table = content['MRData']['ConstructorTable']
    for team in table['Constructors']:
        constructors["year"].append(int(table['season']))
        for column, source_key in source_key_for.items():
            constructors[column].append(team[source_key])

df_constructors = pd.DataFrame(constructors)
df_constructors.to_csv('Constructors.csv', index=False)  # export as CSV

### Circuits

In [6]:
# Circuits table: one row per (season, circuit) for the seasons 2010 through 2022.
# Dictionary of lists (one list per column) that accumulates the extracted values.
circuits = {'year': [], 'circuitId': [],
            'name': [], 'location': [], 'country': [],
            'lat': [], 'lng': [], 'alt': [],
            'url': []}
for year in range(2010, 2023):  # the season is part of the URL, so request year by year
    url = 'http://ergast.com/api/f1/{}/circuits.json?limit=1000'
    response = requests.get(url.format(year))
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
    response.raise_for_status()
    content = json.loads(response.content)
    path = content['MRData']['CircuitTable']
    season_year = int(path['season'])
    for circuit in path['Circuits']:
        location = circuit['Location']
        circuits['year'].append(season_year)
        circuits['circuitId'].append(circuit['circuitId'])
        circuits['name'].append(circuit['circuitName'])
        circuits['location'].append(location['locality'])
        circuits['country'].append(location['country'])
        circuits['lat'].append(float(location['lat']))
        circuits['lng'].append(float(location['long']))
        # Previously this column was filled with `locality`, duplicating the
        # 'location' column. Read an altitude field if the entry has one and leave
        # the value missing otherwise; the column is kept so the CSV schema is stable.
        # NOTE(review): the response does not appear to carry altitude - TODO confirm.
        circuits['alt'].append(location.get('alt'))
        circuits['url'].append(circuit['url'])

df_circuits = pd.DataFrame(circuits)
df_circuits.to_csv('Circuits.csv', index=False)  # export as CSV

### Qualifying

In [7]:
# Qualifying table: one row per (season, round, driver) for 2010 through 2022.
# Dictionary of lists (one list per column) that accumulates the extracted values.
quali = {"year": [], "driverId": [], "ConstructorId": [], "number": [], "position": [],
         "qualifyId": [], "raceId": [], "q1": [], "q2": [], "q3": []}
base_url = 'http://ergast.com/api/f1'
for year in range(2010, 2023):
    # The season schedule is requested first only to learn how many rounds the
    # season has; qualifying results are then requested round by round.
    response = requests.get('{}/{}.json?limit=1000'.format(base_url, year))
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError further down.
    response.raise_for_status()
    content = json.loads(response.content)
    round_count = len(content['MRData']['RaceTable']['Races'])
    # One qualifying URL per round (rounds are numbered from 1).
    link = ['{}/{}/{}/qualifying.json?limit=100'.format(base_url, year, round_number)
            for round_number in range(1, round_count + 1)]

    for lk in link:
        response = requests.get(lk)
        response.raise_for_status()
        content = json.loads(response.content)
        for race in content['MRData']['RaceTable']['Races']:
            # Read the results of the race being iterated rather than always
            # Races[0], and tolerate a race entry without qualifying results.
            for result in race.get('QualifyingResults', []):
                quali["year"].append(int(race['season']))
                quali["qualifyId"].append(int(race['round']))  # holds the round number
                quali["raceId"].append(race['raceName'])  # holds the race name
                quali["driverId"].append(result['Driver']['driverId'])
                quali["ConstructorId"].append(result['Constructor']['constructorId'])
                quali["number"].append(int(result['number']))
                quali["position"].append(int(result['position']))
                # Session times are absent for drivers who did not take part in or
                # were eliminated before a session; store "" in that case. Q1 gets
                # the same protection as Q2/Q3 so a missing time cannot abort the
                # loop half-way through a row.
                quali["q1"].append(result.get('Q1', ""))
                quali["q2"].append(result.get('Q2', ""))
                quali["q3"].append(result.get('Q3', ""))

# Built row-wise and then transposed, exactly as before, so the resulting frame
# is unchanged for any later cell that uses it.
dfquali = pd.DataFrame.from_dict(quali, orient='index')
dfquali = dfquali.transpose()
# NOTE(review): the file name 'Qualifiying.csv' (sic) is misspelled but kept,
# since other code may read the file by this name.
dfquali.to_csv('Qualifiying.csv', index=False)  # export as CSV