### Data getter from Kaggle

Get race events such as safety cars and red flags from Kaggle. The script cannot download the CSV files itself because a Kaggle login is required, so the datasets must be downloaded manually and placed in the input folder (defaults to `./temp`). The source is: https://www.kaggle.com/datasets/jtrotman/formula-1-race-events

In [18]:
import pandas as pd
import os


In [19]:
# Kaggle datasets that have to be downloaded manually (see the notebook introduction)
RED_FLAGS_FILE = "temp/red_flags.csv"
SAFETY_CARS_FILE = "temp/safety_cars.csv"

# Path of the current data
INPUT_PATH = "./../data/csv/"

# Path to generate the new db files
OUTPUT_PATH = "generated/Data to replace to the DB"

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(OUTPUT_PATH, exist_ok=True)


In [20]:
# Join every event with the grand prix it belongs to, so the grand prix
# names (e.g. `fullName`) sit next to the event ids. Both tables have an
# `id` column; after the merge the event id is `id_x`.
events = pd.read_csv(INPUT_PATH + "events.csv")
grands_prix = pd.read_csv(INPUT_PATH + "grandsPrix.csv")
current_races = events.merge(grands_prix, left_on="grandPrixId", right_on="id")

# The text before the first dash of the event id (e.g. "1950-1" -> "1950")
# is kept as the year, as a string.
current_races["year"] = current_races["id_x"].map(
    lambda event_id: str(event_id).partition("-")[0]
)

current_races.head()


Unnamed: 0,id_x,raceDate,grandPrixId,name_x,qualyFormat,circuitId,scheduledLaps,posterURL,id_y,name_y,fullName,shortName,countryId,year
0,1950-1,1950-05-13,great-britain,1950 RAC British Grand Prix,TWO_SESSION,silverstone,70,https://www.progcovers.com/motor/silverstone50...,great-britain,Great Britain,British Grand Prix,British GP,GB,1950
1,1951-5,1951-07-14,great-britain,1951 RAC British Grand Prix,TWO_SESSION,silverstone,90,https://www.progcovers.com/motor/silver51.jpg,great-britain,Great Britain,British Grand Prix,British GP,GB,1951
2,1952-5,1952-07-19,great-britain,1952 RAC British Grand Prix,TWO_SESSION,silverstone,85,https://www.progcovers.com/motor/silverstone52...,great-britain,Great Britain,British Grand Prix,British GP,GB,1952
3,1953-6,1953-07-18,great-britain,1953 RAC British Grand Prix,TWO_SESSION,silverstone,90,https://www.progcovers.com/motor/silverstone53...,great-britain,Great Britain,British Grand Prix,British GP,GB,1953
4,1954-5,1954-07-17,great-britain,1954 RAC British Grand Prix,TWO_SESSION,silverstone,90,https://www.progcovers.com/motor/silverstone54...,great-britain,Great Britain,British Grand Prix,British GP,GB,1954


In [21]:
""" ---------------------------------------------
----------------- RED FLAGS ---------------------
--------------------------------------------- """

# Load the red flags and cut "Race" at its first space: the leading token
# becomes `year` (a string) and the remainder becomes `grand_prix`,
# e.g. "1950 Indianapolis 500" -> "1950" / "Indianapolis 500".
red_flags = pd.read_csv(RED_FLAGS_FILE).assign(
    year=lambda frame: frame["Race"].map(
        lambda race: str(race).partition(" ")[0]
    ),
    grand_prix=lambda frame: frame["Race"].map(
        lambda race: str(race).partition(" ")[2]
    ),
)

red_flags.head()

Unnamed: 0,Race,Lap,Resumed,Incident,Excluded,year,grand_prix
0,1950 Indianapolis 500,138,N,Rain.,,1950,Indianapolis 500
1,1971 Canadian Grand Prix,64,N,Mist.,,1971,Canadian Grand Prix
2,1973 British Grand Prix,2,Y,"Crash involving Jody Scheckter, Jean-Pierre Be...","Jody Scheckter, Jean-Pierre Beltoise, George F...",1973,British Grand Prix
3,1974 Brazilian Grand Prix,32,N,Rain.,,1974,Brazilian Grand Prix
4,1975 Spanish Grand Prix,29,N,Crash of Rolf Stommelen which killed five spec...,,1975,Spanish Grand Prix


In [22]:
# Attach the event id to every red flag by matching the race name and year
# against the `fullName` and `year` of the known events.
matched_red_flags = red_flags.merge(
    current_races, left_on=["grand_prix", "year"], right_on=["fullName", "year"]
)

# The merge is an inner join, so races without an exact name/year match are
# dropped. Report them instead of losing them silently.
unmatched_red_flag_races = red_flags.loc[
    ~red_flags["Race"].isin(matched_red_flags["Race"]), "Race"
].unique()
if len(unmatched_red_flag_races) > 0:
    print(
        f"WARNING: {len(unmatched_red_flag_races)} race(s) with red flags have no "
        f"matching event and were dropped: {list(unmatched_red_flag_races)}"
    )

# Keep only the exported columns, lower-case their names and expose the
# event id (`id_x` after the merge) as `eventId`.
red_flags = (
    matched_red_flags[["Lap", "Resumed", "Incident", "Excluded", "id_x"]]
    .rename(columns=str.lower)
    .rename(columns={"id_x": "eventId"})
)

# Save the data:
red_flags.to_csv(f"{OUTPUT_PATH}/redFlags.csv", index=False)


In [23]:
""" ---------------------------------------------
---------------- SAFETY CAR ---------------------
--------------------------------------------- """


# Load the safety cars and cut "Race" at its first space: the leading token
# becomes `year` (a string) and the remainder becomes `grand_prix`,
# e.g. "1973 Canadian Grand Prix" -> "1973" / "Canadian Grand Prix".
safety_cars = pd.read_csv(SAFETY_CARS_FILE).assign(
    year=lambda frame: frame["Race"].map(
        lambda race: str(race).partition(" ")[0]
    ),
    grand_prix=lambda frame: frame["Race"].map(
        lambda race: str(race).partition(" ")[2]
    ),
)

safety_cars.head()

Unnamed: 0,Race,Cause,Deployed,Retreated,FullLaps,year,grand_prix
0,1973 Canadian Grand Prix,Accident,33,39.0,5,1973,Canadian Grand Prix
1,1993 Brazilian Grand Prix,Accident/Rain,29,38.0,8,1993,Brazilian Grand Prix
2,1993 British Grand Prix,Stranded car,38,40.0,1,1993,British Grand Prix
3,1994 San Marino Grand Prix,Accident,1,6.0,4,1994,San Marino Grand Prix
4,1995 Belgian Grand Prix,Rain,28,33.0,4,1995,Belgian Grand Prix


In [24]:
# Attach the event id to every safety car period by matching the race name
# and year against the `fullName` and `year` of the known events.
matched_safety_cars = safety_cars.merge(
    current_races, left_on=["grand_prix", "year"], right_on=["fullName", "year"]
)

# The merge is an inner join, so races without an exact name/year match are
# dropped. Report them instead of losing them silently.
unmatched_safety_car_races = safety_cars.loc[
    ~safety_cars["Race"].isin(matched_safety_cars["Race"]), "Race"
].unique()
if len(unmatched_safety_car_races) > 0:
    print(
        f"WARNING: {len(unmatched_safety_car_races)} race(s) with safety cars have no "
        f"matching event and were dropped: {list(unmatched_safety_car_races)}"
    )

# Build the exported frame in one chain rather than assigning a column on a
# frame selected from another frame (the pattern behind SettingWithCopyWarning).
safety_cars = (
    matched_safety_cars[["Cause", "Deployed", "Retreated", "FullLaps", "id_x"]]
    # "Retreated" is read as float (e.g. 39.0); the nullable Int64 dtype
    # stores it as integers while still allowing missing values.
    .astype({"Retreated": "Int64"})
    # Lower-case the column names and expose the event id as `eventId`.
    .rename(columns=str.lower)
    .rename(columns={"id_x": "eventId"})
)

# Save the data:
safety_cars.to_csv(f"{OUTPUT_PATH}/safetyCars.csv", index=False)