<a href="https://colab.research.google.com/github/hinafarooq21/F1-Lap-Predictor--Capstone/blob/main/Cleaning_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cleaning the Dataset

## Importing Libraries

In [None]:
import pandas as pd # General Data usage
import numpy as np # Maths
import seaborn as sns # Visualisations
import matplotlib.pyplot as plt  # Visualisations
import re
from datetime import timedelta

In [None]:
# Show every column when a DataFrame is displayed (None removes the column limit),
# so wide frames are not truncated in the previews
pd.set_option('display.max_columns', None)

In [None]:
# Importing the DataFrame
# latin-1 is passed explicitly because the file has some encoding discrepancies.
# NOTE(review): text decoded this way shows garbled characters in the previews
# (e.g. a stray 'Â' inside Length), which later cells strip out - presumably the
# file is not really latin-1; confirm the true encoding.
df = pd.read_csv('Formula Circuit Database.csv', encoding='latin-1')
# Work on a copy so the raw frame `df` stays untouched
circuits = df.copy()

## Checking the Data
**Essential checks were performed to ensure data quality:**

  Data Types: Converting all columns to correct datatypes (e.g. converting lap times to date/time)

  Null Values: The dataset contains some null values that require filling in.

In [None]:
# Listing the column names to get a sense of the variables available in the dataset
circuits.columns

Index(['Circuit', 'Type', 'Direction', 'Location', 'Country', 'Length',
       'Turns', 'Grands Prix', 'Season(s)', 'Grands Prix held', 'Circuit Name',
       'Lap Tme', 'Driver', 'Car', 'Year', 'Race Laps', 'Race Dist',
       'DistTurn1', 'Longst Straight', 'Elevation', 'Width'],
      dtype='object')

In [None]:
# 'Circuit Name' is noted as duplicating 'Circuit', so the redundant column is removed
circuits = circuits.drop(columns='Circuit Name')

In [None]:
# Initial review of the first 60 rows to get an overall feel for the data
circuits.head(60)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780Â km (2.349Â mi),16,Australian Grand Prix,"1985, 1995",11.0,01:15.4,Damon Hill,Williams FW15C,1993.0,82.0,309.96,300.0,800.0,3.0,12.0
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618Â km (4.734Â mi),18,Moroccan Grand Prix,1958,1.0,02:22.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403.754,300.0,1000.0,,10.0
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828Â km (3.000Â mi),12,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,01:51.8,Jim Clark,Lotus 25,1963.0,75.0,362.1,400.0,1600.0,,12.0
3,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278Â km (3.280Â mi),16,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,01:19.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307.574,350.0,300.0,2.6,14.0
4,Algarve International Circuit,Race circuit,Clockwise,Portimao,Portugal,4.653Â km (2.891Â mi),15,Portuguese Grand Prix,"2020, 2021",2.0,01:18.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306.826,400.0,900.0,24.0,12.0
5,AutÃÂ³dromo do Estoril,Race circuit,Clockwise,Estoril,Portugal,4.360Â km (2.709Â mi),13,Portuguese Grand Prix,"1984, 1996",13.0,01:22.4,David Coulthard,Williams FW16B,1994.0,71.0,309.56,300.0,1020.0,,10.0
6,AutÃÂ³dromo Hermanos RodrÃÂ­guez,Race circuit,Clockwise,Mexico City,Mexico,4.304Â km (2.674Â mi),17,"Mexican Grand Prix, Mexico City Grand Prix","1963, 1970, 1986, 1992, 2015, 2019, 2021, 2024",24.0,01:17.8,Valtteri Bottas,Mercedes W12,2021.0,71.0,305.354,890.0,1200.0,2.8,9.0
7,AutÃÂ³dromo Internacional do Rio de Janeiro,Race circuit,Anti-clockwise,Rio de Janeiro,Brazil,5.031Â km (3.126Â mi),11,Brazilian Grand Prix,"1978, 1981, 1989",10.0,01:32.5,Riccardo Patrese,Williams FW12C,1989.0,61.0,306.891,,,,
8,Autodromo Internazionale del Mugello,Race circuit,Clockwise,Scarperia e San Piero,Italy,5.245Â km (3.259Â mi),14,Tuscan Grand Prix,2020,1.0,01:18.8,Lewis Hamilton,Mercedes W11,2020.0,59.0,309.457,700.0,1141.0,41.0,9.6
9,Autodromo Internazionale Enzo e Dino Ferrari,Race circuit,Anti-clockwise,Imola,Italy,4.909Â km (3.050Â mi),19,"Italian Grand Prix, San Marino Grand Prix, Emi...","1980, 2006, 2020, 2022, 2024",31.0,01:15.5,Lewis Hamilton,Mercedes W11,2020.0,63.0,309.049,399.0,550.0,33.32,12.0


In [None]:
# Replacing mis-decoded (mojibake) accented letters with their plain ASCII letter.
#
# Each accented letter shows up as a run of latin-1 characters: U+00C3, an
# invisible control character U+0083, U+00C2 and a final character that
# identifies the letter (presumably doubly-encoded UTF-8 - confirm against the
# raw file). The patterns are written with explicit escapes because two of the
# characters involved (U+0083 and the soft hyphen U+00AD) are invisible when
# typed or displayed; U+0083 is matched optionally in case it was stripped.
MOJIBAKE_TO_ASCII = {
    r'\xc3\x83?\xc2\xb3': 'o',  # o with acute accent
    r'\xc3\x83?\xc2\xaf': 'i',  # i with diaeresis
    r'\xc3\x83?\xc2\xad': 'i',  # i with acute accent (was missed: "Rodriguez" stayed garbled)
    r'\xc3\x83?\xc2\xa9': 'e',  # e with acute accent
    r'\xc3\x83?\xc2\xa1': 'a',  # a with acute accent
    r'\xc3\x83?\xc2\xbc': 'u',  # u with diaeresis
}

# Both free-text columns get the full mapping; patterns that do not occur in a
# column simply leave it unchanged.
for column in ('Circuit', 'Location'):
    for pattern, ascii_letter in MOJIBAKE_TO_ASCII.items():
        circuits[column] = circuits[column].str.replace(pattern, ascii_letter, regex=True)

In [None]:
# Displaying the frame to check the result of the character replacements
circuits

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width
0,Adelaide Street Circuit,Street circuit,Clockwise,Adelaide,Australia,3.780Â km (2.349Â mi),16,Australian Grand Prix,"1985, 1995",11.0,01:15.4,Damon Hill,Williams FW15C,1993.0,82.0,309.960,300.0,800.0,3.0,12.0
1,Ain-Diab Circuit,Road circuit,Clockwise,Casablanca,Morocco,7.618Â km (4.734Â mi),18,Moroccan Grand Prix,1958,1.0,02:22.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403.754,300.0,1000.0,,10.0
2,Aintree Motor Racing Circuit,Road circuit,Clockwise,Aintree,United Kingdom,4.828Â km (3.000Â mi),12,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,01:51.8,Jim Clark,Lotus 25,1963.0,75.0,362.100,400.0,1600.0,,12.0
3,Albert Park Circuit,Street circuit,Clockwise,Melbourne,Australia,5.278Â km (3.280Â mi),16,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,01:19.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307.574,350.0,300.0,2.6,14.0
4,Algarve International Circuit,Race circuit,Clockwise,Portimao,Portugal,4.653Â km (2.891Â mi),15,Portuguese Grand Prix,"2020, 2021",2.0,01:18.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306.826,400.0,900.0,24.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,Valencia Street Circuit,Street circuit,Clockwise,Valencia,Spain,5.419Â km (3.367Â mi),25,European Grand Prix,"2008, 2012",5.0,01:38.7,Timo Glock,Toyota TF109,2009.0,57.0,308.883,400.0,876.0,6.0,12.0
74,Watkins Glen International,Race circuit,Clockwise,Watkins Glen,United States,5.430Â km (3.374Â mi),10,United States Grand Prix,"1961, 1980",20.0,01:34.1,Alan Jones,Williams FW07B,1980.0,59.0,320.700,400.0,1500.0,100.0,12.0
75,Yas Marina Circuit,Race circuit,Anti-clockwise,Abu Dhabi,United Arab Emirates,5.281Â km (3.281Â mi),16,Abu Dhabi Grand Prix,"2009, 2024",16.0,01:25.6,Kevin Magnussen,Haas VF-24,2024.0,55.0,305.555,305.0,1233.0,10.7,12.0
76,Zeltweg Airfield,Road circuit,Clockwise,Zeltweg,Austria,3.186Â km (1.980Â mi),4,Austrian Grand Prix,1964,1.0,01:10.6,Dan Gurney,Brabham BT7,1964.0,105.0,333.500,500.0,1200.0,0.0,8.0


Circuit name, Country, Location and Grand Prix names aren't necessary for the ML model; therefore, there is no need to convert them into numerical format.

In [None]:
# Examining each column's data type to identify which columns need converting
circuits.dtypes

Unnamed: 0,0
Circuit,object
Type,object
Direction,object
Location,object
Country,object
Length,object
Turns,object
Grands Prix,object
Season(s),object
Grands Prix held,float64


In [None]:
# Counting the null values in each column of the dataset
circuits.isnull().sum()

Unnamed: 0,0
Circuit,1
Type,1
Direction,1
Location,1
Country,1
Length,1
Turns,1
Grands Prix,1
Season(s),1
Grands Prix held,1


In [None]:
# The first 17 columns having a null value is unexpected
# Checking the last 5 rows to see whether an empty row was appended to the data
circuits.tail()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width
73,Valencia Street Circuit,Street circuit,Clockwise,Valencia,Spain,5.419Â km (3.367Â mi),25.0,European Grand Prix,"2008, 2012",5.0,01:38.7,Timo Glock,Toyota TF109,2009.0,57.0,308.883,400.0,876.0,6.0,12.0
74,Watkins Glen International,Race circuit,Clockwise,Watkins Glen,United States,5.430Â km (3.374Â mi),10.0,United States Grand Prix,"1961, 1980",20.0,01:34.1,Alan Jones,Williams FW07B,1980.0,59.0,320.7,400.0,1500.0,100.0,12.0
75,Yas Marina Circuit,Race circuit,Anti-clockwise,Abu Dhabi,United Arab Emirates,5.281Â km (3.281Â mi),16.0,Abu Dhabi Grand Prix,"2009, 2024",16.0,01:25.6,Kevin Magnussen,Haas VF-24,2024.0,55.0,305.555,305.0,1233.0,10.7,12.0
76,Zeltweg Airfield,Road circuit,Clockwise,Zeltweg,Austria,3.186Â km (1.980Â mi),4.0,Austrian Grand Prix,1964,1.0,01:10.6,Dan Gurney,Brabham BT7,1964.0,105.0,333.5,500.0,1200.0,0.0,8.0
77,,,,,,,,,,,,,,,,,,,,


In [None]:
# The last row (index 77) is completely empty and needs to be removed.
# Every all-null row is dropped rather than the hard-coded label 77, so the cell
# can be re-run safely (drop(77) raises KeyError once the row is gone) and still
# works if the number of rows in the source file changes.
circuits = circuits.dropna(how='all')

In [None]:
# Making sure the empty row was dropped: the last row shown should now hold real data
circuits.tail()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width
72,TI Circuit Aida,Race circuit,Clockwise,Mimasaka,Japan,3.703Â km (2.301Â mi),11,Pacific Grand Prix,"1994, 1995",2.0,01:14.0,Michael Schumacher,Benetton B194,1994.0,67.0,305.404,300.0,1200.0,10.0,12.0
73,Valencia Street Circuit,Street circuit,Clockwise,Valencia,Spain,5.419Â km (3.367Â mi),25,European Grand Prix,"2008, 2012",5.0,01:38.7,Timo Glock,Toyota TF109,2009.0,57.0,308.883,400.0,876.0,6.0,12.0
74,Watkins Glen International,Race circuit,Clockwise,Watkins Glen,United States,5.430Â km (3.374Â mi),10,United States Grand Prix,"1961, 1980",20.0,01:34.1,Alan Jones,Williams FW07B,1980.0,59.0,320.7,400.0,1500.0,100.0,12.0
75,Yas Marina Circuit,Race circuit,Anti-clockwise,Abu Dhabi,United Arab Emirates,5.281Â km (3.281Â mi),16,Abu Dhabi Grand Prix,"2009, 2024",16.0,01:25.6,Kevin Magnussen,Haas VF-24,2024.0,55.0,305.555,305.0,1233.0,10.7,12.0
76,Zeltweg Airfield,Road circuit,Clockwise,Zeltweg,Austria,3.186Â km (1.980Â mi),4,Austrian Grand Prix,1964,1.0,01:10.6,Dan Gurney,Brabham BT7,1964.0,105.0,333.5,500.0,1200.0,0.0,8.0


In [None]:
# Counting how many circuits there are for each Direction value
circuits.Direction.value_counts()

Unnamed: 0_level_0,count
Direction,Unnamed: 1_level_1
Clockwise,56
Anti-clockwise,20
Part clockwise and part anti-clockwise (figure eight),1


In [None]:
# Encoding Direction as a binary feature: 0 = anti-clockwise, 1 = clockwise.
# Only one circuit is "part clockwise and part anti-clockwise" (figure eight);
# the assumption made here is that it counts as clockwise, so it is assigned 1.
# The identity entries (0 -> 0, 1 -> 1) keep the cell safe to re-run on data
# that has already been encoded.
binary_columns = ['Direction']
binary_mappings = {'Anti-clockwise': 0,
                   'Clockwise': 1,
                   'Part clockwise and part anti-clockwise (figure eight)': 1,
                   0: 0,
                   1: 1}

# Series.map plus an explicit cast is used instead of DataFrame.replace:
# replace() on an object column relies on pandas silently downcasting the result
# to integers, which triggers a warning. map() also turns any label missing from
# the mapping into NaN, so unexpected values are caught below instead of being
# left in the column as text.
encoded_directions = circuits[binary_columns].apply(
    lambda column: column.map(binary_mappings)
)
if encoded_directions.isna().any().any():
    raise ValueError(
        f"Missing or unmapped values found while encoding {binary_columns}"
    )
circuits[binary_columns] = encoded_directions.astype('int64')

  circuits[binary_columns] = circuits[binary_columns].replace(binary_mappings)


In [None]:
# Making sure the mappings are applied: Direction should now show 0/1 instead of text
circuits.head()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width
0,Adelaide Street Circuit,Street circuit,1,Adelaide,Australia,3.780Â km (2.349Â mi),16,Australian Grand Prix,"1985, 1995",11.0,01:15.4,Damon Hill,Williams FW15C,1993.0,82.0,309.96,300.0,800.0,3.0,12.0
1,Ain-Diab Circuit,Road circuit,1,Casablanca,Morocco,7.618Â km (4.734Â mi),18,Moroccan Grand Prix,1958,1.0,02:22.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403.754,300.0,1000.0,,10.0
2,Aintree Motor Racing Circuit,Road circuit,1,Aintree,United Kingdom,4.828Â km (3.000Â mi),12,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,01:51.8,Jim Clark,Lotus 25,1963.0,75.0,362.1,400.0,1600.0,,12.0
3,Albert Park Circuit,Street circuit,1,Melbourne,Australia,5.278Â km (3.280Â mi),16,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,01:19.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307.574,350.0,300.0,2.6,14.0
4,Algarve International Circuit,Race circuit,1,Portimao,Portugal,4.653Â km (2.891Â mi),15,Portuguese Grand Prix,"2020, 2021",2.0,01:18.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306.826,400.0,900.0,24.0,12.0


In [None]:
# Ensuring the only remaining Direction values are 1 and 0
circuits.Direction.value_counts()

Unnamed: 0_level_0,count
Direction,Unnamed: 1_level_1
1,57
0,20


Converting the circuit type into a binary column

In [None]:
# Checking the value counts for each circuit Type
circuits.Type.value_counts()

Unnamed: 0_level_0,count
Type,Unnamed: 1_level_1
Race circuit,48
Street circuit,19
Road circuit,10


In [None]:
# Street and Road circuits are grouped together as 0; Race circuits become 1
# Stored in a new column so the original 'Type' stays available in case OHE gives better results
circuits['Type_bool'] = circuits['Type'].eq('Race circuit').astype('int64')
circuits['Type_bool'].value_counts()

Unnamed: 0_level_0,count
Type_bool,Unnamed: 1_level_1
1,48
0,29


In [None]:
# Previewing the first rows to check the new Type_bool column
circuits.head()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width,Type_bool
0,Adelaide Street Circuit,Street circuit,1,Adelaide,Australia,3.780Â km (2.349Â mi),16,Australian Grand Prix,"1985, 1995",11.0,01:15.4,Damon Hill,Williams FW15C,1993.0,82.0,309.96,300.0,800.0,3.0,12.0,0
1,Ain-Diab Circuit,Road circuit,1,Casablanca,Morocco,7.618Â km (4.734Â mi),18,Moroccan Grand Prix,1958,1.0,02:22.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403.754,300.0,1000.0,,10.0,0
2,Aintree Motor Racing Circuit,Road circuit,1,Aintree,United Kingdom,4.828Â km (3.000Â mi),12,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,01:51.8,Jim Clark,Lotus 25,1963.0,75.0,362.1,400.0,1600.0,,12.0,0
3,Albert Park Circuit,Street circuit,1,Melbourne,Australia,5.278Â km (3.280Â mi),16,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,01:19.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307.574,350.0,300.0,2.6,14.0,0
4,Algarve International Circuit,Race circuit,1,Portimao,Portugal,4.653Â km (2.891Â mi),15,Portuguese Grand Prix,"2020, 2021",2.0,01:18.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306.826,400.0,900.0,24.0,12.0,1


In [None]:
# Preparing the circuit Length text so the value can be extracted and converted to m
# First step: strip out the stray `Â` characters
length_text = circuits['Length'].str.replace('Â', '', regex=False)
circuits['Length'] = length_text
circuits['Length']

Unnamed: 0,Length
0,3.780 km (2.349 mi)
1,7.618 km (4.734 mi)
2,4.828 km (3.000 mi)
3,5.278 km (3.280 mi)
4,4.653 km (2.891 mi)
...,...
72,3.703 km (2.301 mi)
73,5.419 km (3.367 mi)
74,5.430 km (3.374 mi)
75,5.281 km (3.281 mi)


In [None]:
# Capture the figure in front of "km" and scale it from kilometres to metres
length_km = circuits['Length'].str.extract(r'([\d.]+)\s*km', expand=False).astype(float)
circuits['Length'] = length_km * 1000
circuits['Length'].head()

Unnamed: 0,Length
0,3780.0
1,7618.0
2,4828.0
3,5278.0
4,4653.0


In [None]:
# Listing the columns again before the numeric conversions
circuits.columns

Index(['Circuit', 'Type', 'Direction', 'Location', 'Country', 'Length',
       'Turns', 'Grands Prix', 'Season(s)', 'Grands Prix held', 'Lap Tme',
       'Driver', 'Car', 'Year', 'Race Laps', 'Race Dist', 'DistTurn1',
       'Longst Straight', 'Elevation', 'Width', 'Type_bool'],
      dtype='object')

In [None]:
# Casting the count/year columns to numbers; errors='coerce' turns any value
# that cannot be parsed into NaN instead of raising
for numeric_column in ['Turns', 'Grands Prix held', 'Race Laps', 'Year']:
    circuits[numeric_column] = pd.to_numeric(circuits[numeric_column], errors='coerce')

In [None]:
# Converting Race Distance from kilometres to metres
circuits['Race Dist'] = circuits['Race Dist'].mul(1000)

In [None]:
# Previewing the first rows to check the converted Length, numeric and Race Dist columns
circuits.head()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width,Type_bool
0,Adelaide Street Circuit,Street circuit,1,Adelaide,Australia,3780.0,16.0,Australian Grand Prix,"1985, 1995",11.0,01:15.4,Damon Hill,Williams FW15C,1993.0,82.0,309960.0,300.0,800.0,3.0,12.0,0
1,Ain-Diab Circuit,Road circuit,1,Casablanca,Morocco,7618.0,18.0,Moroccan Grand Prix,1958,1.0,02:22.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403754.0,300.0,1000.0,,10.0,0
2,Aintree Motor Racing Circuit,Road circuit,1,Aintree,United Kingdom,4828.0,12.0,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,01:51.8,Jim Clark,Lotus 25,1963.0,75.0,362100.0,400.0,1600.0,,12.0,0
3,Albert Park Circuit,Street circuit,1,Melbourne,Australia,5278.0,16.0,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,01:19.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307574.0,350.0,300.0,2.6,14.0,0
4,Algarve International Circuit,Race circuit,1,Portimao,Portugal,4653.0,15.0,Portuguese Grand Prix,"2020, 2021",2.0,01:18.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306826.0,400.0,900.0,24.0,12.0,1


In [None]:
# Convert time to seconds
def seconds(time):
    """Convert a lap-time string into a total number of seconds.

    Parameters
    ----------
    time : str or missing value
        Lap time written as ``"MM:SS.s"`` (e.g. ``"01:15.4"``) or
        ``"H:MM:SS.s"``. Missing values (``None``, ``NaN``, ``NaT``) are accepted.

    Returns
    -------
    float
        The lap time in seconds, or ``NaN`` when `time` is missing. ``NaN`` is
        returned rather than ``NaT`` so the resulting column stays numeric
        instead of becoming a mix of floats and datetime-style missing markers.

    Raises
    ------
    ValueError
        If the value does not have two or three ``:``-separated parts, or a
        part is not a valid number.
    """
    if pd.isnull(time):
        return float('nan')

    # Everything before the last ':' is a whole number of minutes (and hours);
    # only the final part may carry a fractional component.
    *whole_parts, seconds_part = str(time).strip().split(':')
    if not 1 <= len(whole_parts) <= 2:
        raise ValueError(f"Unrecognised lap time format: {time!r}")

    # Fold hours into minutes, so "1:01:15.4" becomes 61 minutes.
    total_minutes = 0
    for part in whole_parts:
        total_minutes = total_minutes * 60 + int(part)

    return total_minutes * 60 + float(seconds_part)

# Applying the conversion to every value so 'Lap Tme' holds the lap time in seconds
circuits['Lap Tme'] = circuits['Lap Tme'].apply(seconds)

In [None]:
# Re-checking the data types after the conversions
circuits.dtypes

Unnamed: 0,0
Circuit,object
Type,object
Direction,int64
Location,object
Country,object
Length,float64
Turns,float64
Grands Prix,object
Season(s),object
Grands Prix held,float64


In [None]:
# Previewing the first rows to check that 'Lap Tme' is now expressed in seconds
circuits.head()

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width,Type_bool
0,Adelaide Street Circuit,Street circuit,1,Adelaide,Australia,3780.0,16.0,Australian Grand Prix,"1985, 1995",11.0,75.4,Damon Hill,Williams FW15C,1993.0,82.0,309960.0,300.0,800.0,3.0,12.0,0
1,Ain-Diab Circuit,Road circuit,1,Casablanca,Morocco,7618.0,18.0,Moroccan Grand Prix,1958,1.0,142.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403754.0,300.0,1000.0,,10.0,0
2,Aintree Motor Racing Circuit,Road circuit,1,Aintree,United Kingdom,4828.0,12.0,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,111.8,Jim Clark,Lotus 25,1963.0,75.0,362100.0,400.0,1600.0,,12.0,0
3,Albert Park Circuit,Street circuit,1,Melbourne,Australia,5278.0,16.0,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,79.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307574.0,350.0,300.0,2.6,14.0,0
4,Algarve International Circuit,Race circuit,1,Portimao,Portugal,4653.0,15.0,Portuguese Grand Prix,"2020, 2021",2.0,78.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306826.0,400.0,900.0,24.0,12.0,1


#### Getting rid of the nulls

In [None]:
# Counting the remaining null values per column before filling them in
circuits.isnull().sum()

Unnamed: 0,0
Circuit,0
Type,0
Direction,0
Location,0
Country,0
Length,0
Turns,1
Grands Prix,0
Season(s),0
Grands Prix held,0


In [None]:
# One fill value per column with missing data
fill_values = {
    # minimum width allowed for new tracks
    'Width': 12,
    # elevation is unknown, so 0 is used
    'Elevation': 0,
    # lowest allowed distance to turn 1
    'DistTurn1': 250,
    # mean of the known longest-straight values
    'Longst Straight': circuits['Longst Straight'].mean(),
}
for fill_column, fill_value in fill_values.items():
    circuits[fill_column] = circuits[fill_column].fillna(fill_value)

In [None]:
# Checking the null value count per column again after the fills
circuits.isnull().sum()

Unnamed: 0,0
Circuit,0
Type,0
Direction,0
Location,0
Country,0
Length,0
Turns,1
Grands Prix,0
Season(s),0
Grands Prix held,0


In [None]:
# There is an unexpected null value in the Turns column; check which circuit it belongs to and edit if needed
rows_with_nulls = circuits.isnull().any(axis=1)
circuits[rows_with_nulls]

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width,Type_bool
59,Pescara Circuit,Road circuit,1,Pescara,Italy,25800.0,,Pescara Grand Prix,1957,1.0,584.6,Stirling Moss,Vanwall VW 5,1957.0,18.0,460420.0,800.0,5500.0,0.0,12.0,0


In [None]:
# The circuit with the missing Turns value is the Pescara Circuit
# No number of turns is known for it, so the gap is filled
# with the mean number of turns, rounded to a whole number
mean_turns = round(circuits['Turns'].mean())
circuits['Turns'] = circuits['Turns'].fillna(mean_turns)
circuits.head(60)

Unnamed: 0,Circuit,Type,Direction,Location,Country,Length,Turns,Grands Prix,Season(s),Grands Prix held,Lap Tme,Driver,Car,Year,Race Laps,Race Dist,DistTurn1,Longst Straight,Elevation,Width,Type_bool
0,Adelaide Street Circuit,Street circuit,1,Adelaide,Australia,3780.0,16.0,Australian Grand Prix,"1985, 1995",11.0,75.4,Damon Hill,Williams FW15C,1993.0,82.0,309960.0,300.0,800.0,3.0,12.0,0
1,Ain-Diab Circuit,Road circuit,1,Casablanca,Morocco,7618.0,18.0,Moroccan Grand Prix,1958,1.0,142.5,Stirling Moss,Vanwall VW 5,1958.0,53.0,403754.0,300.0,1000.0,0.0,10.0,0
2,Aintree Motor Racing Circuit,Road circuit,1,Aintree,United Kingdom,4828.0,12.0,British Grand Prix,"1955, 1957, 1959, 1961, 1962",5.0,111.8,Jim Clark,Lotus 25,1963.0,75.0,362100.0,400.0,1600.0,0.0,12.0,0
3,Albert Park Circuit,Street circuit,1,Melbourne,Australia,5278.0,16.0,Australian Grand Prix,"1996, 2019, 2022, 2024",27.0,79.8,Charles Leclerc,Ferrari SF-24,2024.0,58.0,307574.0,350.0,300.0,2.6,14.0,0
4,Algarve International Circuit,Race circuit,1,Portimao,Portugal,4653.0,15.0,Portuguese Grand Prix,"2020, 2021",2.0,78.7,Lewis Hamilton,Mercedes W11,2020.0,66.0,306826.0,400.0,900.0,24.0,12.0,1
5,Autodromo do Estoril,Race circuit,1,Estoril,Portugal,4360.0,13.0,Portuguese Grand Prix,"1984, 1996",13.0,82.4,David Coulthard,Williams FW16B,1994.0,71.0,309560.0,300.0,1020.0,0.0,10.0,1
6,Autodromo Hermanos RodrÃÂ­guez,Race circuit,1,Mexico City,Mexico,4304.0,17.0,"Mexican Grand Prix, Mexico City Grand Prix","1963, 1970, 1986, 1992, 2015, 2019, 2021, 2024",24.0,77.8,Valtteri Bottas,Mercedes W12,2021.0,71.0,305354.0,890.0,1200.0,2.8,9.0,1
7,Autodromo Internacional do Rio de Janeiro,Race circuit,0,Rio de Janeiro,Brazil,5031.0,11.0,Brazilian Grand Prix,"1978, 1981, 1989",10.0,92.5,Riccardo Patrese,Williams FW12C,1989.0,61.0,306891.0,250.0,1184.676471,0.0,12.0,1
8,Autodromo Internazionale del Mugello,Race circuit,1,Scarperia e San Piero,Italy,5245.0,14.0,Tuscan Grand Prix,2020,1.0,78.8,Lewis Hamilton,Mercedes W11,2020.0,59.0,309457.0,700.0,1141.0,41.0,9.6,1
9,Autodromo Internazionale Enzo e Dino Ferrari,Race circuit,0,Imola,Italy,4909.0,19.0,"Italian Grand Prix, San Marino Grand Prix, Emi...","1980, 2006, 2020, 2022, 2024",31.0,75.5,Lewis Hamilton,Mercedes W11,2020.0,63.0,309049.0,399.0,550.0,33.32,12.0,1


In [None]:
# Final check of the null value count per column before saving
circuits.isnull().sum()

Unnamed: 0,0
Circuit,0
Type,0
Direction,0
Location,0
Country,0
Length,0
Turns,0
Grands Prix,0
Season(s),0
Grands Prix held,0


In [None]:
# Saving the final clean dataset; index=False leaves the row index out of the CSV
circuits.to_csv('clean_circuits.csv', index=False)