# Pitstops and their Impact on Race Outcome
We will be exploring pitstop data from F1 seasons 2018-2023 and looking at how they determine the outcome of the races

## STEP 1 - Loading the Data & Libraries

In [2]:
# Let us start by importing the necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn.model_selection as model_selection

# Load the datasets
races = pd.read_csv('data/races.csv')
results = pd.read_csv('data/results.csv')
pit_stops = pd.read_csv('data/pit_stops.csv')
drivers = pd.read_csv('data/drivers.csv')

# Display the first few rows of the dataset
races.head(), results.head(), pit_stops.head(), drivers.head(), 

(   raceId  year  round  circuitId                   name        date  \
 0       1  2009      1          1  Australian Grand Prix  2009-03-29   
 1       2  2009      2          2   Malaysian Grand Prix  2009-04-05   
 2       3  2009      3         17     Chinese Grand Prix  2009-04-19   
 3       4  2009      4          3     Bahrain Grand Prix  2009-04-26   
 4       5  2009      5          4     Spanish Grand Prix  2009-05-10   
 
        time                                                url fp1_date  \
 0  06:00:00  http://en.wikipedia.org/wiki/2009_Australian_G...       \N   
 1  09:00:00  http://en.wikipedia.org/wiki/2009_Malaysian_Gr...       \N   
 2  07:00:00  http://en.wikipedia.org/wiki/2009_Chinese_Gran...       \N   
 3  12:00:00  http://en.wikipedia.org/wiki/2009_Bahrain_Gran...       \N   
 4  12:00:00  http://en.wikipedia.org/wiki/2009_Spanish_Gran...       \N   
 
   fp1_time fp2_date fp2_time fp3_date fp3_time quali_date quali_time  \
 0       \N       \N       \N

## STEP 2 - Cleaning the Data
We are only interested in data from seasons 2018-2023 so let us try to filter the dataset

In [3]:
# Filtering the dataset for years 2018 to 2023
races_2018_2023 = races[(races['year'] >= 2018) & (races['year'] <= 2023)]

# Getting the raceId for the years 2018 to 2023
raceIds_2018_2023 = races_2018_2023['raceId'].unique()

# Filtering pit_stops & results dataset with the raceIds from 2018 to 2023
pit_stops_2018_2023 = pit_stops[pit_stops['raceId'].isin(raceIds_2018_2023)]
results_2018_2023 = results[results['raceId'].isin(raceIds_2018_2023)]