# <span style="color:darkblue"> I. Import Libraries and Data </span>

<font size = "5">
Key libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

<font size = "5">
Importing Datasets 

In [3]:
# Read both raw tables in one statement; the paths are relative to the
# notebook's working directory.
drivers, results = (
    pd.read_csv("data_raw/drivers.csv"),
    pd.read_csv("data_raw/results.csv"),
)


<font size = "5">
 Datasets Description 

In [4]:
## Drivers dataset

# Number of rows.
# Each row is a driver with their own specific driverId, so the row count
# is the number of drivers in the file. The count is printed from the data
# instead of being hard-coded in a comment, so the narrative cannot drift
# from the file that was actually loaded.
print(f"drivers rows: {len(drivers)}")

# Variables present.
# A notebook cell only renders its LAST bare expression, so intermediate
# results have to be shown explicitly.
# Expected 9 variables: 'driverId', 'driverRef', 'number', 'code',
# 'forename', 'surname', 'dob', 'nationality', 'url'.
display(drivers.columns.values)


## Results dataset

# Number of rows.
# Rows are race results that can be linked back to a driver through
# driverId (a previous run of this notebook recorded 25840 rows).
print(f"results rows: {len(results)}")

# Variables present.
# Expected: 'resultId', 'raceId', 'driverId', 'constructorId', 'number',
# 'grid', 'position', 'positionText', 'positionOrder', 'points',
# 'laps', 'time', 'milliseconds', 'fastestLap', 'rank',
# 'fastestLapTime', 'fastestLapSpeed', 'statusId'.
# Left as the final bare expression so it stays the cell's Out[] value.
results.columns.values


array(['resultId', 'raceId', 'driverId', 'constructorId', 'number',
       'grid', 'position', 'positionText', 'positionOrder', 'points',
       'laps', 'time', 'milliseconds', 'fastestLap', 'rank',
       'fastestLapTime', 'fastestLapSpeed', 'statusId'], dtype=object)

<font size = "5">
Merging 

In [5]:
## Create new data set for the fastest times by driver ID

# The raw CSV files use the literal text \N as a missing-value marker (it is
# visible in the drivers "number" column). NOTE(review): results.fastestLapTime
# is assumed to use the same marker -- confirm against the file. Without this
# filter, a driver who never set a timed lap would get the marker itself back
# from min() instead of a missing value.
MISSING_MARKER = "\\N"

timed_laps = results.loc[
    results["fastestLapTime"] != MISSING_MARKER,
    ["driverId", "fastestLapTime"],
].dropna(subset=["fastestLapTime"])

# min() compares the lap times as text. NOTE(review): this matches the
# chronological order only if every value has the same "M:SS.mmm" layout
# -- confirm before relying on it for laps outside the 1-9 minute range.
fastest_times = timed_laps.groupby("driverId")["fastestLapTime"].min()


## Merging data sets drivers and fastest times

# Left merge: every driver is kept; drivers without a timed lap get NaN.
drivers_fastest = pd.merge(drivers,
                           fastest_times,
                           on = "driverId",
                           how = "left")

# fastest_times has one entry per driverId, so the merge must not add rows.
assert len(drivers_fastest) == len(drivers), "merge changed the number of drivers"

display(drivers_fastest)

# Merging the two data sets allows us to see the drivers information
# such as nationality, name along with their fastest time. Now we will
# see which nationalities produce the fastest lap times

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url,fastestLapTime
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,1:06.719
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld,1:13.095
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg,1:08.491
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso,1:08.405
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen,1:13.998
...,...,...,...,...,...,...,...,...,...,...
852,854,mick_schumacher,47,MSC,Mick,Schumacher,1999-03-22,German,http://en.wikipedia.org/wiki/Mick_Schumacher,1:09.394
853,855,zhou,24,ZHO,Guanyu,Zhou,1999-05-30,Chinese,http://en.wikipedia.org/wiki/Guanyu_Zhou,1:09.380
854,856,de_vries,45,DEV,Nyck,de Vries,1995-02-06,Dutch,http://en.wikipedia.org/wiki/Nyck_de_Vries,1:26.624
855,857,piastri,81,PIA,Oscar,Piastri,2001-04-06,Australian,http://en.wikipedia.org/wiki/Oscar_Piastri,


<font size = "5">
Query 

In [16]:
## Average rank, points and finishing position per nationality

# Attach every race result to its driver (left join on driverId, so drivers
# without results are kept), then turn the three metric columns into numbers.
# errors="coerce" converts any entry that is not numeric into NaN, which the
# means below skip.
metric_columns = ("rank", "points", "position")

rank_driver = drivers.merge(results, on="driverId", how="left").assign(
    **{
        column: (lambda frame, column=column:
                 pd.to_numeric(frame[column], errors="coerce"))
        for column in metric_columns
    }
)

# One row per nationality, ordered from the highest to the lowest
# average points.
metric_spec = {
    "mean_rank": ("rank", "mean"),
    "mean_points": ("points", "mean"),
    "mean_position": ("position", "mean"),
}

nat_avg = (
    rank_driver
    .groupby("nationality")
    .agg(**metric_spec)
    .sort_values(by="mean_points", ascending=False)
)

display(nat_avg)


Unnamed: 0_level_0,mean_rank,mean_points,mean_position
nationality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monegasque,7.320388,6.477273,6.309278
Dutch,8.966245,4.192708,8.283217
Finnish,8.006579,3.81735,6.903585
Spanish,8.940375,3.535888,8.118674
Australian,8.862408,3.445189,7.294444
German,9.62963,3.333263,7.5378
Mexican,9.834459,3.035545,8.895765
Polish,10.530612,2.767677,9.411765
Thai,9.915254,2.679487,9.396825
Colombian,6.02381,2.456,4.80597
