In [3]:
# Notebook-wide IPython setup. Comments stay on their own lines because a
# trailing comment would be passed to the line magic as an argument.
# Turn on the completer's "greedy" mode.
%config IPCompleter.greedy=True
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2

In [4]:
import os, sys, re, math, datetime as dt, pandas as pd, numpy as np, time
import logging
import matplotlib.pyplot as plt
from string import Template
from IPython.display import display, HTML

# Root logger: INFO level, each record tagged with timestamp, logger name,
# line number, function name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(name)s:%(lineno)d:%(funcName)s] [%(levelname)s] %(message)s',
)

# Raise every pandas display limit to 5000 so previews are not truncated.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.max_colwidth',
    'display.width',
):
    pd.set_option(_display_option, 5000)
del _display_option  # keep the loop variable out of the notebook namespace

def display_df(df, n=4):
    """Show a short preview of a DataFrame followed by its shape.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to preview.
    n : int, default 4
        Number of leading rows to render; forwarded to ``df.head``.
        The default keeps the previous fixed preview size.

    Returns
    -------
    None
        The preview is rendered through ``display`` and the frame's
        ``shape`` tuple is printed to stdout.
    """
    display(df.head(n))
    print(df.shape)

In [5]:
# Load lap_times.csv into `laps`.
laps = pd.read_csv('../f1_data/lap_times.csv')
# Only the last expression of a cell is echoed, so the preview must be
# displayed explicitly; a bare `laps.head()` here would be discarded.
display(laps.head())
# Last expression: echoed as the cell's output.
laps.shape

(562507, 6)

In [6]:
# Rows of `laps` for raceId 1098 and driverId 830.
in_race = laps['raceId'] == 1098
by_driver = laps['driverId'] == 830
laps.loc[in_race & by_driver]


Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
538121,1098,830,1,1,1:39.019,99019
538122,1098,830,2,1,1:37.974,97974
538123,1098,830,3,1,1:38.006,98006
538124,1098,830,4,1,1:37.976,97976
538125,1098,830,5,1,1:38.035,98035
538126,1098,830,6,1,1:37.986,97986
538127,1098,830,7,1,1:38.021,98021
538128,1098,830,8,1,1:38.154,98154
538129,1098,830,9,1,1:38.278,98278
538130,1098,830,10,1,1:38.369,98369


In [7]:
# Load pit_stops.csv into `pits` and preview the first five rows.
pits = pd.read_csv(filepath_or_buffer='../f1_data/pit_stops.csv')
pits.head(n=5)

Unnamed: 0,raceId,driverId,stop,lap,time,duration,milliseconds
0,841,153,1,1,17:05:23,26.898,26898
1,841,30,1,1,17:05:52,25.021,25021
2,841,17,1,11,17:20:48,23.426,23426
3,841,4,1,12,17:22:34,23.251,23251
4,841,13,1,13,17:24:10,23.842,23842


In [12]:
# Load qualifying.csv into `qualify` and preview the first five rows.
qualify = pd.read_csv(filepath_or_buffer='../f1_data/qualifying.csv')
qualify.head(n=5)

Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
0,1,18,1,1,22,1,1:26.572,1:25.187,1:26.714
1,2,18,9,2,4,2,1:26.103,1:25.315,1:26.869
2,3,18,5,1,23,3,1:25.664,1:25.452,1:27.079
3,4,18,13,6,2,4,1:25.994,1:25.691,1:27.178
4,5,18,2,2,3,5,1:25.960,1:25.518,1:27.236


In [8]:
# Load circuits.csv into `circuits` and preview the first five rows.
circuits = pd.read_csv(filepath_or_buffer="../f1_data/circuits.csv")
circuits.head(n=5)

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_International_Circuit
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_International_Circuit
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park


In [9]:
# Inspect the dtype pandas inferred for each column of `circuits`.
circuits.dtypes

circuitId       int64
circuitRef     object
name           object
location       object
country        object
lat           float64
lng           float64
alt             int64
url            object
dtype: object

In [10]:
# Load grand_prix.csv into `gp` and preview the first five rows.
gp = pd.read_csv(filepath_or_buffer="../f1_data/grand_prix.csv")
gp.head(n=5)

Unnamed: 0,race,circuitId,circuit,circuit_length,race_distance,laps
0,Australian Grand Prix,1,Melbourne Grand Prix Circuit,5.303,307.574,58
1,Bahrain Grand Prix,3,Bahrain International Circuit,5.412,308.238,57
2,Chinese Grand Prix,17,Shanghai International Circuit,5.451,305.066,56
3,Azerbaijan Grand Prix,73,Baku City Circuit,6.003,306.049,51
4,Spanish Grand Prix,4,Circuit de Barcelona-Catalunya,4.655,307.104,66


In [11]:
# Load seasons.csv into `seasons`, then show it ordered by year
# (ascending is the sort_values default).
seasons = pd.read_csv(filepath_or_buffer='../f1_data/seasons.csv')
seasons.sort_values(by='year')

Unnamed: 0,year,url
60,1950,http://en.wikipedia.org/wiki/1950_Formula_One_season
59,1951,http://en.wikipedia.org/wiki/1951_Formula_One_season
58,1952,http://en.wikipedia.org/wiki/1952_Formula_One_season
57,1953,http://en.wikipedia.org/wiki/1953_Formula_One_season
56,1954,http://en.wikipedia.org/wiki/1954_Formula_One_season
55,1955,http://en.wikipedia.org/wiki/1955_Formula_One_season
54,1956,http://en.wikipedia.org/wiki/1956_Formula_One_season
53,1957,http://en.wikipedia.org/wiki/1957_Formula_One_season
52,1958,http://en.wikipedia.org/wiki/1958_Formula_One_season
51,1959,http://en.wikipedia.org/wiki/1959_Formula_One_season


In [13]:
# Load circuits.csv into `circuit` and preview the first five rows.
# NOTE(review): this reads the same file that an earlier cell loads into
# `circuits`; confirm whether both names are needed.
circuit = pd.read_csv(filepath_or_buffer='../f1_data/circuits.csv')
circuit.head(n=5)

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_International_Circuit
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_International_Circuit
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park
