In [108]:
import sqlite3
import pprint
import pandas as pd
import numpy as np
import re
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Create a SQL connection to our SQLite database
con = sqlite3.connect("formula1.sqlite")
cur = con.cursor()

# Podiums

In [4]:
query = """
With R as (SELECT raceId, name, year, name from races),
Re as (SELECT resultId, raceId, driverId, grid, position from results),
D as (Select driverId, surname from drivers)
Select R.name, D.driverid, surname, grid, position, year from Re
INNER JOIN R ON R.raceid == Re.raceid
INNER JOIN D ON D.driverid == Re.driverid
where position <= 3.0
order by year;
"""

In [5]:
df = pd.read_sql_query(query, con)
df

Unnamed: 0,name,driverId,surname,grid,position,year
0,British Grand Prix,642,Farina,1,1,1950
1,British Grand Prix,786,Fagioli,2,2,1950
2,British Grand Prix,686,Parnell,4,3,1950
3,Monaco Grand Prix,579,Fangio,1,1,1950
4,Monaco Grand Prix,647,Ascari,7,2,1950
...,...,...,...,...,...,...
3070,Brazilian Grand Prix,842,Gasly,6,2,2019
3071,Brazilian Grand Prix,832,Sainz,20,3,2019
3072,Abu Dhabi Grand Prix,1,Hamilton,1,1,2019
3073,Abu Dhabi Grand Prix,830,Verstappen,2,2,2019


# Driver Rankings

In [36]:
query = """
With DS as (SELECT * from driver_standings),
D as (Select driverId, surname from drivers),
R as (SELECT raceId, name, year, name, round from races),
C as (
    Select constructors.ConstructorId, name, color, results.raceId, results.driverId from constructors
    INNER JOIN results ON results.ConstructorId = constructors.ConstructorId)
Select year, R.name as gp, round, surname as pilot, points, C.name as team, C.color from DS
INNER JOIN D ON D.driverId = DS.driverId 
INNER JOIN R ON R.raceId = DS.raceId
INNER JOIN C ON C.raceID = DS.raceId AND C.driverId = DS.driverId
WHERE YEAR = 2019
ORDER BY R.year ASC, R.round ASC, points DESC;
"""

In [59]:
df = pd.read_sql_query(query, con)
races_rankings = df.groupby('pilot')

In [60]:
def get_name(gp):
    return gp.split(' ')[0]

In [61]:
data = []
flag = []
annotations = []
rnames = dt['gp'].apply(get_name).values
gps = np.concatenate((np.array(['Start']), rnames))

for race in races_rankings:
    name = race[0]
    dt = race[1].sort_values('round')
#     rnames = dt['gp'].values
    ranking = dt['points'].values
    color = dt['color'].iloc[-1]
    
    
    rankings = np.concatenate((np.array([0]), ranking))
#     tmp = go.Scatter(x=gps, y=rankings, name=name, mode='lines')
    if color != 'None':
#         if name == best_driver_teams[team][0]:
        tmp = go.Scatter(x=gps, y=rankings, name=name, mode='lines', line = dict(color=color, width=2))
#         else:
#             tmp = go.Scatter(x=laps, y=positions, name=name, mode='lines', line = dict(color=color, width=2, dash='dash'))
    else:
        tmp = go.Scatter(x=gps, y=rankings, name=name, mode='lines')

    data.append(tmp)

    # labeling the right_side of the plot
#     annotations.append(dict(xref='paper', x=1.05, y=pos[-1],
#                                   xanchor='auto', yanchor='middle',
#                                   text='{}'.format(codes[codes['surname']==name]['code'].values[0]),
#                                   font=dict(family='Arial',
#                                             size=16),
#                                   showarrow=False))

fig = go.Figure(data=data)
# fig.update_layout(annotations=annotations)
fig.update_layout(
        title='Overall Driver Championship Ranking',
        xaxis_title="Races",
        yaxis_title="Overall Points",
        font=dict(
            family="Courier New, monospace",
            size=18,
            color="#7f7f7f"
        )
    )
fig.update_xaxes(tickangle=45)
fig.show()

# Drivers Position during the race

In [62]:
query = """
With RES as (SELECT * from results),
D as (Select driverId, surname from drivers),
R as (SELECT raceId, name, year, name, round from races)
Select R.year, R.name, R.round, D.surname, grid, position, points, fastestlap, fastestlaptime, rank from RES
INNER JOIN D ON D.driverId = RES.driverId
INNER JOIN R ON R.raceId = RES.raceId
WHERE YEAR = 2019
ORDER BY R.year ASC, R.round ASC, points DESC;
"""

df = pd.read_sql_query(query, con)
df

Unnamed: 0,year,name,round,surname,grid,position,points,fastestLap,fastestLapTime,rank
0,2019,Australian Grand Prix,1,Bottas,2,1.0,26.0,57.0,1:25.580,1
1,2019,Australian Grand Prix,1,Hamilton,1,2.0,18.0,57.0,1:26.057,2
2,2019,Australian Grand Prix,1,Verstappen,4,3.0,15.0,57.0,1:26.256,3
3,2019,Australian Grand Prix,1,Vettel,3,4.0,12.0,16.0,1:27.954,8
4,2019,Australian Grand Prix,1,Leclerc,5,5.0,10.0,58.0,1:26.926,4
...,...,...,...,...,...,...,...,...,...,...
415,2019,Abu Dhabi Grand Prix,21,Giovinazzi,16,16.0,0.0,28.0,1:43.256,15
416,2019,Abu Dhabi Grand Prix,21,Russell,18,17.0,0.0,50.0,1:43.074,13
417,2019,Abu Dhabi Grand Prix,21,Gasly,11,18.0,0.0,53.0,1:42.414,10
418,2019,Abu Dhabi Grand Prix,21,Kubica,19,19.0,0.0,51.0,1:44.500,20


In [64]:
# races = df.groupby('round')

# for race in races:
#     print(race[1].sort_values('rank'))

In [189]:
def get_results_race(id_race):
    query = """
        With L as (SELECT * from lap_times),
        R as (Select raceId, DriverId, ConstructorId, grid, position, laps from results),
        RA as (SELECT raceId, name, year, name, round from races),
        C as (Select constructorId, name, color from constructors),
        D as (Select driverId, code, surname from drivers)
        Select RA.year, RA.name as GP, C.name, D.surname, D.code as Abb, R.grid, L.lap, L.position, R.position as final, C.color from L
        INNER JOIN RA ON RA.raceId = L.raceId
        INNER JOIN R ON R.driverId = L.driverId and R.raceId = L.raceId
        INNER JOIN D on D.driverId = L.driverId
        INNER JOIN C ON C.ConstructorId = R.ConstructorId 
        WHERE RA.raceId = {}
        """.format(id_race)
    
    df = pd.read_sql_query(query, con)
    gp = str(df['GP'].unique()[0])
#     print(gp)
#     pp = pprint.PrettyPrinter(indent=4)
    
    # Get the best driver of each team
    teams_results = df[['surname', 'name', 'final']].drop_duplicates().groupby('name')
    best_driver_teams = {}
    for team_results in teams_results:
        res = team_results[1]
        tmp = res[res['final'] == res['final'].min()].values
        if len(tmp) > 0:
            best_driver_teams[team_results[0]] = tmp[0]
        else:
            # Both drivers got DNF
            # Setting best long lasting one as best
            bd_dnf = df[df['name'] == team_results[0]][['surname', 'lap']].groupby('surname')['lap'].max().index[0]
            best_driver_teams[team_results[0]] = [bd_dnf]
    
#     pp.pprint(best_driver_teams)
    
    races = df.groupby('surname')
    data = []
    flag = []
    annotations = []
    for race in races:
        dt = race[1].sort_values('lap')
        grid = dt['grid'].iloc[0]
        if grid == 0:
            grid = 20
        color = dt['color'].iloc[0]
#         print(color)
        team = dt['name'].iloc[0]
        name = race[0]
        laps = np.concatenate((np.array([0]), dt['lap'].values))
        pos = dt['position'].values
        positions = np.concatenate((np.array([grid]), pos))
#         print(best_driver_teams[team])
        if color != 'None':
            if name == best_driver_teams[team][0]:
                tmp = go.Scatter(x=laps, y=positions, name=name, mode='lines', line = dict(color=color, width=2))
            else:
                tmp = go.Scatter(x=laps, y=positions, name=name, mode='lines', line = dict(color=color, width=2, dash='dash'))
        else:
            tmp = go.Scatter(x=laps, y=positions, name=name, mode='lines')
            
        data.append(tmp)

        # labeling the right_side of the plot
    #     annotations.append(dict(xref='paper', x=1.05, y=pos[-1],
    #                                   xanchor='auto', yanchor='middle',
    #                                   text='{}'.format(codes[codes['surname']==name]['code'].values[0]),
    #                                   font=dict(family='Arial',
    #                                             size=16),
    #                                   showarrow=False))

    fig = go.Figure(data=data)
    # fig.update_layout(annotations=annotations)
    fig.update_layout(
            title=gp,
            xaxis_title="Laps",
            yaxis_title="Ranking",
            font=dict(
                family="Courier New, monospace",
                size=18,
                color="#7f7f7f"
            )
        )

    fig.show()

In [191]:
for i in range(900,1031):
    get_results_race(i)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [192]:
df

Unnamed: 0,year,GP,name,surname,Abb,grid,lap,position,final,color
0,2019,British Grand Prix,Mercedes,Hamilton,HAM,2,1,2,1.0,#00D2BE
1,2019,British Grand Prix,Mercedes,Hamilton,HAM,2,2,2,1.0,#00D2BE
2,2019,British Grand Prix,Mercedes,Hamilton,HAM,2,3,2,1.0,#00D2BE
3,2019,British Grand Prix,Mercedes,Hamilton,HAM,2,4,2,1.0,#00D2BE
4,2019,British Grand Prix,Mercedes,Hamilton,HAM,2,5,2,1.0,#00D2BE
...,...,...,...,...,...,...,...,...,...,...
908,2019,British Grand Prix,Haas F1 Team,Magnussen,MAG,16,2,20,,#F0D787
909,2019,British Grand Prix,Haas F1 Team,Magnussen,MAG,16,3,20,,#F0D787
910,2019,British Grand Prix,Haas F1 Team,Magnussen,MAG,16,4,20,,#F0D787
911,2019,British Grand Prix,Haas F1 Team,Magnussen,MAG,16,5,20,,#F0D787


# Laps proportion per race

In [76]:
def get_results_race(id_race):
    query = """
        With L as (SELECT * from lap_times),
        R as (Select raceId, DriverId, ConstructorId, grid, position, laps from results),
        RA as (SELECT raceId, name, year, name, round from races),
        C as (Select constructorId, name, color from constructors),
        D as (Select driverId, code, surname from drivers)
        Select RA.name as GP, C.name, D.code as Abb, L.lap, L.position, R.position as final, C.color from L
        INNER JOIN RA ON RA.raceId = L.raceId
        INNER JOIN R ON R.driverId = L.driverId and R.raceId = L.raceId
        INNER JOIN D on D.driverId = L.driverId
        INNER JOIN C ON C.ConstructorId = R.ConstructorId 
        WHERE RA.raceId = {}
        """.format(id_race)
    
    df = pd.read_sql_query(query, con)
    return df

In [89]:
race = get_results_race(1010)
race.head()

Unnamed: 0,GP,name,Abb,lap,position,final,color
0,Australian Grand Prix,Mercedes,BOT,1,1,1.0,#00D2BE
1,Australian Grand Prix,Mercedes,BOT,2,1,1.0,#00D2BE
2,Australian Grand Prix,Mercedes,BOT,3,1,1.0,#00D2BE
3,Australian Grand Prix,Mercedes,BOT,4,1,1.0,#00D2BE
4,Australian Grand Prix,Mercedes,BOT,5,1,1.0,#00D2BE


In [90]:
drivers = race.groupby(['Abb'])

In [98]:
race_positions = {}
for driver in drivers:
    dname = driver[0]
    positions = driver[1].groupby('position')
    pos = []
    num_laps = []
    for position in positions:
        pos.append(position[0])
        num_laps.append(position[1].shape[0])

    race_positions[dname] = (pos, num_laps)

In [99]:
race_positions

{'ALB': ([9, 10, 11, 14, 15, 16], [1, 1, 11, 28, 15, 1]),
 'BOT': ([1, 2], [56, 2]),
 'GAS': ([6, 7, 8, 10, 11, 12, 14, 15, 16, 17],
  [10, 1, 11, 2, 20, 1, 1, 2, 8, 1]),
 'GIO': ([9, 10, 12, 13, 14, 15, 16, 17], [3, 1, 8, 1, 1, 29, 4, 10]),
 'GRO': ([6, 7, 13, 14], [1, 13, 4, 11]),
 'HAM': ([2, 3, 4], [45, 3, 10]),
 'HUL': ([7, 8, 9, 10, 11, 13, 14], [21, 22, 1, 7, 4, 1, 1]),
 'KUB': ([17, 18, 19, 20], [26, 1, 22, 6]),
 'KVY': ([7, 9, 10, 11, 13, 14, 15], [11, 1, 20, 13, 1, 2, 9]),
 'LEC': ([2, 3, 4, 5], [3, 10, 1, 44]),
 'MAG': ([6, 7, 8, 9, 10, 11], [35, 10, 1, 8, 3, 1]),
 'NOR': ([7, 8, 9, 10, 12, 13], [1, 1, 1, 11, 33, 10]),
 'PER': ([11, 12, 13, 14, 15, 16, 17], [1, 11, 28, 3, 1, 12, 1]),
 'RAI': ([8, 9, 10, 11, 12, 13, 14, 15, 16], [21, 21, 1, 7, 3, 1, 1, 1, 1]),
 'RIC': ([16, 17, 18, 19, 20], [2, 1, 16, 6, 3]),
 'RUS': ([16, 17, 18], [28, 16, 12]),
 'SAI': ([14], [9]),
 'STR': ([6, 8, 9, 10, 12, 13], [12, 1, 21, 11, 1, 11]),
 'VER': ([1, 2, 3, 4, 5], [2, 8, 29, 17, 2]),
 'VET':

In [277]:
def get_indexes(i,rows,cols):
    col = i % cols
    row = (i - col) / cols
    return(int(row)+1, col+1)

In [289]:
r = 5
c = 4

specs = [[{"type": "domain"}]*c]*r
drivers = list(race_positions.keys())
fig = make_subplots(rows=r, cols=c, specs=specs, subplot_titles=drivers)
for i, driver in enumerate(race_positions.keys()):
    data = race_positions[driver]
    row, col = get_indexes(i, r, c)
    fig.add_trace(go.Pie(labels=data[0], values=data[1], name=driver), row=row, col=col)
#     fig.update_yaxes(title=driver, row=row, col=col)
    
fig.show()