In [71]:
import requests
import pandas as pd
from bs4 import BeautifulSoup


In [100]:
# Season labels to scrape. Each label is interpolated into the "<label> box"
# class value used to locate a season's stats block on a driver page.
DATES = ["2019", "2020"]
# Site root. Page file names are appended to it directly, so the trailing
# slash is part of the value.
BASE_URL = "https://www.f1fantasytracker.com/"

In [63]:
def get_driver_url(driver):
    """Build the address of a single driver's page.

    Args:
        driver: Page slug for the driver, i.e. the page file name without
            its ``.html`` extension.

    Returns:
        str: ``BASE_URL`` followed by the slug and ``.html``.
    """
    return "{}{}.html".format(BASE_URL, driver)

In [64]:
def get_soup(url, timeout=30):
    """Download a page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight through to ``requests.get``.

    Returns:
        BeautifulSoup: The response body parsed with the ``html5lib`` parser.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # requests has no default timeout: without one, a single unresponsive
    # page would block the whole scrape indefinitely.
    response = requests.get(url, timeout=timeout)
    # The HTTP status is not checked here; an error page is parsed like any
    # other body and callers simply find no matching elements in it.
    return BeautifulSoup(response.content, 'html5lib')

In [101]:
def get_drivers():
    """Collect the page slug of every driver listed on the drivers/teams page.

    Returns:
        list: One slug per ``div`` with class
        ``driverteamimagewrapper hovereffect``, taken from the ``href`` of
        the first link inside it with every ``.html`` removed.
    """
    listing = get_soup(f"{BASE_URL}drivers-teams.html")
    slugs = []
    for wrapper in listing.findAll('div', attrs = 'driverteamimagewrapper hovereffect'):
        href = wrapper.find('a')['href']
        slugs.append(href.replace(".html", ""))
    return slugs

# Scrape the driver slugs once; the stats-collection cell below reuses this list.
drivers = get_drivers()

In [65]:
def get_stats_table(soup, date):
    """Look up the stats box for one season on a parsed driver page.

    Args:
        soup: Parsed page; only its ``find`` method is used.
        date: Season label, interpolated into the class value searched for.

    Returns:
        Whatever ``soup.find`` yields for a ``div`` whose class is
        ``"<date> box"``.
    """
    wanted_class = "{} box".format(date)
    return soup.find('div', attrs={'class': wanted_class})

In [66]:
def stats_table_to_dict(table):
    """Flatten a stats box into a plain ``{label: value}`` dictionary.

    Each ``div`` with class ``statsTableRow`` inside ``table`` is expected to
    contain exactly two ``div`` elements: the label first, then the value.

    Args:
        table: BeautifulSoup tag holding the stats rows.

    Returns:
        dict: Label mapped to value. Both are built-in ``str`` objects, or
        ``None`` where a cell's ``.string`` is ``None``.

    Raises:
        ValueError: If a row does not contain exactly two ``div`` elements.
    """
    def _plain(text):
        # Tag.string yields a NavigableString, which keeps a reference to the
        # whole parse tree it came from. Copying it to a built-in str lets
        # each page's tree be freed once scraping is done. None is passed
        # through so an empty cell does not become the text "None".
        return None if text is None else str(text)

    stats = {}
    for row in table.findAll('div', attrs={'class':'statsTableRow'}):
        key, value = row.findAll('div')
        stats[_plain(key.string)] = _plain(value.string)
    return stats

In [107]:
def get_driver_stats_tables(drivers, dates):
    """Scrape per-season stats for each driver.

    Each driver's page is fetched once and then searched for a stats box
    for every requested season; seasons with no box on the page are skipped.

    Args:
        drivers: Iterable of driver page slugs.
        dates: Iterable of season labels to look for on each page.

    Returns:
        list: One dict per (driver, season) found, holding the scraped
        label/value pairs plus ``"Season"`` and ``"Driver"`` entries.
    """
    records = []
    for slug in drivers:
        page = get_soup(get_driver_url(slug))
        for season in dates:
            box = get_stats_table(page, season)
            if not box:
                continue
            record = stats_table_to_dict(box)
            record.update({"Season": season, "Driver": slug})
            records.append(record)
    return records

In [108]:
# Fetch every driver page and collect one stats record per driver and season found.
tables = get_driver_stats_tables(drivers, DATES)

In [109]:
# One row per scraped record; columns are the scraped labels plus "Season" and "Driver".
stats_df = pd.DataFrame(tables)

In [110]:
# Display the assembled frame as this cell's output.
stats_df

Unnamed: 0,Fantasy Position,Average Fantasy Pts,DNF Rate,Average Overtakes,Beat Teammate Rate,Top 5 Highest Scorer,Fantasy Points,Podiums,Overtakes,DNFs,Fastest Laps,Completed Streaks,Season,Driver
0,1st,40.0,0%,-0.1,71%,90%,841,17,2,0,6,8,2019,hamilton
1,1st,40.82,0%,-0.41,71%,88%,699,14,-7,0,6,6,2020,hamilton
2,2nd,31.48,10%,0.5,29%,19%,661,15,10,1,2,7,2019,bottas
3,3rd,25.59,6%,-1.82,29%,65%,435,11,-31,1,2,4,2020,bottas
4,4th,25.8,10%,-0.6,43%,71%,547,10,-14,2,4,6,2019,leclerc
5,9th,14.0,24%,0.76,59%,35%,238,2,13,4,0,1,2020,leclerc
6,8th,12.33,14%,1.0,62%,14%,262,1,16,3,0,2,2019,sainz
7,7th,15.88,18%,1.47,47%,35%,270,1,25,3,1,2,2020,sainz
8,3rd,30.1,10%,0.84,76%,81%,630,9,16,2,3,5,2019,verstappen
9,2nd,26.88,29%,0.41,71%,65%,457,11,7,5,3,4,2020,verstappen


In [111]:
# Save the frame next to the notebook. NOTE(review): to_csv defaults to index=True,
# so the row index is written as an extra, unnamed first column; confirm that
# is wanted before anything reads this file back.
stats_df.to_csv("driver_stats.csv")