# F1 Fantasy Program

## ReadMe

The goal of this program is to web-scrape and gather F1 data. The data is then saved, transformed, and analysed.

# Imports

#### FastF1

In [1]:
from datetime import datetime
from bs4 import BeautifulSoup
import pandas as pd
import requests
import glob

import fastf1

In [2]:
from datetime import datetime

#### MySQL

In [3]:
import mysql.connector
import json

In [4]:
import glob
import csv

### Ignore Warnings

In [5]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Global Variables & Settings

### Enable FastF1 Caching

In [6]:
fastf1.Cache.enable_cache('cache')

### Loaded races logfile name

In [7]:
loaded_races_logfile = 'loaded_races_logfile.txt'

# FastF1 Data Acquisition

### Get the completed events (race weekends) of the season (year)

In [8]:
def get_completed_events(season, b_print=False):
    """Return the already-completed events of a season.

    Args:
        season: Year passed to ``fastf1.get_event_schedule``.
        b_print: Accepted for backward compatibility; currently unused.

    Returns:
        list: One single-row slice of the schedule per completed event, in
        schedule order. Empty when no event date lies in the past.
    """
    schedule = fastf1.get_event_schedule(season)

    # Select rows by their own column values. Using the round number as a row
    # position (as this function used to) is only right when exactly one row
    # sits ahead of round 1 in the schedule.
    # Rows with round number 0 are left out, where the old loop skipped the
    # first schedule entry.
    # NOTE(review): FastF1 appears to use round 0 for testing events - confirm.
    is_numbered_round = schedule['RoundNumber'] > 0
    is_in_the_past = schedule['EventDate'] < datetime.now()
    completed = schedule[is_numbered_round & is_in_the_past]

    # Keep the established return shape: a list of one-row frames.
    return [completed.iloc[[pos]] for pos in range(len(completed))]
	
# Commented by GPT4

### Get the number of completed events of the season (year)

In [9]:
def get_completed_events_count(season):
	"""Return how many events of ``season`` are already completed.

	This is the length of the list produced by ``get_completed_events``.
	"""
	return len(get_completed_events(season))

### Load the data of a completed session

In [10]:
def get_session_data(season, event_num, session_type):
    """Fetch one session through FastF1 and load its data.

    Args:
        season: Year forwarded to ``fastf1.get_session``.
        event_num: Event (round) identifier forwarded to ``fastf1.get_session``.
        session_type: Session identifier forwarded to ``fastf1.get_session``
            (this file uses ``'R'``).

    Returns:
        The object returned by ``fastf1.get_session``, after its ``load()``
        method has been called.
    """
    session = fastf1.get_session(season, event_num, session_type)
    session.load()
    return session

### Load the data of all the completed sessions

In [11]:
def get_completed_sessions(season, completed_events, session_type):
    """Load one session of the given type for every completed event.

    Args:
        season: Year forwarded to ``get_session_data``.
        completed_events: Iterable of event frames, each with a
            ``RoundNumber`` column (the shape ``get_completed_events`` builds).
        session_type: Session identifier forwarded to ``get_session_data``.

    Returns:
        list: The loaded sessions, in the order the events were given.
    """
    completed_sessions = []

    for event in completed_events:
        # ``event`` is a frame, so ``event['RoundNumber']`` is a whole column.
        # Pass each round number to the loader on its own instead of handing
        # over the column object.
        for round_number in event['RoundNumber']:
            completed_sessions.append(
                get_session_data(season, round_number, session_type)
            )

    return completed_sessions

# commented by GPT3.5

### Get All Completed Sessions' data

In [12]:
def get_completed_sessions(season, completed_events, session_type):
    """Load the requested session for every row of every event frame.

    Args:
        season: Year forwarded to ``get_session_data``.
        completed_events: Iterable of frames; each row's ``RoundNumber`` is
            used to pick the event.
        session_type: Session identifier forwarded to ``get_session_data``.

    Returns:
        list: One loaded session per row, in iteration order.
    """
    return [
        get_session_data(season, row['RoundNumber'], session_type)
        for frame in completed_events
        for _, row in frame.iterrows()
    ]


# Save to CSV file

In [13]:
def save_to_csv(target_csv, data_to_load):
    """Write ``data_to_load`` to ``target_csv`` with its ``to_csv`` method.

    Args:
        target_csv: Destination handed to ``to_csv`` (a path or a writable
            text buffer).
        data_to_load: Object providing ``to_csv``, e.g. a pandas DataFrame.
    """
    data_to_load.to_csv(path_or_buf=target_csv)

### Save sessions to csv (Outdated)

In [14]:
def save_df_list_to_csv(dataframe_list, name="dataframe"):
    """Write every frame of a list to its own numbered CSV file.

    Args:
        dataframe_list: Iterable of objects providing ``to_csv``.
        name: Filename prefix; frame number ``i`` (counted from 0) is written
            to ``<name>_<i>.csv``.
    """
    for position, frame in enumerate(dataframe_list):
        target = name + f"_{position}.csv"
        frame.to_csv(target)

### Log Loaded Race

In [15]:
def log_loaded_race(race_number, loaded_races_logfile='loaded_races_logfile.txt'):
	"""Append a ``timestamp,race_number`` line to the loaded-races log.

	Args:
		race_number: Value written after the comma (converted with ``str``).
		loaded_races_logfile: Path of the log file; it is opened in append
			mode, so it is created when missing.
	"""
	# '%b' is the documented directive for the abbreviated month name. The
	# '%h' used before is a platform-specific alias of it and is not
	# accepted by every C library.
	timestamp_format = '%Y-%b-%d-%H:%M:%S'  # Year-Monthname-Day-Hour:Minute:Second
	timestamp = datetime.now().strftime(timestamp_format)
	with open(loaded_races_logfile, 'a') as log:
		log.write(f'{timestamp},{race_number}\n')

### Read logfile

In [16]:
def read_loaded_races_logfile(loaded_races_logfile='loaded_races_logfile.txt'):
	"""Return the race numbers recorded in the loaded-races log.

	Every non-blank line is split on commas and its second field is parsed
	as an integer (the ``timestamp,race_number`` layout written by
	``log_loaded_race``).

	Args:
		loaded_races_logfile: Path of the log file to read.

	Returns:
		list[int]: Race numbers in file order. Empty when the file does not
		exist yet, i.e. nothing has been logged so far.

	Raises:
		IndexError: A non-blank line has no second comma-separated field.
		ValueError: The second field of a line is not an integer.
	"""
	loaded_races = []
	try:
		with open(loaded_races_logfile, 'r') as log:
			for line in log:
				record = line.strip()
				if not record:
					# Tolerate blank lines, e.g. a trailing newline.
					continue
				loaded_races.append(int(record.split(',')[1]))
	except FileNotFoundError:
		# First run: no race has been logged, so there is nothing to report.
		return []

	return loaded_races

### Find missing races

In [17]:
def find_missing_races(races_list: list, season: int):
	"""Return the completed rounds of ``season`` that are not in ``races_list``.

	Rounds are taken to run from 1 up to the value returned by
	``get_completed_events_count(season)``. Every round in that range is
	checked, so gaps below the smallest already-loaded round are reported
	too (the range used to start at ``min(races_list)``).

	Args:
		races_list: Round numbers that were already loaded; may be empty.
		season: Year forwarded to ``get_completed_events_count``.

	Returns:
		list[int]: Missing round numbers in ascending order.
	"""
	completed_count = get_completed_events_count(season)
	already_loaded = set(races_list)
	return [
		round_number
		for round_number in range(1, completed_count + 1)
		if round_number not in already_loaded
	]

# Webscraping

In [18]:
# url = 'https://www.f1fantasytracker.com/prices.html'  # replace with the URL of the website you want to scrape
# response = requests.get(url)
# soup = BeautifulSoup(response.content, 'html.parser')

# current_price = soup.find('span', {'id': 'CurrentPrice1'}).text
# season_price = soup.find('span', {'id': 'SeasonPrice1'}).text

# print(current_price)
# print(season_price)

# Extract Data, into CSV files

In [19]:
def extract_data_into_csv(season=2023, session_type='R'):
	"""Save the results of every not-yet-loaded completed event as a CSV file.

	Reads the loaded-races log, determines which rounds are still missing,
	then for each of them loads the session, writes ``session.results`` to
	``races_csv/<gp>_<date>.csv`` and records the round in the log.

	Args:
		season: Year whose events are processed.
		session_type: Session identifier forwarded to ``get_session_data``.
	"""
	import os  # local import: only needed to create the output folder

	loaded_races = read_loaded_races_logfile()
	missing_races = find_missing_races(loaded_races, season)

	races_csv_folder = 'races_csv/'
	# The CSV writer does not create missing parent folders, so make sure the
	# target folder exists before the first write.
	os.makedirs(races_csv_folder, exist_ok=True)

	# Ascending order keeps the log entries in round order.
	for event_num in sorted(missing_races):
		session = get_session_data(season, event_num, session_type)
		csv_path = (
			races_csv_folder
			+ str(session.event.gp) + '_' + str(session.event.date) + '.csv'
		)
		save_to_csv(csv_path, session.results)
		log_loaded_race(event_num) # maybe move this to after the data is actually loaded into the database

# MySQL f1stats

### Connect to Database

###### Create a 'private_config.json' file containing your database details under the keys `db_host`, `db_user`, `db_password`, and `db_database`.

In [20]:
# Read the database connection settings from the local JSON config file.
with open('private_config.json', 'r') as file:
	config_data = json.load(file)


# Open the module-wide MySQL connection from the 'db_*' entries of the config.
# `mydb` is used by execute_db_insert_functions for cursor/commit/rollback.
mydb = mysql.connector.connect(
  host=config_data['db_host'],
  user=config_data['db_user'],
  password=config_data['db_password'],
  database=config_data['db_database']
)

### Load data into Database

#### Insert Functions

In [2]:
# teams table
def insert_into_teams(cursor, data_row):
	"""Insert one row into the ``teams`` table and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` to ``'column4'`` are read,
			in that order, for name, points, constructor and nationality.
			NOTE(review): these keys look like placeholders - confirm.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2', 'column3', 'column4')
	try:
		sql = '''
			INSERT INTO teams
			(name, points, constructor, nationality)
			VALUES (%s, %s, %s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys)
		cursor.execute(sql, values)  # TODO finish it
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into teams: {err}")
		raise



# team_price_history table
def insert_into_team_price_history(cursor, data_row, team_id_fk):
	"""Insert one row into ``team_price_history`` and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` and ``'column2'`` are read
			for price and date.
			NOTE(review): these keys look like placeholders - confirm.
		team_id_fk: Value stored in the ``team_id`` column.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2')
	try:
		sql = '''
			INSERT INTO team_price_history
			(price, date, team_id)
			VALUES (%s, %s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys) + (team_id_fk,)
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into team_price_history: {err}")
		raise



# drivers table
def insert_into_drivers(cursor, data_row, team_id_fk):
	"""Insert one row into the ``drivers`` table and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` to ``'column5'`` are read,
			in that order, for first_name, last_name, country, number and
			points.
			NOTE(review): these keys look like placeholders - confirm.
		team_id_fk: Value stored in the ``team_id`` column.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2', 'column3', 'column4', 'column5')
	try:
		sql = '''
			INSERT INTO drivers
			(first_name, last_name, country, number, points, team_id)
			VALUES (%s, %s, %s, %s, %s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys) + (team_id_fk,)
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into drivers: {err}")
		raise


# driver_price_history table
def insert_into_driver_price_history(cursor, data_row, driver_id_fk):
	"""Insert one row into ``driver_price_history`` and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` and ``'column2'`` are read
			for price and date.
			NOTE(review): these keys look like placeholders - confirm.
		driver_id_fk: Value stored in the ``driver_id`` column.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2')
	try:
		sql = '''
			INSERT INTO driver_price_history
			(price, date, driver_id)
			VALUES (%s, %s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys) + (driver_id_fk,)
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into driver_price_history: {err}")
		raise



# circuit table
def insert_into_circuit(cursor, data_row):
	"""Insert one row into the ``circuit`` table and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` and ``'column2'`` are read
			for name and country.
			NOTE(review): these keys look like placeholders - confirm.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2')
	try:
		sql = '''
			INSERT INTO circuit
			(name, country)
			VALUES (%s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys)
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into circuit: {err}")
		raise


# race table
def insert_into_race(cursor, data_row, circuit_id_fk):
	"""Insert one row into the ``race`` table and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` to ``'column4'`` are read,
			in that order, for race_type, date, laps and lap_length.
			NOTE(review): these keys look like placeholders - confirm.
		circuit_id_fk: Value stored in the ``circuit_id`` column.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2', 'column3', 'column4')
	try:
		sql = '''
			INSERT INTO race
			(race_type, date, laps, lap_length, circuit_id)
			VALUES (%s, %s, %s, %s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys) + (circuit_id_fk,)
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into race: {err}")
		raise


# race_results table
def insert_into_race_results(cursor, data_row, race_id_fk, driver_id_fk, team_id_fk):
	"""Insert one row into the ``race_results`` table and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` to ``'column3'`` are read,
			in that order, for qualifying_position, race_position and
			fastest_lap_time.
			NOTE(review): these keys look like placeholders - confirm.
		race_id_fk: Value stored in the ``race_id`` column.
		driver_id_fk: Value stored in the ``driver_id`` column.
		team_id_fk: Value stored in the ``team_id`` column.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2', 'column3')
	try:
		sql = '''
			INSERT INTO race_results
			(qualifying_position, race_position, fastest_lap_time, race_id, driver_id, team_id)
			VALUES (%s, %s, %s, %s, %s, %s)
		'''
		foreign_keys = (race_id_fk, driver_id_fk, team_id_fk)
		values = tuple(data_row[key] for key in source_keys) + foreign_keys
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into race_results: {err}")
		raise


# weather table
def insert_into_weather(cursor, data_row, race_id_fk):  # todo add more weather stuff
	"""Insert one row into the ``weather`` table and return its row id.

	Args:
		cursor: Object exposing ``execute(query, params)`` and ``lastrowid``.
		data_row: Row indexed by key; ``'column1'`` and ``'column2'`` are read
			for conditions and temperature.
			NOTE(review): these keys look like placeholders - confirm.
		race_id_fk: Value stored in the ``race_id`` column.

	Returns:
		The cursor's ``lastrowid`` after the insert.

	Raises:
		Exception: Any error is printed and then re-raised unchanged.
	"""
	source_keys = ('column1', 'column2')
	try:
		sql = '''
			INSERT INTO weather
			(conditions, temperature, race_id)
			VALUES (%s, %s, %s)
		'''
		values = tuple(data_row[key] for key in source_keys) + (race_id_fk,)
		cursor.execute(sql, values)
		return cursor.lastrowid
	except Exception as err:
		print(f"Error occurred while inserting into weather: {err}")
		raise


#### Insert Execution Function

In [3]:
def execute_db_insert_functions():
	"""Run the database inserts as one unit on the shared connection.

	Opens a cursor on the module-level ``mydb`` connection and commits once
	the body has finished. On any exception the error is printed, the
	connection is rolled back and the exception is re-raised.
	"""
	with mydb.cursor() as db_cursor:
		try:
			# Call insert functions here, passing them ``db_cursor``

			mydb.commit()
		except Exception as failure:
			print(f"An error occurred: {failure}")
			mydb.rollback()
			raise