In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import fastf1
import yfinance as yf
from datetime import datetime, timedelta
import statsmodels.api as sm
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Enable FastF1 cache
fastf1.Cache.enable_cache('f1_cache')



In [28]:
import yfinance as yf
from datetime import datetime, timedelta

# Function to get stock data for a specific ticker
def get_stock_data(ticker, start_date, end_date):
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    return stock_data

# Example: Get Ferrari stock data from 2016 (IPO) to present
ferrari_stock = get_stock_data('RACE', '2016-01-01', datetime.now().strftime('%Y-%m-%d'))
print("Ferrari stock data preview:")
print(ferrari_stock.head())

# Function to get stock data for all constructors in our mapping
def get_all_stocks_data(tickers_dict, start_date, end_date):
    stock_data = {}
    for constructor, ticker in tickers_dict.items():
        try:
            data = get_stock_data(ticker, start_date, end_date)
            stock_data[constructor] = data
            print(f"Successfully downloaded data for {constructor} ({ticker})")
        except Exception as e:
            print(f"Error downloading data for {constructor} ({ticker}): {e}")
    return stock_data
stock_tickers = {
    'Mercedes': 'DMLRY',       # Mercedes parent company Daimler
    'Ferrari': 'RACE',         # Ferrari N.V.
    'Red Bull': None,          # Private team, use parent company
    'McLaren': None,           # Private company
    'Alpine': 'RNSDF',         # Renault
    'Aston Martin': 'AML.L',   # Aston Martin
    'Williams': None,          # Private team
    'AlphaTauri': None,        # Private team
    'Alfa Romeo': 'STLA',      # Stellantis (parent company)
    'Haas': None    
}

# Get data for all publicly traded constructors
stocks_data = get_all_stocks_data(stock_tickers, '2016-01-01', datetime.now().strftime('%Y-%m-%d'))

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['DMLRY']: YFTzMissingError('possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completed


Ferrari stock data preview:
Price           Close       High        Low       Open   Volume
Ticker           RACE       RACE       RACE       RACE     RACE
Date                                                           
2016-01-04  44.159557  44.550929  43.246362  44.066375  2507000
2016-01-05  45.184574  45.408212  44.215467  45.287073  1934400
2016-01-06  43.861374  44.597523  43.404778  44.495021  2554900
2016-01-07  43.470005  43.870693  43.013405  43.255680  1775300
2016-01-08  40.171318  43.451370  40.171318  43.423413  5116900
Successfully downloaded data for Mercedes (DMLRY)
Successfully downloaded data for Ferrari (RACE)
Error downloading data for Red Bull (None): 'NoneType' object has no attribute 'replace'
Error downloading data for McLaren (None): 'NoneType' object has no attribute 'replace'


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Successfully downloaded data for Alpine (RNSDF)
Successfully downloaded data for Aston Martin (AML.L)
Error downloading data for Williams (None): 'NoneType' object has no attribute 'replace'
Error downloading data for AlphaTauri (None): 'NoneType' object has no attribute 'replace'


[*********************100%***********************]  1 of 1 completed

Successfully downloaded data for Alfa Romeo (STLA)
Error downloading data for Haas (None): 'NoneType' object has no attribute 'replace'





In [36]:
stock_data

{'Mercedes': Empty DataFrame
 Columns: [(Adj Close, DMLRY), (Close, DMLRY), (High, DMLRY), (Low, DMLRY), (Open, DMLRY), (Volume, DMLRY), (Team, ), (Ticker, )]
 Index: [],
 'Ferrari': Price            Close        High         Low        Open  Volume     Team  \
 Ticker            RACE        RACE        RACE        RACE    RACE            
 Date                                                                          
 2019-01-02   94.720909   95.210351   93.674856   94.145100  293100  Ferrari   
 2019-01-03   93.070244   94.903238   92.724757   94.864850  450400  Ferrari   
 2019-01-04   98.108597   98.636419   95.258331   96.189227  524200  Ferrari   
 2019-01-07  100.267883  101.467489   98.799559   99.154645  427500  Ferrari   
 2019-01-08  104.202591  104.845578  103.070162  103.281294  762200  Ferrari   
 ...                ...         ...         ...         ...     ...      ...   
 2024-12-23  430.149994  431.239990  422.290009  424.380005  225600  Ferrari   
 2024-12-24  433.9

In [11]:
# Define F1 teams and their corresponding stock tickers
team_tickers = {
    'Mercedes': 'DMLRY',       # Mercedes parent company Daimler
    'Ferrari': 'RACE',         # Ferrari N.V.
    'Red Bull': None,          # Private team, use parent company
    'McLaren': None,           # Private company
    'Alpine': 'RNSDF',         # Renault
    'Aston Martin': 'AML.L',   # Aston Martin
    'Williams': None,          # Private team
    'AlphaTauri': None,        # Private team
    'Alfa Romeo': 'STLA',      # Stellantis (parent company)
    'Haas': None               # Private team
}

# Filter to only publicly traded companies
public_teams = {k: v for k, v in team_tickers.items() if v is not None}

def collect_stock_data(team_tickers, start_date, end_date):
    """
    Collect stock data for F1 teams using Yahoo Finance
    """
    stock_data = {}
    
    for team, ticker in team_tickers.items():
        if ticker:
            try:
                print(f"Collecting stock data for {team} ({ticker})...")
                data = yf.download(ticker, start=start_date, end=end_date)
                data['Team'] = team
                data['Ticker'] = ticker
                stock_data[team] = data
            except Exception as e:
                print(f"Error collecting data for {team} ({ticker}): {e}")
    
    return stock_data

# Set date range - use one year before F1 data to have enough estimation window
start_date = '2019-01-01'
end_date = '2024-12-31'

# Collect stock data
stock_data = collect_stock_data(public_teams, start_date, end_date)

# Combine all stock data into a single DataFrame
combined_stock_data = pd.concat([df for df in stock_data.values()])
combined_stock_data.reset_index(inplace=True)
combined_stock_data.rename(columns={'index': 'Date'}, inplace=True)

Collecting stock data for Mercedes (DMLRY)...


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['DMLRY']: YFTzMissingError('possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Collecting stock data for Ferrari (RACE)...
Collecting stock data for Alpine (RNSDF)...
Collecting stock data for Aston Martin (AML.L)...
Collecting stock data for Alfa Romeo (STLA)...





In [13]:
def align_race_stock_data(races_df, results_df, stock_data):
    """
    Align race events with corresponding stock data
    """
    # Create a DataFrame to store aligned data
    aligned_data = pd.DataFrame()
    
    # Process each race
    for _, race in races_df.iterrows():
        race_date = race['EventDate']
        race_name = race['EventName']
        race_year = race['year']
        
        # Get race results for this race
        race_results = results_df[(results_df['event'] == race_name) & 
                                  (results_df['year'] == race_year)]
        
        # For each team with stock data
        for team, ticker_data in stock_data.items():
            # Check if team participated in this race
            team_result = race_results[race_results['TeamName'] == team]
            
            if not team_result.empty:
                # Convert race date to datetime
                race_datetime = pd.to_datetime(race_date)
                
                # Find nearest trading days (some races are on weekends when markets are closed)
                # Pre-race trading day (last trading day before race)
                pre_race_data = ticker_data[ticker_data['Date'] < race_datetime].sort_values('Date').tail(5)
                
                # Post-race trading days (first trading days after race)
                post_race_data = ticker_data[ticker_data['Date'] > race_datetime].sort_values('Date').head(5)
                
                if not pre_race_data.empty and not post_race_data.empty:
                    # Extract race results
                    position = team_result['Position'].values[0] if 'Position' in team_result.columns else None
                    points = team_result['Points'].values[0] if 'Points' in team_result.columns else None
                    
                    # Create event records
                    for i, post_day in enumerate(post_race_data['Date']):
                        if i < len(post_race_data):
                            event_data = {
                                'Race': race_name,
                                'Year': race_year,
                                'RaceDate': race_datetime,
                                'Team': team,
                                'TradingDay': post_day,
                                'DaysAfterRace': i+1,
                                'Position': position,
                                'Points': points,
                                'Open': post_race_data.iloc[i]['Open'],
                                'High': post_race_data.iloc[i]['High'],
                                'Low': post_race_data.iloc[i]['Low'],
                                'Close': post_race_data.iloc[i]['Close'],
                                'Volume': post_race_data.iloc[i]['Volume'],
                                'Adj Close': post_race_data.iloc[i]['Adj Close']
                            }
                            aligned_data = pd.concat([aligned_data, pd.DataFrame([event_data])])
    
    # Reset index
    aligned_data.reset_index(drop=True, inplace=True)
    return aligned_data

# Align F1 race data with stock data
aligned_data = align_race_stock_data(all_races, all_results, stock_data)

KeyError: 'EventDate'