In [20]:
import requests
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def get_total_races(season):
    """Return the number of races the Ergast API lists for ``season``.

    Args:
        season: Season year interpolated into the request URL (e.g. 2022).

    Returns:
        The length of ``MRData -> RaceTable -> Races`` in the JSON response,
        or 0 (after printing a notice) when that list is empty.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = f'http://ergast.com/api/f1/{season}.json'
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the program indefinitely.
    response = requests.get(url, timeout=30)
    # Report HTTP errors as such instead of as a confusing JSON decode error.
    response.raise_for_status()
    races = response.json()['MRData']['RaceTable']['Races']

    if not races:
        print(f"No data found for season {season}.")
        return 0

    return len(races)

def fetch_race_data(season, race_number):
    """Download the results of one race and return them as a DataFrame.

    Args:
        season: Season year interpolated into the request URL.
        race_number: Round number within the season; also stored in the
            ``race_number`` column of the returned frame.

    Returns:
        A DataFrame with one row per entry in the race's ``Results`` list.
        Values are stored as delivered by the API (no type conversion is
        done here). An empty DataFrame is returned, after printing a
        notice, when the response contains no race.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = f'http://ergast.com/api/f1/{season}/{race_number}/results.json'
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the program indefinitely.
    response = requests.get(url, timeout=30)
    # Report HTTP errors as such instead of as a confusing JSON decode error.
    response.raise_for_status()
    races = response.json()['MRData']['RaceTable']['Races']

    if not races:
        print(f"No data found for season {season}, race number {race_number}.")
        return pd.DataFrame()

    race_results = races[0]['Results']

    race_df = pd.DataFrame([{
        'driver_id': result['Driver']['driverId'],
        'driver_name': f"{result['Driver']['givenName']} {result['Driver']['familyName']}",
        'constructor_id': result['Constructor']['constructorId'],
        'constructor_name': result['Constructor']['name'],
        'grid_position': result['grid'],
        'laps': result['laps'],
        'status': result['status'],
        'position': result['position'],
        'points': result['points'],
        # 'FastestLap' may be absent from a result, hence the chained .get().
        'fastest_lap_time': result.get('FastestLap', {}).get('Time', {}).get('time', None),
        # NOTE(review): despite its name this column holds the constructor
        # *id*, not a points total. Kept unchanged so existing consumers see
        # the same data; confirm the intended source before relying on it.
        'constructor_points': result['Constructor']['constructorId'],
    } for result in race_results])

    race_df['race_number'] = race_number

    return race_df

def fetch_previous_races_data(season, race_number):
    """Collect the results of every round before ``race_number``.

    Args:
        season: Season year passed through to ``fetch_race_data``.
        race_number: Rounds ``1 .. race_number - 1`` are fetched; the round
            itself is excluded.

    Returns:
        One DataFrame with the rows of all fetched rounds and a fresh
        0..n-1 index, or an empty DataFrame when there are no earlier
        rounds or none of them returned data.
    """
    frames = []

    for race in range(1, race_number):
        print(f"Fetching data for race round {race}...")
        race_data = fetch_race_data(season, race)
        # Skip rounds without data: concatenating empty frames contributes
        # no rows and is deprecated behaviour in recent pandas versions.
        if not race_data.empty:
            frames.append(race_data)

    # pd.concat raises on an empty list, so handle "nothing fetched" here.
    if not frames:
        return pd.DataFrame()

    # A single concat avoids re-copying the accumulated rows on every round.
    return pd.concat(frames, ignore_index=True)

def prepare_features(data):
    """Build the feature matrix and winner label from race result rows.

    The input frame is left untouched; all conversions happen on a copy.

    Args:
        data: DataFrame with at least the columns ``driver_id``,
            ``constructor_id``, ``grid_position``, ``points``, ``position``
            and ``race_number``.

    Returns:
        A tuple ``(X, y)`` where ``X`` holds the columns ``driver_id``,
        ``constructor_id``, ``grid_position``, ``points`` and
        ``race_number``, and ``y`` is an integer Series named ``winner``
        that is 1 where ``position`` equals the string ``'1'`` and 0
        elsewhere.

    Raises:
        KeyError: If a required column is missing (e.g. an empty frame).
    """
    # Work on a copy so the caller's frame keeps its original ids and columns.
    frame = data.copy()

    # factorize numbers values in order of first appearance within this
    # frame, so the integer codes are not comparable between separate calls.
    frame['driver_id'] = pd.factorize(frame['driver_id'])[0]
    frame['constructor_id'] = pd.factorize(frame['constructor_id'])[0]
    frame['grid_position'] = frame['grid_position'].astype(int)
    # NOTE(review): points are awarded from the finishing position, so this
    # feature likely leaks the label into X. It is kept to preserve the
    # existing feature schema; confirm whether it should be replaced by
    # information that is known before the race.
    frame['points'] = frame['points'].astype(float)

    y = (frame['position'] == '1').astype(int).rename('winner')
    X = frame[['driver_id', 'constructor_id', 'grid_position', 'points', 'race_number']]

    return X, y

def train_model(X, y):
    """Fit a random forest on ``X``/``y`` and print its hold-out accuracy.

    Args:
        X: Feature matrix.
        y: Target labels aligned with ``X``.

    Returns:
        The fitted ``RandomForestClassifier``. It is trained on the 80%
        training split only; the remaining 20% is used for the printed
        accuracy figure.
    """
    # Fixed seeds keep both the split and the forest reproducible.
    split = train_test_split(X, y, test_size=0.2, random_state=42)
    features_train, features_holdout, labels_train, labels_holdout = split

    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(features_train, labels_train)

    # Score on the held-out rows and express the result as a percentage.
    # NOTE(review): if winners are a small share of the rows, plain accuracy
    # can look high even for a model that never predicts a win — confirm
    # whether a different metric is wanted.
    holdout_predictions = forest.predict(features_holdout)
    accuracy = 100 * accuracy_score(labels_holdout, holdout_predictions)
    print(f'Model accuracy: {accuracy:.2f}%')

    return forest

def predict_winner(model, season, race_number):
    """Print the entrant the model considers most likely to win a race.

    The rows for the race come from ``fetch_race_data(season, race_number)``
    and are converted with ``prepare_features``. Entrants are ranked by the
    model's predicted probability for class 1 (winner) and the top one is
    printed.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and ``classes_``.
        season: Season year of the race.
        race_number: Round number of the race.

    Returns:
        None. Nothing is printed when no data is available for the race.
    """
    # NOTE(review): fetch_race_data reads the race's *results*, so this only
    # works for races that already have results, and the features include
    # the points scored in that same race — confirm this is intended.
    race_data = fetch_race_data(season, race_number)

    if race_data.empty:
        return

    X_race, _ = prepare_features(race_data)

    classes = list(model.classes_)
    if 1 not in classes:
        # Without a winner class there is no win probability to rank by.
        print("The model was trained without any winning examples; cannot predict a winner.")
        return

    # Rank by win probability. Taking argmax over hard 0/1 predictions would
    # silently select the first row whenever no entrant is predicted to win.
    win_probability = model.predict_proba(X_race)[:, classes.index(1)]
    winner_index = win_probability.argmax()

    predicted_winner_name = race_data.iloc[winner_index]['driver_name']
    predicted_constructor_name = race_data.iloc[winner_index]['constructor_name']

    print(f'Predicted winner for race {race_number} in season {season}:')
    print(f'Driver: {predicted_winner_name} ')
    print(f'Team: {predicted_constructor_name}')

if __name__ == "__main__":
    # Interactive entry point: ask for a season and round, train on all
    # earlier rounds of that season, then print the predicted winner.
    # `raise SystemExit` is used instead of exit(), which is a helper meant
    # for the interactive interpreter.
    try:
        season = int(input("Enter the season (e.g., 2022): "))
    except ValueError:
        print("Please enter the season as a whole number (e.g., 2022).")
        raise SystemExit from None

    total_races = get_total_races(season)

    if total_races == 0:
        raise SystemExit

    print(f'Total races in season {season}: {total_races}')

    try:
        race_number = int(input(f"Enter the race number (1 to {total_races}): "))
    except ValueError:
        print(f"Please enter a valid race number between 1 and {total_races}.")
        raise SystemExit from None

    if race_number < 1 or race_number > total_races:
        print(f"Please enter a valid race number between 1 and {total_races}.")
        raise SystemExit

    previous_races_data = fetch_previous_races_data(season, race_number)

    # Round 1 has no earlier rounds, which leaves nothing to train on;
    # stop here instead of failing on missing columns further down.
    if previous_races_data.empty:
        print(f"No earlier race results are available before race {race_number}; cannot train a model.")
        raise SystemExit

    X, y = prepare_features(previous_races_data)

    model = train_model(X, y)

    predict_winner(model, season, race_number)


Enter the season (e.g., 2022):  2018


Total races in season 2018: 21


Enter the race number (1 to 21):  20


Fetching data for race round 1...
Fetching data for race round 2...
Fetching data for race round 3...
Fetching data for race round 4...
Fetching data for race round 5...
Fetching data for race round 6...
Fetching data for race round 7...
Fetching data for race round 8...
Fetching data for race round 9...
Fetching data for race round 10...
Fetching data for race round 11...
Fetching data for race round 12...
Fetching data for race round 13...
Fetching data for race round 14...
Fetching data for race round 15...
Fetching data for race round 16...
Fetching data for race round 17...
Fetching data for race round 18...
Fetching data for race round 19...
Model accuracy: 100.00%
Predicted winner for race 20 in season 2018:
Driver: Lewis Hamilton 
Team: Mercedes
