In [1]:
# --- Standard Library Imports ---
import random
import functools

# --- Third-Party Library Imports ---
import numpy as np
import pandas as pd

# --- Custom Library Imports ---
from models import genetic_ops
from utils.utilities import get_circuits_for_population
from utils.sql import get_table

# --- DEAP Imports ---
try:
    from deap import base, creator, tools, algorithms
except ImportError as exc:
    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel
    # down, which can hide the message and leaves later cells failing on
    # undefined names. Re-raising stops "Run All" with a visible error.
    raise ImportError(
        "DEAP library not found. Please install it using: pip install deap"
    ) from exc
    

In [2]:
# --- GA Configuration (default parameters and random seeds) ---

def set_default_params(params: dict) -> dict:
    """
    Fill in any missing GA settings on ``params`` and hand the same dict back.

    Keys that are already present keep their values; only absent keys receive
    the fallback value. The dictionary is modified in place.

    Parameters:
    - params: dict, caller-supplied settings (may be empty).

    Returns:
    - dict, the very same ``params`` object, now containing every default key.
    """
    fallback_values = dict(
        POPULATION_SIZE=100,
        CROSSOVER_PROB=0.8,   # chance that two selected individuals are mated
        MUTATION_PROB=0.15,   # chance that an individual is mutated
        NUM_GENERATIONS=50,   # kept low while testing
        TOURNAMENT_SIZE=5,    # contestants per tournament-selection round
        RANDOM_SEED=42,
        SEASON_YEAR=2026,     # season used as context for the fitness calculation
        REGRESSION=False,     # True switches on regression estimates
        CLUSTERS=True,        # True switches on clustering
    )
    # Only add what the caller did not specify.
    for name in fallback_values:
        if name not in params:
            params[name] = fallback_values[name]
    return params

# Build the run configuration from the defaults alone (no overrides supplied).
params = set_default_params({})

# Seed both the stdlib and the NumPy global generators from the configured seed
# so repeated runs of the notebook draw the same random numbers.
random.seed(params["RANDOM_SEED"])
np.random.seed(params["RANDOM_SEED"])

In [3]:
# --- Prepare Scenario Data ---
def prepare_scenario(from_season: int = None, from_sample: int = None, from_input: list = None, verbose: bool = False, seed: int = None):
    """
    Prepare the scenario data for the genetic algorithm.

    Exactly one of ``from_season``, ``from_sample`` or ``from_input`` selects
    how the circuits are obtained from ``get_circuits_for_population``.

    Parameters:
    - from_season: int, the season year to fetch data for.
    - from_sample: int, the sample size to fetch data for.
    - from_input: list, custom input forwarded as ``custom=`` to the fetch helper.
    - verbose: bool, whether to print detailed information.
    - seed: int, seed forwarded to the fetch helper when sampling
      (``from_sample``). When omitted, the notebook-level
      ``params["RANDOM_SEED"]`` is used.

    Returns:
    - circuits_df_scenario: pd.DataFrame with the columns
      'circuit_name', 'cluster_id', 'start_freq_prob', 'end_freq_prob'.

    Raises:
    - ValueError: if zero or more than one source argument is provided.
    """
    # Ensure exactly one source argument is populated
    sources = (from_season, from_sample, from_input)
    if sum(arg is not None for arg in sources) != 1:
        raise ValueError("Only one of 'from_season', 'from_sample', or 'from_input' must be provided.")

    # Source column -> scenario column. Dict order fixes the output column order.
    column_map = {
        'code': 'circuit_name',
        'cluster_id': 'cluster_id',
        'first_gp_probability': 'start_freq_prob',
        'last_gp_probability': 'end_freq_prob',
    }

    # The sources are mutually exclusive (checked above), so a single
    # if/elif/else chain is enough.
    if from_season is not None:
        circuits_raw = get_circuits_for_population(season=from_season)
        source_note = f"Scenario prepared using season: {from_season}"
    elif from_sample is not None:
        if seed is None:
            # The original code referenced an undefined RANDOM_SEED global;
            # the configured seed lives in the notebook-level params dict.
            seed = params["RANDOM_SEED"]
        circuits_raw = get_circuits_for_population(n=from_sample, seed=seed)
        source_note = f"Scenario prepared using sample size: {from_sample}"
    else:
        # Honour the caller's verbosity instead of always forcing verbose output.
        circuits_raw = get_circuits_for_population(custom=from_input, verbose=verbose)
        source_note = f"Scenario prepared using custom input: {from_input}"

    if verbose:
        print(source_note)

    # Select and rename by explicit mapping rather than by position.
    circuits_df_scenario = circuits_raw[list(column_map)].rename(columns=column_map)

    # Print details for debugging if verbose
    if verbose:
        circuit_list_scenario = circuits_df_scenario['circuit_name'].tolist()
        print(f"Optimizing for {len(circuit_list_scenario)} circuits.")
        print(f"circuit_list_scenario: {circuit_list_scenario}")

    return circuits_df_scenario

# The scenario itself is built in the next cell by calling prepare_scenario(...).

In [4]:
# Circuit IDs 1..20 inclusive, passed as the custom input for the scenario.
lst = list(range(1, 21))
circuits_df_scenario = prepare_scenario(from_input=lst, verbose=True)

Generating circuits for custom population with n = 20.
Fetching circuit details for ID 1...
Fetched circuit data from the database.
Fetched circuit details for ID 1:    id  code_6    circuit_x       city_x       country_x  latitude  longitude  \
0   1  UKGSIL  Silverstone  Silverstone  United Kingdom   52.0786    -1.0169   

   first_gp_probability  last_gp_probability  
0                   0.0                  0.0  .
Fetched circuit details for ID 1:    id  code_6    circuit_x       city_x       country_x  latitude  longitude  \
0   1  UKGSIL  Silverstone  Silverstone  United Kingdom   52.0786    -1.0169   

   first_gp_probability  last_gp_probability  
0                   0.0                  0.0  .
Starting clusterization process...
Scaling coordinates...

Selected features: ['latitude', 'longitude']
Original Coordinates (first 5 rows):
[[52.0786 -1.0169]]

Scaled Coordinates (first 5 rows):
[[0. 0.]]

Feature scaling complete.
Determining optimal number of clusters (max_clusters=1

  prereq_custom = pd.concat([prereq_custom, circuit], ignore_index=True)


ValueError: max_clusters must be greater than or equal to min_clusters.

In [None]:
# --- DEAP Setup ---
# Create Fitness and Individual types
# weights=(-1.0,) means we want to minimize the fitness score
def deap_toolbox(circuits_df_scenario: pd.DataFrame, fitness_function: callable, params:dict, seed:int=None, verbose=False):
    """
    Create and configure a DEAP toolbox for the genetic algorithm.

    Safe to call more than once in the same kernel: the ``FitnessMin`` and
    ``Individual`` creator classes are only created if they do not exist yet.

    Parameters:
    - circuits_df_scenario: pd.DataFrame, the scenario data for the circuits.
    - fitness_function: callable, registered as ``evaluate`` with
      ``circuits_df``, ``season``, ``regression``, ``clusters`` and ``verbose``
      bound as keyword arguments.
    - params: dict, GA settings; the keys 'POPULATION_SIZE', 'SEASON_YEAR',
      'REGRESSION', 'CLUSTERS' and 'TOURNAMENT_SIZE' are read here.
    - seed: int, random seed forwarded to the initial-population generator.
    - verbose: bool, whether to print detailed information (also forwarded to
      the fitness function).

    Returns:
    - toolbox: deap.base.Toolbox, the configured DEAP toolbox.
    - stats: deap.tools.support.Statistics, the statistics object for logging.
    - hof: deap.tools.HallOfFame of size 1, to hold the best individual found.
    """
    if verbose:
        print("Initializing DEAP toolbox...")

    # Create Fitness and Individual types only once. creator.create() installs
    # the class on the creator module, and calling it again for an existing
    # name overwrites that class and emits a RuntimeWarning.
    # weights=(-1.0,) means the single fitness value is minimised.
    if not hasattr(creator, "FitnessMin"):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMin)

    # Initialize Toolbox
    toolbox = base.Toolbox()

    # The raw population is generated once; every call to population_custom()
    # wraps those same sequences in fresh Individual objects.
    initial_pop_list = genetic_ops.generate_initial_population(circuits_df_scenario, params['POPULATION_SIZE'], seed=seed)

    def _fresh_population():
        return [creator.Individual(ind) for ind in initial_pop_list]

    toolbox.register("population_custom", _fresh_population)

    # Register the genetic operators
    toolbox.register("evaluate", fitness_function,
                     circuits_df=circuits_df_scenario,
                     season=params['SEASON_YEAR'],
                     regression=params['REGRESSION'],
                     clusters=params['CLUSTERS'],
                     verbose=verbose)
    # The crossover/mutation helpers receive the toolbox as their first argument.
    toolbox.register("mate", functools.partial(genetic_ops.order_crossover_deap, toolbox))
    toolbox.register("mutate", functools.partial(genetic_ops.swap_mutation_deap, toolbox))
    toolbox.register("select", tools.selTournament, tournsize=params['TOURNAMENT_SIZE'])

    # Per-generation statistics over the individuals' fitness values
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    # Create the Hall of Fame object - stores the best individual found
    hof = tools.HallOfFame(1)  # Store only the single best

    if verbose:
        print("DEAP toolbox initialized successfully.")

    return toolbox, stats, hof

# Build the toolbox, statistics tracker and hall of fame for this scenario,
# seeding the initial population from the configured RANDOM_SEED.
tbx, sts, hof = deap_toolbox(
    circuits_df_scenario=circuits_df_scenario,
    fitness_function=genetic_ops.calculate_fitness,
    params=params,
    seed=params['RANDOM_SEED'],
    verbose=True,
)

Initializing DEAP toolbox...
DEAP toolbox initialized successfully.


In [None]:
def run_genetic_algorithm(toolbox, stats, hof, params, verbose=False):
    """
    Evolve the toolbox's custom population with DEAP's ``eaSimple`` loop.

    Parameters:
    - toolbox: deap.base.Toolbox, must expose ``population_custom`` plus the operators ``eaSimple`` uses.
    - stats: deap.tools.support.Statistics, passed to ``eaSimple`` for per-generation logging.
    - hof: deap.tools.support.HallOfFame, updated by ``eaSimple``; its first entry is reported as the best.
    - params: dict, read for 'CROSSOVER_PROB', 'MUTATION_PROB' and 'NUM_GENERATIONS'.
    - verbose: bool, print progress here and let ``eaSimple`` print its own log.

    Returns:
    - population: list, the final population after running the genetic algorithm.
    - logbook: deap.tools.Logbook, the logbook containing statistics for each generation.
    - best_individual: list, the first entry of the hall of fame.
    - best_fitness: float, the first fitness value of that individual.
    """
    if verbose:
        print("\n--- Starting Genetic Algorithm ---")
        print(f"Parameters: {params}")

    # The starting generation comes from the toolbox's custom factory.
    starting_population = toolbox.population_custom()
    if verbose:
        print(f"Initial Population Size: {len(starting_population)}")

    # Gather the evolution settings, then hand everything to eaSimple.
    evolve = algorithms.eaSimple
    evolution_settings = {
        "cxpb": params['CROSSOVER_PROB'],
        "mutpb": params['MUTATION_PROB'],
        "ngen": params['NUM_GENERATIONS'],
        "stats": stats,
        "halloffame": hof,
        "verbose": verbose,
    }
    final_population, logbook = evolve(starting_population, toolbox, **evolution_settings)

    # The hall of fame keeps its best entry first.
    champion = hof[0]
    champion_score = champion.fitness.values[0]

    if verbose:
        print("\n--- Genetic Algorithm Finished ---")
        print(f"Best Individual Found (Calendar Sequence): {champion}")
        print(f"Best Fitness Score Found: {champion_score}")
        print(f"Logbook: {logbook}")

    return final_population, logbook, champion, champion_score



In [None]:
# Run the evolution with the toolbox built above; verbose=True prints the
# per-generation log and the final summary.
pop, log, best_ind, best_fit = run_genetic_algorithm(
    toolbox=tbx,
    stats=sts,
    hof=hof,
    params=params,
    verbose=True,
)


--- Starting Genetic Algorithm ---
Parameters: {'POPULATION_SIZE': 100, 'CROSSOVER_PROB': 0.8, 'MUTATION_PROB': 0.15, 'NUM_GENERATIONS': 50, 'TOURNAMENT_SIZE': 5, 'RANDOM_SEED': 42, 'SEASON_YEAR': 2026, 'REGRESSION': False, 'CLUSTERS': True}
Initial Population Size: 100
Regression is set to False. Using synthetic data for fitness calculation.
Getting travel logistics...
Travel logistics keys: ['JAPSUZ-CHISHA', 'CHISHA-SINMAR', 'SINMAR-QATLUS', 'QATLUS-SAUJED', 'SAUJED-UAEYAS', 'UAEYAS-BAHSAK', 'BAHSAK-MEXMEX', 'MEXMEX-USAMIA', 'USAMIA-USAAUS', 'USAAUS-USALAS', 'USALAS-CANMON', 'CANMON-BRASAO', 'BRASAO-AUSMEL', 'AUSMEL-AUSSPI', 'AUSSPI-SPACAT', 'SPACAT-UKGSIL', 'UKGSIL-AZEBAK', 'AZEBAK-BELSPA', 'BELSPA-NETZAN', 'NETZAN-ITAIMO', 'ITAIMO-HUNBUD', 'HUNBUD-MONMON', 'MONMON-ITAMON']
Effort scores: [4030.5833650412123, 117.41399999999999, 13062.910868716239, 13990.115650794063, 15878.714622962298, 1477.919226875703, 3806.7667173315604, 145.07999999999998, 143.8011, 8159.666459741226, 6219.33

In [None]:
# Load the 'fone_geography' table. Later cells read its 'code_6', 'latitude',
# 'longitude', 'circuit_x', 'city_x' and 'country_x' columns.
coords = get_table('fone_geography')

In [None]:
# NOTE(review): generate_f1_calendar is not imported anywhere in this notebook
# (the recorded run of this cell failed with NameError). Import it in the
# imports cell; the module it lives in is not visible here - TODO confirm.

# Keep only the rows whose code_6 appears in the best individual
filtered_coords = coords[coords['code_6'].isin(best_ind)]

# Re-order the rows to follow the sequence of the best individual
ordered_coords = filtered_coords.set_index('code_6').loc[list(best_ind)].reset_index()

# Attach the generated calendar, using the configured season instead of a
# hard-coded year so it stays consistent with the fitness evaluation.
ordered_coords['calendar'] = generate_f1_calendar(year=params['SEASON_YEAR'], n=len(ordered_coords), verbose=False)


NameError: name 'generate_f1_calendar' is not defined

In [None]:
import folium
from folium import plugins

# Create a map centered at the midpoint of the coordinates' bounding box
mid_lat = (ordered_coords['latitude'].min() + ordered_coords['latitude'].max()) / 2
mid_lon = (ordered_coords['longitude'].min() + ordered_coords['longitude'].max()) / 2
map_chart = folium.Map(location=[mid_lat, mid_lon], zoom_start=3)

# Draw one line segment per consecutive pair of circuits to show the sequence
for i in range(len(ordered_coords) - 1):
    start = ordered_coords.iloc[i]
    end = ordered_coords.iloc[i + 1]
    arrow = plugins.PolyLineOffset(
        locations=[[start['latitude'], start['longitude']], [end['latitude'], end['longitude']]],
        color='blue',
        weight=5,
        offset=0
    )
    map_chart.add_child(arrow)

# Add exactly one marker per circuit. Previously this loop was nested inside
# the segment loop (and preceded by a separate plain-marker loop), so every
# marker was stacked on the map many times over.
last_position = len(ordered_coords) - 1
for position, (_, row) in enumerate(ordered_coords.iterrows()):
    tooltip = f"Code: {row['code_6']}, Date: {row['calendar']}, Circuit: {row['circuit_x']}, City: {row['city_x']}, Country: {row['country_x']}"
    if position == 0:  # Start circuit
        marker_icon = folium.Icon(color='green', icon='play', prefix='fa')
    elif position == last_position:  # End circuit
        marker_icon = folium.Icon(color='red', icon='stop', prefix='fa')
    else:  # Intermediate circuits keep folium's default icon
        marker_icon = None
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['code_6'],
        tooltip=tooltip,
        icon=marker_icon
    ).add_to(map_chart)

# Display the map
map_chart

Unnamed: 0,id,circuit_x,city_x,country_x,continent,latitude,longitude,existing,months_to_avoid,traditional_months,notes,code_6,first_gp_probability,last_gp_probability
0,1,Silverstone,Silverstone,United Kingdom,Europe,52.0786,-1.0169,1,"[11, 12, 1, 2]",[7],UK winters are cold/rainy; race usually mid-se...,UKGSIL,0.0,0.0
1,2,Budapest,Mogyoród,Hungary,Europe,47.5789,19.2486,1,[],"[7, 8]","Good summer weather, scheduled pre-summer break",HUNBUD,0.0,0.0
2,3,Monza,Monza,Italy,Europe,45.6156,9.2811,1,"[11, 12, 1, 2]",[9],Held in September post-summer break,ITAMON,0.0,0.0
3,4,São Paulo,São Paulo,Brazil,South America,-23.7036,-46.6997,1,[],[11],"Rainy season possible, usually a late-season race",BRASAO,0.0,0.269231
4,5,Sakhir,Sakhir,Bahrain,Asia,26.0325,50.5106,1,"[6, 7, 8, 9]",[3],Avoid summer heat; often season opener,BAHSAK,0.230769,0.0


In [None]:
# The GA result is bound to `best_ind` at notebook level; `best_individual`
# only exists as a local inside run_genetic_algorithm.
best_ind

['JAPSUZ',
 'USALAS',
 'USAAUS',
 'MEXMEX',
 'USAMIA',
 'CANMON',
 'BRASAO',
 'SAUJED',
 'QATLUS',
 'UAEYAS',
 'BAHSAK',
 'AZEBAK',
 'AUSSPI',
 'BELSPA',
 'NETZAN',
 'UKGSIL',
 'SPACAT',
 'ITAIMO',
 'MONMON',
 'ITAMON',
 'HUNBUD',
 'CHISHA',
 'SINMAR',
 'AUSMEL']