In [1]:
import pandas as pd

In [2]:
import pandas as pd

def merge_f1_data(file_paths):
    """
    Merge F1 datasets with appropriate suffixes based on source files.

    Every CSV listed in ``file_paths`` is loaded first. The ``circuits`` table
    is then used as the base and the remaining tables are outer-joined onto
    the running result in a fixed order (races, results, drivers, status).
    Progress and error messages are printed rather than raised.

    Args:
        file_paths (dict): Dictionary with dataset names as keys and file paths as values
                          e.g., {'circuits': 'circuits.csv', 'races': 'races.csv', ...}
                          The 'circuits' entry is required; tables from the
                          merge sequence that are absent are skipped.

    Returns:
        pandas.DataFrame or None: Merged dataset with column suffixes applied
        to names that clash between the two sides of a merge, or None when a
        file cannot be loaded, the 'circuits' table is missing, or a merge fails.
    """
    # Dictionary to store DataFrames
    dfs = {}

    # Read all CSV files; any load failure aborts the whole merge.
    for name, path in file_paths.items():
        try:
            dfs[name] = pd.read_csv(path)
            print(f"Loaded {name} dataset with {len(dfs[name])} rows")
        except FileNotFoundError:
            print(f"Error: File {path} not found")
            return None
        except Exception as e:
            print(f"Error loading {path}: {str(e)}")
            return None

    # The merge chain starts from circuits; without it there is nothing to
    # join onto, so report the problem instead of failing with a KeyError.
    if 'circuits' not in dfs:
        print("Error: 'circuits' dataset is required as the base of the merge")
        return None

    # (table to join, join key, suffix for the running result, suffix for the new table)
    # Each table is always joined onto the running merged frame.
    merge_sequence = [
        ('races', 'circuitId', '_circuit', '_race'),
        ('results', 'raceId', '_race', '_result'),
        ('drivers', 'driverId', '_result', '_driver'),
        ('status', 'statusId', '_result', '_status')
    ]

    # Start with circuits DataFrame (copied so the loaded frame is left untouched)
    merged_df = dfs['circuits'].copy()

    # Perform merges in sequence
    for merge_with, key, suffix1, suffix2 in merge_sequence:
        if merge_with not in dfs:
            print(f"Skipping {merge_with} dataset: not provided")
            continue
        try:
            # Outer join keeps rows that have no match on either side.
            merged_df = pd.merge(
                merged_df,
                dfs[merge_with],
                on=key,
                how='outer',
                suffixes=(suffix1, suffix2)
            )
            print(f"Merged {merge_with} dataset successfully")
        except Exception as e:
            print(f"Error during merge of {merge_with}: {str(e)}")
            return None

    return merged_df

# Example usage: map each dataset name to the CSV file it is stored in.
file_paths = dict([
    ('circuits', 'circuits.csv'),
    ('races', 'races.csv'),
    ('results', 'results.csv'),
    ('drivers', 'drivers.csv'),
    ('status', 'status.csv'),
])

# Run the merge; None signals that loading or merging failed.
full_data = merge_f1_data(file_paths)

if full_data is not None:
    # Report the outcome, the resulting shape and a preview of the first rows.
    print(
        "\nMerge completed successfully!",
        f"Final dataset shape: {full_data.shape}",
        "\nFirst few rows of the merged dataset:",
        full_data.head(),
        sep="\n",
    )

Loaded circuits dataset with 77 rows
Loaded races dataset with 1102 rows
Loaded results dataset with 25840 rows
Loaded drivers dataset with 857 rows
Loaded status dataset with 139 rows
Merged races dataset successfully
Merged results dataset successfully
Merged drivers dataset successfully
Merged status dataset successfully

Merge completed successfully!
Final dataset shape: (25867, 52)

First few rows of the merged dataset:
   circuitId   circuitRef                    name_circuit      location  \
0        2.0       sepang    Sepang International Circuit  Kuala Lumpur   
1       17.0     shanghai  Shanghai International Circuit      Shanghai   
2        3.0      bahrain   Bahrain International Circuit        Sakhir   
3        5.0     istanbul                   Istanbul Park      Istanbul   
4       11.0  hungaroring                     Hungaroring      Budapest   

    country       lat       lng  alt  \
0  Malaysia   2.76083  101.7380   18   
1     China  31.33890  121.2200    5   


In [None]:
import os
import webbrowser
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Work on the merged frame produced by the merge cell
df = full_data


def _format_year_ranges(years):
    """Collapse a non-empty, sorted list of years into text such as "1950-1952, 1955"."""
    ranges = []
    start = prev = years[0]
    for year in years[1:]:
        if year != prev + 1:
            # A gap closes the current run of consecutive years.
            ranges.append(str(start) if start == prev else f"{start}-{prev}")
            start = year
        prev = year
    # Close the final run.
    ranges.append(str(start) if start == prev else f"{start}-{prev}")
    return ", ".join(ranges)


# Get unique years per location.
# Only the distinct (location, year) pairs matter, so drop incomplete rows and
# duplicates up front instead of walking every merged row with iterrows().
visit_columns = ['lat', 'lng', 'name_circuit', 'year']
season_visits = df[visit_columns].dropna().drop_duplicates()

location_data = defaultdict(lambda: {'years': set()})
for lat, lng, name, year in season_visits.itertuples(index=False, name=None):
    location_data[(lat, lng, name)]['years'].add(int(year))

# Convert to DataFrame format: one record per location with its year count
# and a compact text summary of the years.
location_info = []
for (lat, lng, name), data in location_data.items():
    years = sorted(data['years'])
    location_info.append({
        'lat': lat,
        'lng': lng,
        'name_circuit': name,
        'year_count': len(years),
        'years': _format_year_ranges(years)
    })

# Explicit columns keep the expected schema even when no location qualified.
location_counts = pd.DataFrame(
    location_info,
    columns=['lat', 'lng', 'name_circuit', 'year_count', 'years']
)

# Calculate marker sizes proportionally: diameters run linearly from 10
# (fewest years) to 45 (most years).
max_years = location_counts['year_count'].max()
min_years = location_counts['year_count'].min()
year_span = max_years - min_years
if year_span > 0:
    marker_sizes = (location_counts['year_count'] - min_years) / year_span * 35 + 10
else:
    # Every location has the same count (or there are no locations): the
    # ratio would be 0/0 = NaN, so give all markers the mid-range size.
    marker_sizes = pd.Series(27.5, index=location_counts.index)

# F1 red, also used for the view-button labels
f1_red = '#FF1E00'

# Colour scale: the same red at increasing opacity (0.2 -> 0.6 -> 1)
custom_red_scale = [
    [0, 'rgba(255, 30, 0, 0.2)'],
    [0.5, 'rgba(255, 30, 0, 0.6)'],
    [1, 'rgba(255, 30, 0, 1)']
]

# Hover label per circuit: name, number of years raced and the year ranges
hover_text = [
    f"{name}<br>Years of Racing: {count}<br>Years: {years}"
    for name, count, years in zip(
        location_counts['name_circuit'],
        location_counts['year_count'],
        location_counts['years'],
    )
]

# Colour bar shown next to the globe, styled for the dark background
colorbar_style = dict(
    title='Years of Racing',
    tickfont=dict(size=12, color='white'),
    title_font=dict(color='white'),
    x=1.02
)

# Marker size and colour both encode the number of years raced
marker_style = dict(
    size=marker_sizes,
    color=location_counts['year_count'],
    colorscale=custom_red_scale,
    showscale=True,
    colorbar=colorbar_style,
    sizemode='diameter',
    opacity=0.8
)

# One marker per circuit location
circuit_markers = go.Scattergeo(
    lon=location_counts['lng'],
    lat=location_counts['lat'],
    text=hover_text,
    mode='markers',
    marker=marker_style,
    hovertemplate="<b>%{text}</b><br>" +
                  "<extra></extra>"
)

# Create the figure and add the circuit markers to it
fig = go.Figure()
fig.add_trace(circuit_markers)

# Title: white text, horizontally centred
title_style = dict(
    text='F1 Circuit History by Location',
    font=dict(size=24, color='white'),
    y=0.95,
    x=0.5,
    xanchor='center'
)

# Globe: orthographic projection with muted land/ocean colours and faint borders
globe_style = dict(
    projection_type='orthographic',
    projection_rotation=dict(lon=30, lat=30, roll=0),
    showland=True,
    landcolor='rgba(150, 150, 150, 0.8)',
    showocean=True,
    oceancolor='rgba(20, 20, 40, 0.8)',
    showcountries=True,
    countrywidth=0.5,
    countrycolor='rgba(255, 255, 255, 0.2)',
    showcoastlines=True,
    coastlinecolor='rgba(255, 255, 255, 0.4)',
    showframe=False,
    bgcolor='rgba(0,0,0,0)'
)

# Apply the space theme: dark paper, transparent plot area, fixed canvas size
fig.update_layout(
    title=title_style,
    geo=globe_style,
    showlegend=False,
    width=1200,
    height=800,
    autosize=True,
    paper_bgcolor='rgb(10, 10, 30)',
    plot_bgcolor='rgba(0,0,0,0)',
    margin=dict(l=150, r=50, t=100, b=50)
)

# Preset globe rotations, one button each
views = [
    {'lon': 30, 'lat': 30, 'name': 'Europe/Africa'},
    {'lon': 90, 'lat': 30, 'name': 'Asia'},
    {'lon': -100, 'lat': 30, 'name': 'Americas'},
    {'lon': 135, 'lat': -30, 'name': 'Australia'}
]

# Each button relayouts the figure to rotate the projection to its view
buttons = [
    dict(
        label=view['name'],
        method='relayout',
        args=[{
            'geo.projection.rotation':
            {'lon': view['lon'], 'lat': view['lat'], 'roll': 0}
        }]
    )
    for view in views
]

# Button column placed left of the globe
view_menu = dict(
    type='buttons',
    showactive=True,
    buttons=buttons,
    x=-0.15,
    y=0.8,
    xanchor='left',
    yanchor='top',
    bgcolor='rgba(255, 255, 255, 0.1)',
    bordercolor="white",  # White border
    font=dict(size=12, color=f1_red),  # Button text in F1 red
    pad={"r": 10, "t": 10}  # Padding on the right and top
)

fig.update_layout(updatemenus=[view_menu])

# Render inline in the notebook
fig.show()

# Save as a standalone HTML file in the current working directory
output_file = "f1_circuits_globe.html"
fig.write_html(output_file)

# Open in default browser. as_uri() builds a well-formed, percent-encoded
# file:// URI, which plain string concatenation does not for Windows paths
# or paths containing spaces.
webbrowser.open(Path(output_file).resolve().as_uri())

True