In [1]:
!pip install Faker



In [None]:
import csv
from faker import Faker
import random

fake = Faker()

# Record counts: how many rows each generated CSV file receives.
# Seasons use their own, smaller count; every other table shares one size.
_rows_per_table = 3000

num_seasons = 20
num_drivers = _rows_per_table
num_teams = _rows_per_table
num_circuits = _rows_per_table
num_races = _rows_per_table
num_race_results = _rows_per_table
num_qualifying_results = _rows_per_table
num_pit_stops = _rows_per_table
num_penalties = _rows_per_table
num_team_standings = _rows_per_table

def make_writer(f, minimal_quote=False):
    """Return a ``csv.writer`` bound to the open text file *f*.

    Args:
        f: Writable text file object the rows are written to.
        minimal_quote: When true, use ``csv.QUOTE_MINIMAL`` so that only
            fields containing special characters (such as the delimiter)
            are quoted. When false (the default), use ``csv.QUOTE_NONE``:
            nothing is ever quoted and special characters are prefixed
            with a backslash instead.

    Returns:
        The configured ``csv.writer`` object.
    """
    if not minimal_quote:
        # QUOTE_NONE needs an escapechar, otherwise the writer raises an
        # error as soon as a field contains the delimiter.
        return csv.writer(f, quoting=csv.QUOTE_NONE, escapechar='\\')
    return csv.writer(f, quoting=csv.QUOTE_MINIMAL)

# 1. Seasons
# One row per season: ids run from 1 to num_seasons and the season year is
# 2000 + id; the description is derived from the year.
with open('seasons.csv', 'w', newline='') as f:
    writer = make_writer(f)
    writer.writerow(['season_id','season_year','description'])
    writer.writerows(
        [season_id, 2000 + season_id, f"Season {2000 + season_id} details"]
        for season_id in range(1, num_seasons + 1)
    )

# 2. Drivers
# One row per driver (ids 1..num_drivers) with Faker-generated first name,
# last name, nationality and birth date.
# The encoding is pinned to UTF-8, matching the files in this script that
# already pass an explicit encoding, so the output does not depend on the
# platform default if generated text contains non-ASCII characters.
with open('drivers.csv', 'w', newline='', encoding='utf8') as f:
    writer = make_writer(f)
    writer.writerow(['driver_id','first_name','last_name','nationality','birth_date'])
    for driver_id in range(1, num_drivers + 1):
        writer.writerow([
            driver_id,
            fake.first_name(),
            fake.last_name(),
            fake.country(),
            fake.date_of_birth(),
        ])

# 3. Teams
# Writes num_teams rows with sequential ids. Team names must be unique, so
# a freshly generated name that was already used is discarded and another
# one is drawn; the id only advances when a row is actually written.
seen_names = set()
with open('teams.csv', 'w', newline='', encoding='utf8') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['team_id','team_name','base_location'])

    tid = 1
    while tid <= num_teams:
        name = fake.company() + " Racing"
        if name not in seen_names:
            seen_names.add(name)
            # The base location is only generated for names that are kept.
            location = fake.city()
            writer.writerow([tid, name, location])
            tid += 1

# 4. Circuits
# One row per circuit (ids 1..num_circuits) with a Faker-generated circuit
# name, location and country.
# These fields are free text that may contain commas, so the writer quotes
# fields when needed (QUOTE_MINIMAL) instead of backslash-escaping them.
# The encoding is pinned to UTF-8 so the output does not depend on the
# platform default.
with open('circuits.csv', 'w', newline='', encoding='utf8') as f:
    writer = make_writer(f, minimal_quote=True)
    writer.writerow(['circuit_id','circuit_name','location','country'])
    for circuit_id in range(1, num_circuits + 1):
        writer.writerow([
            circuit_id,
            fake.city() + " Circuit",
            fake.city(),
            fake.country(),
        ])

# 5. Races
# One row per race (ids 1..num_races). Season and circuit are picked at
# random from the valid id ranges, the date falls within the last five
# years, and the round number is a random value from 1 to 22.
# The encoding is pinned to UTF-8, matching the files in this script that
# already pass an explicit encoding, because race names contain
# Faker-generated city names.
with open('races.csv', 'w', newline='', encoding='utf8') as f:
    writer = make_writer(f)
    writer.writerow(['race_id','season_id','race_name','circuit_id','race_date','round_number'])
    for race_id in range(1, num_races + 1):
        writer.writerow([
            race_id,
            random.randint(1, num_seasons),
            "Grand Prix " + fake.city(),
            random.randint(1, num_circuits),
            fake.date_between(start_date='-5y', end_date='today'),
            random.randint(1, 22),
        ])

# 6. RaceResults
# Writes num_race_results rows, each with a distinct (race_id, driver_id)
# pair.
#
# Every possible pair is numbered 0 .. num_races * num_drivers - 1 and the
# required amount is drawn without replacement. Unlike a retry-on-duplicate
# loop, this cannot spin forever: random.sample raises ValueError up front
# (before the file is touched) when more rows are requested than distinct
# pairs exist.
pair_indexes = random.sample(range(num_races * num_drivers), num_race_results)

with open('race_results.csv', 'w', newline='', encoding='utf8') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    writer.writerow([
        'race_id', 'driver_id', 'team_id',
        'finishing_position','points_awarded','fastest_lap_time'
    ])

    for pair_index in pair_indexes:
        # Decode the pair number back into zero-based race and driver
        # offsets; ids in the CSV are one-based.
        race_offset, driver_offset = divmod(pair_index, num_drivers)
        writer.writerow([
            race_offset + 1,
            driver_offset + 1,
            random.randint(1, num_teams),
            random.randint(1, 20),
            round(random.uniform(0, 25), 2),
            fake.time(),
        ])

# 7. QualifyingResults
# Writes num_qualifying_results rows, each with a distinct
# (race_id, driver_id) pair, mirroring the uniqueness that race_results.csv
# enforces. Independent random draws would repeat a pair fairly often at
# these table sizes.
# NOTE(review): assumes (race_id, driver_id) identifies a qualifying row,
# as the absence of a separate id column suggests - confirm against the
# target schema.
#
# Every possible pair is numbered 0 .. num_races * num_drivers - 1 and the
# required amount is drawn without replacement; random.sample raises
# ValueError if more rows are requested than distinct pairs exist.
qualifying_pair_indexes = random.sample(
    range(num_races * num_drivers), num_qualifying_results
)

# The encoding is pinned to UTF-8, matching the files in this script that
# already pass an explicit encoding.
with open('qualifying_results.csv', 'w', newline='', encoding='utf8') as f:
    writer = make_writer(f)
    writer.writerow(['race_id','driver_id','qualifying_position','qualifying_time'])
    for pair_index in qualifying_pair_indexes:
        # Decode the pair number into zero-based offsets; ids are one-based.
        race_offset, driver_offset = divmod(pair_index, num_drivers)
        writer.writerow([
            race_offset + 1,
            driver_offset + 1,
            random.randint(1, 20),
            fake.time(),
        ])

# 8. PitStops
# One row per pit stop (ids 1..num_pit_stops). Race and driver are random
# ids from their valid ranges, the stop number is 1-3, the stop time comes
# from Faker and the completed-lap count is 1-70.
with open('pit_stops.csv', 'w', newline='') as f:
    writer = make_writer(f)
    writer.writerow(['pit_stop_id','race_id','driver_id','pit_stop_number','pit_stop_time','laps_completed'])
    writer.writerows(
        [
            pit_stop_id,
            random.randint(1, num_races),
            random.randint(1, num_drivers),
            random.randint(1, 3),
            fake.time(),
            random.randint(1, 70),
        ]
        for pit_stop_id in range(1, num_pit_stops + 1)
    )

# 9. Penalties
# One row per penalty (ids 1..num_penalties). Race and driver are random
# ids from their valid ranges, the type is one of three fixed labels, the
# points value is 0-10 and the description is a Faker-generated sentence.
# The encoding is pinned to UTF-8, matching the files in this script that
# already pass an explicit encoding, because the description is free text.
with open('penalties.csv', 'w', newline='', encoding='utf8') as f:
    writer = make_writer(f)
    writer.writerow(['penalty_id','race_id','driver_id','penalty_type','penalty_points','description'])
    types = ["Time Penalty","Drive-through","Stop-and-go"]
    for penalty_id in range(1, num_penalties + 1):
        writer.writerow([
            penalty_id,
            random.randint(1, num_races),
            random.randint(1, num_drivers),
            random.choice(types),
            random.randint(0, 10),
            fake.sentence(),
        ])

# 10. TeamStandings
# Writes num_team_standings rows with sequential standing ids, each with a
# distinct (season_id, team_id) pair.
#
# Every possible pair is numbered 0 .. num_seasons * num_teams - 1 and the
# required amount is drawn without replacement. Unlike a retry-on-duplicate
# loop, this cannot spin forever: random.sample raises ValueError up front
# (before the file is touched) when more rows are requested than distinct
# pairs exist.
standing_pair_indexes = random.sample(
    range(num_seasons * num_teams), num_team_standings
)

with open('team_standings.csv', 'w', newline='', encoding='utf8') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['standing_id','season_id','team_id','total_points','rank'])

    for sid, pair_index in enumerate(standing_pair_indexes, start=1):
        # Decode the pair number into zero-based offsets; ids are one-based.
        season_offset, team_offset = divmod(pair_index, num_teams)
        writer.writerow([
            sid,
            season_offset + 1,
            team_offset + 1,
            round(random.uniform(0, 500), 2),
            random.randint(1, 20),
        ])