In [10]:
import os
from urllib.parse import quote

import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables from .env file so the os.getenv() lookups for the
# PostgreSQL settings (PG_USER, PG_PASSWORD, PG_HOST, PG_DB) can find them.
# The value displayed under this cell is load_dotenv()'s return value.
load_dotenv()

True

In [None]:
# Retrieve credentials from environment variables
pg_user = os.getenv('PG_USER')
pg_password = os.getenv('PG_PASSWORD')
pg_host = os.getenv('PG_HOST')
pg_db = os.getenv('PG_DB')

# Fail fast with a readable message: an unset variable would otherwise be
# interpolated into the connection URL as the literal text "None".
missing_vars = [
    var_name
    for var_name, var_value in (
        ('PG_USER', pg_user),
        ('PG_PASSWORD', pg_password),
        ('PG_HOST', pg_host),
        ('PG_DB', pg_db),
    )
    if var_value is None
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_vars)
    )

# Build the connection string.
# The user name and password are percent-encoded so that reserved characters
# such as '@', ':' or '/' cannot break the URL structure. Store the raw
# (un-encoded) values in .env. PG_HOST is inserted as-is so a "host:port"
# value keeps working.
conn_str = (
    f"postgresql+psycopg2://{quote(pg_user, safe='')}:{quote(pg_password, safe='')}"
    f"@{pg_host}/{pg_db}"
)
engine = create_engine(conn_str)

In [None]:
# Step 1: Pull Qualifying Results from F1 API (2022)
url = "https://ergast.com/api/f1/2022/qualifying.json?limit=1000"

# Fetch and parse the payload in this cell so the next step's `data` lookup
# does not rely on a variable left over from an earlier kernel session.
response = requests.get(url, timeout=30)  # timeout: never hang the kernel on a stalled connection
response.raise_for_status()  # surface HTTP errors instead of parsing an error page
data = response.json()


In [18]:
# Step 2: Navigate to the list of races inside the parsed payload
race_table = data['MRData']['RaceTable']
races = race_table['Races']

In [20]:
# Step 3: Normalize the qualifying results
# Flatten the nested payload into one flat record per qualifying entry;
# race-level fields are repeated on every entry of that race.
qualifying_data_2022 = [
    {
        'season': event['season'],
        'round': event['round'],
        'race_name': event['raceName'],
        'circuit': event['Circuit']['circuitName'],
        'date': event['date'],
        'driver': entry['Driver']['familyName'],
        'constructor': entry['Constructor']['name'],
        'position': entry['position'],
        # Session times are read with .get(): an absent key becomes None.
        'q1': entry.get('Q1'),
        'q2': entry.get('Q2'),
        'q3': entry.get('Q3'),
    }
    for event in races
    for entry in event['QualifyingResults']
]

In [21]:
# Step 4: Build a DataFrame from the flattened records and preview the first rows
df_qualifying_2022 = pd.DataFrame(data=qualifying_data_2022)
df_qualifying_2022.head(n=5)

Unnamed: 0,season,round,race_name,circuit,date,driver,constructor,position,q1,q2,q3
0,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Leclerc,Ferrari,1,1:31.471,1:30.932,1:30.558
1,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Verstappen,Red Bull,2,1:31.785,1:30.757,1:30.681
2,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Sainz,Ferrari,3,1:31.567,1:30.787,1:30.687
3,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Pérez,Red Bull,4,1:32.311,1:31.008,1:30.921
4,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Hamilton,Mercedes,5,1:32.285,1:31.048,1:31.238


In [22]:
# Step 5: Save to SQL
# Writes the frame to table "qualifying_results_2022" in the 'raw' schema,
# replacing any existing table and leaving out the DataFrame index.
sql_write_options = {
    "name": "qualifying_results_2022",
    "con": engine,
    "schema": 'raw',
    "if_exists": 'replace',
    "index": False,
}
df_qualifying_2022.to_sql(**sql_write_options)
print("✅ Qualifying 2022 data uploaded to PostgreSQL (raw schema) successfully.")

✅ Qualifying 2022 data uploaded to PostgreSQL (raw schema) successfully.


In [23]:
# Step 1: Pull Race Results from F1 API (2022)
url = "https://ergast.com/api/f1/2022/results.json?limit=1000"
response = requests.get(url, timeout=30)  # timeout: never hang the kernel on a stalled connection
response.raise_for_status()  # surface HTTP errors instead of parsing an error page
data = response.json()

In [24]:
# Step 2: Navigate to the list of races inside the parsed race-results payload
race_table = data['MRData']['RaceTable']
races = race_table['Races']

In [25]:
# Step 3: Normalize the Race results
# One flat record per entry in each race's 'Results' list; race-level fields
# are repeated on every record of that race.
race_data_2022 = []
for event in races:
    race_data_2022.extend(
        {
            'season': event['season'],
            'round': event['round'],
            'race_name': event['raceName'],
            'circuit': event['Circuit']['circuitName'],
            'date': event['date'],
            'driver': entry['Driver']['familyName'],
            'constructor': entry['Constructor']['name'],
            'grid': entry['grid'],
            'finish': entry['position'],
            'status': entry['status'],
            'points': entry['points'],
        }
        for entry in event['Results']
    )

In [26]:
# Step 4: Build a DataFrame from the flattened records and preview the first rows
# (nothing is written to disk in this cell)
df_race_2022 = pd.DataFrame(data=race_data_2022)
df_race_2022.head(n=5)

Unnamed: 0,season,round,race_name,circuit,date,driver,constructor,grid,finish,status,points
0,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Leclerc,Ferrari,1,1,Finished,26
1,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Sainz,Ferrari,3,2,Finished,18
2,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Hamilton,Mercedes,5,3,Finished,15
3,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Russell,Mercedes,9,4,Finished,12
4,2022,1,Bahrain Grand Prix,Bahrain International Circuit,2022-03-20,Magnussen,Haas F1 Team,7,5,Finished,10


In [27]:
# Step 5: Save 2022 Race Data to SQL
# Writes the frame to table "race_results_2022" in the 'raw' schema,
# replacing any existing table and leaving out the DataFrame index.
sql_write_options = {
    "name": "race_results_2022",
    "con": engine,
    "schema": 'raw',
    "if_exists": 'replace',
    "index": False,
}
df_race_2022.to_sql(**sql_write_options)
print("✅ Race 2022 data uploaded to PostgreSQL (raw schema) successfully.")

✅ Race 2022 data uploaded to PostgreSQL (raw schema) successfully.


In [29]:
# Step 1: Pull Qualifying Results from F1 API (2023)
url = "https://ergast.com/api/f1/2023/qualifying.json?limit=1000"
response = requests.get(url, timeout=30)  # timeout: never hang the kernel on a stalled connection
print("Status Code:", response.status_code)
# Stop here on an HTTP error; the status printed above shows which one it was.
response.raise_for_status()
print("Sample Keys:", response.json().keys())

Status Code: 200
Sample Keys: dict_keys(['MRData'])


In [30]:
# Step 2: Navigate to the list of races inside the qualifying response
payload = response.json()
races = payload['MRData']['RaceTable']['Races']

In [31]:
# Step 3: Normalize the qualifying results
# Flatten the nested payload into one flat record per qualifying entry;
# race-level fields are repeated on every entry of that race.
qualifying_data = [
    {
        'season': event['season'],
        'round': event['round'],
        'race_name': event['raceName'],
        'circuit': event['Circuit']['circuitName'],
        'date': event['date'],
        'driver': entry['Driver']['familyName'],
        'constructor': entry['Constructor']['name'],
        'position': entry['position'],
        # Session times are read with .get(): an absent key becomes None.
        'q1': entry.get('Q1'),
        'q2': entry.get('Q2'),
        'q3': entry.get('Q3'),
    }
    for event in races
    for entry in event['QualifyingResults']
]

In [32]:
# Step 4: Build a DataFrame from the flattened records and preview the first rows
df_qualifying = pd.DataFrame(data=qualifying_data)
df_qualifying.head(n=5)

Unnamed: 0,season,round,race_name,circuit,date,driver,constructor,position,q1,q2,q3
0,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Verstappen,Red Bull,1,1:31.295,1:30.503,1:29.708
1,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Pérez,Red Bull,2,1:31.479,1:30.746,1:29.846
2,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Leclerc,Ferrari,3,1:31.094,1:30.282,1:30.000
3,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Sainz,Ferrari,4,1:30.993,1:30.515,1:30.154
4,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Alonso,Aston Martin,5,1:31.158,1:30.645,1:30.336


In [33]:
# Step 5: Save 2023 Qualifying Data to SQL
# Writes the frame to table "qualifying_results_2023" in the 'raw' schema,
# replacing any existing table and leaving out the DataFrame index.
sql_write_options = {
    "name": "qualifying_results_2023",
    "con": engine,
    "schema": 'raw',
    "if_exists": 'replace',
    "index": False,
}
df_qualifying.to_sql(**sql_write_options)
print("✅ Qualifying 2023 data uploaded to PostgreSQL (raw schema) successfully.")

✅ Qualifying 2023 data uploaded to PostgreSQL (raw schema) successfully.


In [34]:
# Step 1: Pull Race Results from F1 API (2023)
url = "https://ergast.com/api/f1/2023/results.json?limit=1000"
response = requests.get(url, timeout=30)  # timeout: never hang the kernel on a stalled connection
response.raise_for_status()  # surface HTTP errors instead of parsing an error page
data = response.json()

In [35]:
# Step 2: Navigate to the list of races inside the parsed race-results payload
race_table = data['MRData']['RaceTable']
races = race_table['Races']

In [36]:
# Step 3: Normalize the race results
# One flat record per entry in each race's 'Results' list; race-level fields
# are repeated on every record of that race.
race_data = []
for event in races:
    race_data.extend(
        {
            'season': event['season'],
            'round': event['round'],
            'race_name': event['raceName'],
            'circuit': event['Circuit']['circuitName'],
            'date': event['date'],
            'driver': entry['Driver']['familyName'],
            'constructor': entry['Constructor']['name'],
            'grid': entry['grid'],        # starting position
            'finish': entry['position'],  # final classification
            'status': entry['status'],    # finished / did not finish / etc.
            'points': entry['points'],    # points earned
        }
        for entry in event['Results']
    )

In [37]:
# Step 4: Build a DataFrame from the flattened records and preview the first rows
df_race = pd.DataFrame(data=race_data)
df_race.head(n=5)

Unnamed: 0,season,round,race_name,circuit,date,driver,constructor,grid,finish,status,points
0,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Verstappen,Red Bull,1,1,Finished,25
1,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Pérez,Red Bull,2,2,Finished,18
2,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Alonso,Aston Martin,5,3,Finished,15
3,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Sainz,Ferrari,4,4,Finished,12
4,2023,1,Bahrain Grand Prix,Bahrain International Circuit,2023-03-05,Hamilton,Mercedes,7,5,Finished,10


In [38]:
# Step 5: Save 2023 Race Data to SQL
# Writes the frame to table "race_results_2023" in the 'raw' schema,
# replacing any existing table and leaving out the DataFrame index.
sql_write_options = {
    "name": "race_results_2023",
    "con": engine,
    "schema": 'raw',
    "if_exists": 'replace',
    "index": False,
}
df_race.to_sql(**sql_write_options)
print("✅ Race 2023 data uploaded to PostgreSQL (raw schema) successfully.")

✅ Race 2023 data uploaded to PostgreSQL (raw schema) successfully.
