In [1]:
import pandas as pd
import requests
import time
from datetime import date, datetime

from config import *

########################################
# Download NHTSA safety ratings
########################################
# Functions below fetch safety-rating data from the NHTSA API
import requests
from requests.exceptions import Timeout, RequestException
import time

# Get rating years: fetch the list of model years that have safety ratings
# and store it in the 'ratings_years' table.
url = 'https://api.nhtsa.gov/SafetyRatings'
# A timeout keeps a stalled connection from hanging the cell indefinitely, and
# raise_for_status() reports an HTTP error directly instead of failing later
# on a missing 'Results' key.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.json()
rating_years = pd.DataFrame(response['Results'])
db = pg_connect()  # project helper; presumably returns a SQLAlchemy engine - confirm
try:
    # 'replace' rewrites the table on every run so it mirrors the API's current list
    rating_years.to_sql('ratings_years',db,index=False,if_exists='replace')
finally:
    # Release the connection pool even when the write fails
    db.dispose()

Configuration loaded successfully.


In [21]:
# Get makes for ratings year
def get_makes_for_rating_year(year,retries=3, timeout=30):
    """Fetch the makes that have safety ratings for one model year.

    Args:
        year: Model year interpolated into the request URL.
        retries: Maximum number of attempts when the request raises.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The value of the ``'Results'`` key of the JSON body (``[]`` when the
        key is absent) on HTTP 200. ``None`` for any other status code (not
        retried) or when every attempt raised an exception.
    """
    url = f'https://api.nhtsa.gov/SafetyRatings/modelyear/{year}'
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                # A non-200 answer is treated as final; only exceptions are retried.
                return None
            return response.json().get('Results', [])
        except Timeout:
            print(f"Attempt {attempt} timed out. Retrying...")
        except Exception as e:
            # Covers connection errors as well as a malformed JSON body.
            print(f"Request failed: {e}")
        if attempt < retries:
            # Back off between attempts, but do not stall after the last one.
            time.sleep(5)

    # If all attempts fail, return None
    print("All attempts to contact the API have failed.")
    return None

# download makes for rating years (model years 2019 and later) and append
# them to 'ratings_makes_for_years'
db = pg_connect()
try:
    for year in rating_years.loc[rating_years['ModelYear']>=2019, 'ModelYear']:
        temp = get_makes_for_rating_year(year)
        if not temp:
            # None means the request failed and [] means nothing was returned;
            # either way there are no rows, and writing a column-less frame
            # could create a malformed table on a first run.
            print(f"No makes retrieved for {year}; skipping")
            continue
        temp_df = pd.DataFrame(temp)
        temp_df.to_sql('ratings_makes_for_years',db,index=False,if_exists='append')
finally:
    # Release the connection pool even if a write fails part-way through
    db.dispose()

# Appending on every run re-inserts the same rows; the project helper removes
# them (it prints "Duplicates removed from ..." in the recorded output).
pg_clean_table('ratings_makes_for_years')

Duplicates removed from ratings_makes_for_years


In [22]:
# Get models for a make in a ratings year
def get_models_for_make_rating_years(year,make,retries=3, timeout=30):
    """Fetch the rated models of one make for one model year.

    Args:
        year: Model year interpolated into the request URL.
        make: Make name; percent-encoded before being placed in the URL path.
        retries: Maximum number of attempts when the request raises.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The value of the ``'Results'`` key of the JSON body (``[]`` when the
        key is absent) on HTTP 200. ``None`` for any other status code (not
        retried) or when every attempt raised an exception.
    """
    from urllib.parse import quote

    # Escape characters that would change the URL structure ('/', '?', '#',
    # '%', spaces) while leaving ordinary path punctuation such as parentheses
    # untouched.
    path_safe = "!$&'()*+,;=:@"
    make_segment = quote(str(make), safe=path_safe)
    url = f'https://api.nhtsa.gov/SafetyRatings/modelyear/{year}/make/{make_segment}'
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                # A non-200 answer is treated as final; only exceptions are retried.
                return None
            return response.json().get('Results', [])
        except Timeout:
            print(f"Attempt {attempt} timed out. Retrying...")
        except Exception as e:
            # Covers connection errors as well as a malformed JSON body.
            print(f"Request failed: {e}")
        if attempt < retries:
            # Back off between attempts, but do not stall after the last one.
            time.sleep(5)

    # If all attempts fail, return None
    print("All attempts to contact the API have failed.")
    return None
    

# Bootstrap the tracker table the first time this cell runs. Every row is
# seeded with a last-updated timestamp 1000 years in the past and a
# download count of zero.
tracker_exists = 'ratings_model_download_tracker' in pg_tables()
if not tracker_exists:
    pg_execute("""
create table ratings_model_download_tracker as
select
	*,
	CURRENT_TIMESTAMP - interval '1000 years' as models_last_updated,
    0 as models_downloaded
from ratings_makes_for_years
""")
    print("ratings_model_download_tracker table created")

# Register every (ModelYear, Make, VehicleId) combination that is present in
# ratings_makes_for_years but not yet tracked.
register_new_rows_sql = """
INSERT INTO ratings_model_download_tracker
select distinct on ("ModelYear","Make","VehicleId")
	"ModelYear",
	"Make",
    "VehicleId",
	CURRENT_TIMESTAMP - interval '1000 years' as models_last_updated,
    0 as models_downloaded
from ratings_makes_for_years
where ("ModelYear","Make","VehicleId") not in (select "ModelYear","Make","VehicleId" from ratings_model_download_tracker)
"""
pg_execute(register_new_rows_sql)
print("ratings_model_download_tracker updated")



ratings_model_download_tracker updated


In [23]:
# Select tracker rows that were last refreshed more than 15 days ago and
# whose model year falls within the last five years.
ratings_model_download_tracker = pg_query("""
select
    *
from ratings_model_download_tracker
where models_last_updated < current_date - interval '15 days'
and "ModelYear"::int >= extract(year from current_date) - 5
""")

db = pg_connect()
try:
    for _,row in ratings_model_download_tracker.iterrows():
        temp = get_models_for_make_rating_years(row['ModelYear'],row['Make'])
        time.sleep(1)  # pause between API calls
        if temp is None:
            # The request failed: leave the tracker row untouched so the next
            # run retries it instead of recording "0 models" for 15 days.
            print(f"rating models for {row['ModelYear']} {row['Make']} could not be fetched; will retry on next run")
            continue
        temp_df = pd.DataFrame(temp)
        if not temp_df.empty:
            # Skip the write for an empty result; a column-less frame has
            # nothing to append.
            temp_df.to_sql('ratings_models',db,index=False,if_exists='append')
        with db.connect() as connection:
            # `text` is not imported in this notebook's visible cells;
            # presumably sqlalchemy.text via `from config import *` - confirm.
            query = text("""
                   update ratings_model_download_tracker
                   set models_last_updated = current_timestamp, models_downloaded = :a
                   where "ModelYear"::int = :x and "Make" = :y and "VehicleId" = :z
                   """)
            connection.execute(query,{'a':len(temp_df),'x':row['ModelYear'],'y':row['Make'],'z':row['VehicleId']})
            connection.commit()
        # Mixed quote styles keep this f-string valid on Python versions before 3.12
        print(f"rating models for {row['ModelYear']} {row['Make']} updated")
finally:
    # Release the connection pool even if an iteration raises
    db.dispose()

pg_clean_table('ratings_models')

rating models for 2019 ACURA updated
rating models for 2019 ALFA ROMEO updated
rating models for 2019 AUDI updated
rating models for 2019 BENTLEY updated
rating models for 2019 BMW updated
rating models for 2019 BUICK updated
rating models for 2019 CADILLAC updated
rating models for 2019 CHEVROLET updated
rating models for 2019 CHRYSLER updated
rating models for 2019 DODGE updated
rating models for 2019 FIAT updated
rating models for 2019 FORD updated
rating models for 2019 FREIGHTLINER updated
rating models for 2019 GENESIS updated
rating models for 2019 GMC updated
rating models for 2019 HONDA updated
rating models for 2019 HYUNDAI updated
rating models for 2019 INFINITI updated
rating models for 2019 JAGUAR updated
rating models for 2019 JEEP updated
rating models for 2019 KIA updated
rating models for 2019 LAND ROVER updated
rating models for 2019 LEXUS updated
rating models for 2019 LINCOLN updated
rating models for 2019 MASERATI updated
rating models for 2019 MAZDA updated
rating

In [46]:
# Create the variants tracker the first time this cell runs. The guard must
# test for the table that is actually created here; testing for a different
# table either skips creation or re-runs CREATE TABLE on an existing table.
if 'ratings_model_variants_download_tracker' not in pg_tables():
    query = """
create table ratings_model_variants_download_tracker as
select
	*,
	CURRENT_TIMESTAMP - interval '1000 years' as variants_last_updated,
    0 as total_variants
from ratings_models
"""
    pg_execute(query)
    print("ratings_model_variants_download_tracker table created")

# Update the variants download tracker: register every
# (ModelYear, Make, Model, VehicleId) combination from ratings_models that is
# not tracked yet, seeded with a timestamp 1000 years in the past.
pg_execute("""
INSERT INTO ratings_model_variants_download_tracker
select distinct on ("ModelYear","Make","Model","VehicleId")
	"ModelYear",
	"Make",
    "Model",
    "VehicleId",     
	CURRENT_TIMESTAMP - interval '1000 years' as variants_last_updated,
    0 as total_variants
from ratings_models
where ("ModelYear","Make","Model","VehicleId") not in (select "ModelYear","Make","Model","VehicleId" from ratings_model_variants_download_tracker)
""")

ratings_model_variants_download_tracker table created


'done'

In [36]:
# Get variants for a model of a make in a ratings year
def get_model_variants(year,make,model,retries=3, timeout=30):
    """Fetch the rated variants of one model for one make and model year.

    Args:
        year: Model year interpolated into the request URL.
        make: Make name; percent-encoded before being placed in the URL path.
        model: Model name; percent-encoded before being placed in the URL path.
        retries: Maximum number of attempts when the request raises.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The value of the ``'Results'`` key of the JSON body (``[]`` when the
        key is absent) on HTTP 200. ``None`` for any other status code (not
        retried) or when every attempt raised an exception.
    """
    from urllib.parse import quote

    # Escape characters that would change the URL structure ('/', '?', '#',
    # '%', spaces) while leaving ordinary path punctuation such as parentheses
    # untouched.
    path_safe = "!$&'()*+,;=:@"
    make_segment = quote(str(make), safe=path_safe)
    model_segment = quote(str(model), safe=path_safe)
    url = f'https://api.nhtsa.gov/SafetyRatings/modelyear/{year}/make/{make_segment}/model/{model_segment}'
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                # A non-200 answer is treated as final; only exceptions are retried.
                return None
            return response.json().get('Results', [])
        except Timeout:
            print(f"Attempt {attempt} timed out. Retrying...")
        except Exception as e:
            # Covers connection errors as well as a malformed JSON body.
            print(f"Request failed: {e}")
        if attempt < retries:
            # Back off between attempts, but do not stall after the last one.
            time.sleep(5)

    # If all attempts fail, return None
    print("All attempts to contact the API have failed.")
    return None

In [49]:
# Select variant-tracker rows not refreshed in the last 7 days for the current
# model year or later.
variants_query = """
    select
        *
    from ratings_model_variants_download_tracker
    where variants_last_updated < current_date - interval '7 days'
    and "ModelYear" >= extract(year from current_date)
"""
if 'ratings_download_tracker' in pg_tables():
    # Once the ratings tracker exists, skip vehicles that already have a rating.
    variants_query += """    and "VehicleId" not in (select "VehicleId" from ratings_download_tracker where rated = true)
"""
variants_download_tracker = pg_query(variants_query)

db = pg_connect()
try:
    for _, row in variants_download_tracker.iterrows():
        # Download variants
        variants = get_model_variants(row['ModelYear'],row['Make'],row['Model'])
        time.sleep(1)  # pause between API calls
        if variants is None:
            # The request failed: leave the tracker row untouched so the next
            # run retries it instead of recording "0 variants".
            print(f"{row['ModelYear']} {row['Make']} {row['Model']} variants could not be fetched; will retry on next run")
            continue
        variants_df = pd.DataFrame(variants)
        # Tag each variant with the tracker row it was requested for
        variants_df['ModelYear'] = row['ModelYear']
        variants_df['Make'] = row['Make']
        variants_df['Model'] = row['Model']
        if not variants_df.empty:
            variants_df.to_sql('ratings_models_variants',db,index=False,if_exists='append')
        # Record the refresh time and variant count on the tracker row
        with db.connect() as connection:
            # `text` is not imported in this notebook's visible cells;
            # presumably sqlalchemy.text via `from config import *` - confirm.
            query = text("""
            update ratings_model_variants_download_tracker
            set variants_last_updated = current_timestamp, total_variants = :a
            where "ModelYear" = :x and "Make" = :y and "Model" = :z and "VehicleId" = :w
            """)
            connection.execute(query,{'a':variants_df.shape[0],'x':row['ModelYear'],'y':row['Make'],'z':row['Model'],'w':row['VehicleId']})
            connection.commit()
        # Mixed quote styles keep this f-string valid on Python versions before 3.12
        print(f"{row['ModelYear']} {row['Make']} {row['Model']} variants downloaded")
finally:
    # The original cell never released the engine; dispose of it here
    db.dispose()
pg_clean_table('ratings_models_variants')

2024 CADILLAC XT5 variants downloaded
2024 CADILLAC CT5-V variants downloaded
2024 AUDI SQ7 variants downloaded
2024 MERCEDES-BENZ MERCEDES-BENZ ESPRINTER (SO) variants downloaded
2024 BUICK ENVISTA variants downloaded
2024 FORD F-250 (CREW CAB) variants downloaded
2024 FORD TRANSIT VAN BEV variants downloaded
2024 BMW M4 CONVERTIBLE variants downloaded
2024 BMW X6 M variants downloaded
2024 MERCEDES-BENZ SPRINTER 2500 12 PASSENGER VAN variants downloaded
2024 FORD EXPLORER variants downloaded
2024 JEEP WAGONEER variants downloaded
2024 PORSCHE TAYCAN GTS SPORT TURISMO variants downloaded
2024 BENTLEY FLYING SPUR HYBRID variants downloaded
2024 PORSCHE 718 CAYMAN GTS 4.0 variants downloaded
2024 CHEVROLET CORVETTE variants downloaded
2024 MITSUBISHI MIRAGE G4 variants downloaded
2024 MERCEDES-BENZ GT COUPE variants downloaded
2024 MERCEDES-BENZ EQE variants downloaded
2024 JAGUAR E-PACE variants downloaded
2024 SUBARU CROSSTREK WILDERNESS variants downloaded
2024 LEXUS RC 350 variants 

In [93]:
# Get the safety rating for a vehicle id
def get_rating(vehicleid,retries=3, timeout=30):
    """Fetch the safety rating record for one NHTSA vehicle id.

    Args:
        vehicleid: Vehicle id interpolated into the request URL.
        retries: Maximum number of attempts when the request raises.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        The value of the ``'Results'`` key of the JSON body (``[]`` when the
        key is absent) on HTTP 200. ``None`` for any other status code (not
        retried) or when every attempt raised an exception.
    """
    url = f'https://api.nhtsa.gov/SafetyRatings/VehicleId/{vehicleid}'
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                # A non-200 answer is treated as final; only exceptions are retried.
                return None
            return response.json().get('Results', [])
        except Timeout:
            print(f"Attempt {attempt} timed out. Retrying...")
        except Exception as e:
            # Covers connection errors as well as a malformed JSON body.
            print(f"Request failed: {e}")
        if attempt < retries:
            # Back off between attempts, but do not stall after the last one.
            time.sleep(5)

    # If all attempts fail, return None
    print("All attempts to contact the API have failed.")
    return None

# Create the ratings tracker the first time this cell runs: one row per row of
# ratings_models_variants, flagged as not yet fetched and not yet rated, with a
# last-updated timestamp 1000 years in the past.
if 'ratings_download_tracker' not in pg_tables():
    query = """
    create table ratings_download_tracker as 
    select
        *,
        current_timestamp - interval '1000 years' as ratings_last_updated,
        false as fetched_ratings,
        false as rated
    from ratings_models_variants            
    """
    pg_execute(query)

# Update the ratings download tracker with vehicle ids that are not tracked yet.
# The INSERT matches columns by position, so the select list must keep the
# same order as the CREATE TABLE above; the alias is spelled like the
# tracker column to keep the two statements visibly in sync.
pg_execute("""
INSERT INTO ratings_download_tracker
select distinct on ("VehicleId")
	*,
	current_timestamp - interval '1000 years' as ratings_last_updated,
    false as fetched_ratings,
    false as rated
from ratings_models_variants
where ("VehicleId") not in (select "VehicleId" from ratings_download_tracker)
""")

'done'

In [97]:
# Work list, in three parts:
#   1. vehicles whose rating was never fetched;
#   2. up to 50 current-model-year vehicles that were fetched but still
#      unrated and not re-checked in the last 14 days;
#   3. every future-model-year vehicle that was fetched but still unrated.
ratings_download_tracker = pg_query("""
select 
    *
from ratings_download_tracker
where fetched_ratings = false
union all
(select
    *
from ratings_download_tracker
where fetched_ratings = true
and rated = false
and ratings_last_updated < current_date - interval '14 days'
and "ModelYear"::int = extract(year from current_date)
limit 50
)
union all 
(select
    *
from ratings_download_tracker
where fetched_ratings = true
and rated = false
and "ModelYear"::int > extract(year from current_date)
)
""")

# Columns of the API response that are stored in the 'ratings' table
rating_columns = ['OverallRating', 'OverallFrontCrashRating',
       'FrontCrashDriversideRating', 'FrontCrashPassengersideRating',
       'OverallSideCrashRating', 'SideCrashDriversideRating',
       'SideCrashPassengersideRating',
       'combinedSideBarrierAndPoleRating-Front',
       'combinedSideBarrierAndPoleRating-Rear', 'sideBarrierRating-Overall',
       'RolloverRating', 'RolloverRating2', 'RolloverPossibility',
       'RolloverPossibility2', 'dynamicTipResult', 'SidePoleCrashRating',
       'NHTSAElectronicStabilityControl', 'NHTSAForwardCollisionWarning',
       'NHTSALaneDepartureWarning', 'ModelYear', 'Make', 'Model',
       'VehicleDescription', 'VehicleId']

db = pg_connect()
try:
    # One database timestamp for the whole batch, so every row written in this
    # run shares the same rating_updated_on value.
    t = pg_query("select current_timestamp")['current_timestamp'][0]
    for _,row in ratings_download_tracker.iterrows():
        ratings = get_rating(row['VehicleId'])
        time.sleep(1)  # pause between API calls
        if not ratings:
            # None (request failed) or [] (no record): selecting columns from an
            # empty frame would raise and abort the whole batch. Leave the
            # tracker row untouched so a later run tries again.
            print(f"{row['VehicleDescription']} ratings could not be fetched; skipping.")
            continue
        # .copy() makes the selection an independent frame before adding a column
        ratings_df = pd.DataFrame(ratings)[rating_columns].copy()
        ratings_df['rating_updated_on'] = t
        ratings_df.to_sql('ratings',db,index=False,if_exists='append')
        # The API reports the literal string 'Not Rated' for untested vehicles
        rated = bool(ratings_df['OverallRating'].iloc[0] != 'Not Rated')

        with db.connect() as connection:
            # `text` is not imported in this notebook's visible cells;
            # presumably sqlalchemy.text via `from config import *` - confirm.
            query = text("""
            update ratings_download_tracker
            set 
                ratings_last_updated = :t, 
                fetched_ratings = true, 
                rated = :r
            where  "VehicleId" = :a
            """)
            connection.execute(query,{'a':row['VehicleId'],'r':rated,'t':t})
            connection.commit()
        # Mixed quote styles keep this f-string valid on Python versions before 3.12
        print(f"{row['VehicleDescription']} ratings fetched.")
finally:
    # Release the connection pool even if an iteration raises
    db.dispose()

# Keep only the most recent rating row per vehicle: copy the newest row per
# VehicleId into a scratch table, empty 'ratings', and copy the rows back.
pg_execute("drop table if exists ratings_backup")
pg_execute("""
create table ratings_backup as
select distinct on ("VehicleId")
*
from ratings
order by "VehicleId", rating_updated_on desc
""")
pg_execute("delete from ratings")
pg_execute("""
insert into ratings
select * from ratings_backup
""")
pg_execute("drop table ratings_backup")
print("ratings updated")

2025 Honda CR-V SUV FWD ratings fetched.
2025 Lexus NX 350h SUV AWD ratings fetched.
2025 Lexus NX 250 SUV FWD ratings fetched.
2025 Ford Explorer SUV RWD ratings fetched.
2025 Ram Ram 1500 Crew Cab PU/CC RWD ratings fetched.
2025 Subaru Outback Wilderness SW AWD ratings fetched.
2025 Ford Explorer HEV SUV 4WD ratings fetched.
2025 Lexus NX 350 SUV AWD ratings fetched.
2025 Ram Ram 1500 Crew Cab PU/CC 4WD ratings fetched.
2025 Ram Ram 1500 Quad Cab PU/EC 4WD ratings fetched.
2025 Acura MDX SUV AWD ratings fetched.
2025 Ford Explorer HEV SUV RWD ratings fetched.
2025 Ram Ram 1500 Crew Cab TRX PU/CC 4WD ratings fetched.
2025 Ford Explorer SUV 4WD ratings fetched.
2025 Audi SQ7 SUV AWD ratings fetched.
2025 Lincoln Aviator SUV 4WD ratings fetched.
2025 Honda HR-V SUV AWD ratings fetched.
2025 Lexus NX 250 SUV AWD ratings fetched.
2025 Subaru Forester SUV AWD ratings fetched.
2025 Kia Sorento Hybrid SUV AWD ratings fetched.
2025 Lincoln Aviator SUV RWD ratings fetched.
2025 Ram Ram 1500 Qu