In [5]:

import requests
from pprint import pprint
import pandas as pd
from datetime import datetime

In [6]:
# Base URL for the NHTSA Crash API; the endpoint name is appended per request.
base_url = "https://crashviewer.nhtsa.dot.gov/CrashAPI/crashes/"

# The API identifies states by FIPS code (the "State" column of the results
# holds e.g. 6 for California and 48 for Texas), not by a 0-49 index.
# FIPS codes run from 1 to 56; 3, 7, 14, 43 and 52 are not assigned to any
# state or to the District of Columbia, so they are skipped.
UNASSIGNED_FIPS_CODES = {3, 7, 14, 43, 52}
STATE_FIPS_CODES = [code for code in range(1, 57) if code not in UNASSIGNED_FIPS_CODES]

# One request per (state, year) keeps each response small enough for the API.
CASE_LIST_YEARS = range(2018, 2022)  # 2018 through 2021 inclusive
CASE_LIST_TIMEOUT_S = 60

fatal_cases = []

for state in STATE_FIPS_CODES:
    for year in CASE_LIST_YEARS:
        response = requests.get(
            base_url + "GetCaseList",
            params={
                "states": state,
                "fromYear": year,
                "toYear": year,
                "minNumOfVehicles": 0,
                "maxNumOfVehicles": 10,
                "format": "json",
            },
            timeout=CASE_LIST_TIMEOUT_S,
        )
        # Fail loudly on HTTP errors instead of a confusing JSON decode error.
        response.raise_for_status()
        data = response.json()

        # "Results" wraps the case list in an outer list; tolerate an empty
        # or missing wrapper (no cases for this state/year).
        results = data.get("Results") or []
        if results:
            fatal_cases += results[0]

In [7]:
# One row per crash record returned by the case-list download.
df = pd.DataFrame(data=fatal_cases)

# Preview the first five rows.
df.head(n=5)


Unnamed: 0,CountyName,CrashDate,Fatals,Peds,Persons,St_Case,State,StateName,TotalVehicles
0,TALLADEGA (121),/Date(1515150000000-0500)/,1,0,1,10001,1,Alabama,2
1,WALKER (127),/Date(1515390480000-0500)/,2,0,2,10002,1,Alabama,1
2,CHILTON (21),/Date(1515469800000-0500)/,1,0,2,10003,1,Alabama,2
3,BALDWIN (3),/Date(1515520920000-0500)/,1,0,2,10004,1,Alabama,1
4,JEFFERSON (73),/Date(1516363740000-0500)/,1,0,2,10005,1,Alabama,2


In [8]:
# Placeholder the API returns when a crash has no usable date.
MISSING_CRASH_DATE = "/Date(-62135578800000-0500)/"

# Filter out the problematic dates. Work on an explicit copy so the column
# assignment below never writes through to (or warns about) `df`.
df_filtered = df[df["CrashDate"] != MISSING_CRASH_DATE].copy()

# CrashDate looks like "/Date(<epoch-ms><+/-HHMM>)/". Split it with a regex
# instead of fixed slice positions so timestamps of any digit count work.
crash_date_parts = df_filtered["CrashDate"].str.extract(
    r"^/Date\((?P<ms>-?\d+)(?:(?P<sign>[+-])(?P<hh>\d{2})(?P<mm>\d{2}))?\)/$"
)
if crash_date_parts["ms"].isna().any():
    raise ValueError("Unexpected CrashDate format; expected '/Date(<ms><+/-HHMM>)/'")

epoch_ms = crash_date_parts["ms"].astype("int64")

# Apply the offset embedded in the value rather than the timezone of whatever
# machine runs the notebook. datetime.fromtimestamp() used the local zone,
# which could push early-January crashes into the previous year.
# NOTE(review): assumes the embedded offset yields the recorded wall-clock
# crash time - confirm against a few known cases.
offset_sign = crash_date_parts["sign"].map({"+": 1, "-": -1}).fillna(1).astype("int64")
offset_minutes = offset_sign * (
    crash_date_parts["hh"].fillna("0").astype("int64") * 60
    + crash_date_parts["mm"].fillna("0").astype("int64")
)

df_filtered["CrashDate"] = (
    pd.to_datetime(epoch_ms, unit="ms") + pd.to_timedelta(offset_minutes, unit="min")
)

df_filtered.sort_values(by = "CrashDate").head()


Unnamed: 0,CountyName,CrashDate,Fatals,Peds,Persons,St_Case,State,StateName,TotalVehicles
12919,RIVERSIDE (65),2017-12-31 23:01:00,1,0,4,63463,6,California,2
28443,LAKE (69),2017-12-31 23:15:00,1,0,1,120156,12,Florida,1
80082,DOUGLAS (55),2017-12-31 23:20:00,1,0,2,310001,31,Nebraska,2
119481,GREGG (183),2017-12-31 23:22:00,1,0,1,480010,48,Texas,1
119477,DALLAS (113),2017-12-31 23:30:00,2,0,2,480006,48,Texas,2


In [10]:
# State name -> state code as used by the Crash API. These are FIPS codes
# (matching the "State" column of the case list, e.g. California = 6,
# Texas = 48), NOT alphabetical ranks; the numbering has gaps.
us_states_dict = {
    'Alabama': 1,
    'Alaska': 2,
    'Arizona': 4,
    'Arkansas': 5,
    'California': 6,
    'Colorado': 8,
    'Connecticut': 9,
    'Delaware': 10,
    # NOTE(review): key assumes the API spells the name this way - confirm.
    'District of Columbia': 11,
    'Florida': 12,
    'Georgia': 13,
    'Hawaii': 15,
    'Idaho': 16,
    'Illinois': 17,
    'Indiana': 18,
    'Iowa': 19,
    'Kansas': 20,
    'Kentucky': 21,
    'Louisiana': 22,
    'Maine': 23,
    'Maryland': 24,
    'Massachusetts': 25,
    'Michigan': 26,
    'Minnesota': 27,
    'Mississippi': 28,
    'Missouri': 29,
    'Montana': 30,
    'Nebraska': 31,
    'Nevada': 32,
    'New Hampshire': 33,
    'New Jersey': 34,
    'New Mexico': 35,
    'New York': 36,
    'North Carolina': 37,
    'North Dakota': 38,
    'Ohio': 39,
    'Oklahoma': 40,
    'Oregon': 41,
    'Pennsylvania': 42,
    'Rhode Island': 44,
    'South Carolina': 45,
    'South Dakota': 46,
    'Tennessee': 47,
    'Texas': 48,
    'Utah': 49,
    'Vermont': 50,
    'Virginia': 51,
    'Washington': 53,
    'West Virginia': 54,
    'Wisconsin': 55,
    'Wyoming': 56
}


In [22]:
#list to store each person involved in the incidents
people_involved = []

CASE_DETAILS_TIMEOUT_S = 30

# One HTTP request per crash: this loop is slow for the full data set.
# A Session reuses the underlying connection across requests.
with requests.Session() as session:
    for idx, row in df_filtered.iterrows():
        state_case = row["St_Case"]
        state_case_year = row["CrashDate"].strftime('%Y')
        # Use the state code the API itself returned for this crash instead
        # of translating the state name back into a number.
        state_number = row["State"]

        #Get the specific details for each crash
        try:
            response = session.get(
                base_url + "GetCaseDetails",
                params={
                    "stateCase": state_case,
                    "caseYear": state_case_year,
                    "state": state_number,
                    "format": "json",
                },
                timeout=CASE_DETAILS_TIMEOUT_S,
            )
            response.raise_for_status()
            data = response.json()

            crash_result = data["Results"][0][0]["CrashResultSet"]

            # Collect into a per-case list first so a malformed case never
            # leaves a partial set of people behind.
            case_people = []
            # Walk every vehicle in the crash, not just the first one.
            # NOTE(review): people outside a vehicle (e.g. pedestrians) may
            # live under a different key of the result set - confirm.
            for vehicle in crash_result["Vehicles"]:
                for person in vehicle.get("Persons") or []:
                    # data to be collected for each person
                    case_people.append({
                        "St_Case": state_case,
                        "CaseYear": state_case_year,
                        "State": state_number,
                        "AGE": person.get("AGE"),
                    })
            people_involved.extend(case_people)

        # Best effort: report the failing case and carry on with the rest.
        # Only network and response-shape errors are caught, so programming
        # errors still surface.
        except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
            print(
                "Cannot retrieve data. The follow error occured", e,
                f"(stateCase={state_case}, caseYear={state_case_year}, state={state_number})",
            )


[{'AGE': '55',
  'AGENAME': '55 Years',
  'AIR_BAG': '20',
  'AIR_BAGNAME': 'Not Deployed',
  'ALC_DET': '9',
  'ALC_DETNAME': 'Not Reported',
  'ALC_RES': '0',
  'ALC_RESNAME': '0.000 % BAC',
  'ALC_STATUS': '2',
  'ALC_STATUSNAME': 'Test Given',
  'ATST_TYP': '1',
  'ATST_TYPNAME': 'Blood',
  'BODY_TYP': '66',
  'BODY_TYPNAME': 'Truck-tractor (Cab only, or with any number of trailing '
                  'unit; any weight)',
  'CERT_NO': None,
  'COUNTY': '121',
  'COUNTYNAME': 'TALLADEGA (121)',
  'CaseYear': '2018',
  'DAY': '5',
  'DEATH_DA': '5',
  'DEATH_DANAME': '5',
  'DEATH_HR': '6',
  'DEATH_HRNAME': '6:00-6:59',
  'DEATH_MN': '0',
  'DEATH_MNNAME': '0',
  'DEATH_MO': '1',
  'DEATH_MONAME': 'January',
  'DEATH_TM': '600',
  'DEATH_TMNAME': '600',
  'DEATH_YR': '2018',
  'DEATH_YRNAME': '2018',
  'DOA': '7',
  'DOANAME': 'Died at Scene',
  'DRINKING': '0',
  'DRINKINGNAME': 'No (Alcohol Not Involved)',
  'DRUGRES1': None,
  'DRUGRES1NAME': None,
  'DRUGRES2': None,
  'DRUGRES2

KeyboardInterrupt: 