This data comes from the NHTSA Crash Report Sampling System (CRSS): https://www.nhtsa.gov/crash-data-systems/crash-report-sampling-system
Column naming conventions and other reference information can be found in the 2016-2019 CRSS Analytical User Manual: https://static.nhtsa.gov/nhtsa/downloads/CRSS/Manuals/Analytical%20User%20Manual/2016-2019%20CRSS%20Analytical%20User%20Manual%20-%20DOT%20HS%20813%20022.pdf

In [1]:
# import dependencies
import os
import re
from urllib.parse import quote

import pandas as pd
import psycopg2
from sqlalchemy import create_engine

import config as creds

In [2]:
# Locate the raw CSV tables: map each file's base name (without extension)
# to its path inside the data directory.
directory = r'Accidents_data'
files = {}
for filename in os.listdir(directory):
    stem, extension = os.path.splitext(filename)
    # Compare the extension case-insensitively so ".csv", ".CSV" and ".Csv" all match.
    if extension.lower() != ".csv":
        continue
    # The export cell further down writes its *_subset.csv files into this same
    # folder; skip them so a Restart & Run All does not load the notebook's own
    # outputs as if they were raw input tables.
    if stem.endswith("_subset"):
        continue
    files[stem] = os.path.join(directory, filename)
# Separate dict with the same keys; the next cell replaces its values with DataFrames.
dfs = files.copy()

In [3]:
# Create a dictionary of the dataframes for easy referencing.
# Built from `files` (name -> path) instead of overwriting `dfs` in place, so the
# cell can be re-run safely: the original passed already-loaded DataFrames back
# into read_csv on a second run.
# low_memory=False makes pandas infer each column's dtype from the whole file;
# NOTE(review): the warning tail left in this cell's output looks like a
# mixed-type DtypeWarning, which this setting addresses - confirm on re-run.
dfs = {key: pd.read_csv(path, low_memory=False) for key, path in files.items()}

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
def connect():
    """Open a psycopg2 connection to the PostgreSQL server and create a cursor.

    Host, database name, user and password are read from the ``creds`` config
    module (``PGHOST``, ``PGDATABASE``, ``PGUSER``, ``PGPASSWORD``); the port is
    fixed at 5432. Prints ``Connected!`` once the connection is established.

    Returns:
        tuple: ``(conn, cursor)`` - the open connection and a cursor created on
        it. The caller is responsible for closing the connection.

    Raises:
        psycopg2.OperationalError: if the connection cannot be established.
    """
    # Pass the settings as keyword arguments rather than concatenating a DSN
    # string: values containing spaces, quotes or backslashes (passwords in
    # particular) would otherwise need manual escaping.
    conn = psycopg2.connect(
        host=creds.PGHOST,
        port="5432",
        dbname=creds.PGDATABASE,
        user=creds.PGUSER,
        password=creds.PGPASSWORD,
    )
    print("Connected!")

    # Create a cursor object
    cursor = conn.cursor()

    return conn, cursor

In [5]:
# Vehicle table: keep the case/vehicle keys plus make, make+model and model year.
# NOTE(review): the `vehicle` SQL table created later declares a vehicleSpeed
# column that is not selected here - confirm whether a speed column belongs in
# this list.
cols_to_keep = [
    'CASENUM',
    'VEH_NO',
    'MAKENAME',
    'MAK_MODNAME',
    'MOD_YEAR',
]
vehicle_subset = dfs['vehicle'].loc[:, cols_to_keep]
vehicle_subset.head(3)

Unnamed: 0,CASENUM,VEH_NO,MAKENAME,MAK_MODNAME,MOD_YEAR
0,201901174219,1,Ford,Ford Ranger,2006
1,201901176655,1,Dodge,Dodge Dart (2013 on. See model 001 for 1960-19...,2015
2,201901176655,2,Honda,Honda CR-V,2006


In [6]:
# Accident table: keep the case key plus the descriptive "...NAME" columns and
# the hour of the crash.
cols_to_keep = [
    'CASENUM',
    'STRATUMNAME',
    'REGIONNAME',
    'URBANICITYNAME',
    'MONTHNAME',
    'HOUR',
    'ALCOHOLNAME',
    'WEATHERNAME',
    'MAN_COLLNAME',
    'HARM_EVNAME',
    'REL_ROADNAME',
]
accident_subset = dfs['accident'].loc[:, cols_to_keep]
accident_subset.head(3)

Unnamed: 0,CASENUM,STRATUMNAME,REGIONNAME,URBANICITYNAME,MONTHNAME,HOUR,ALCOHOLNAME,WEATHERNAME,MAN_COLLNAME,HARM_EVNAME,REL_ROADNAME
0,201901174219,Stratum 10 - Other,"South (MD, DE, DC, WV, VA, KY, TN, NC, SC, GA,...",Rural Area,January,8,No Alcohol Involved,Cloudy,The First Harmful Event was Not a Collision wi...,Curb,On Median
1,201901176655,Stratum 6 - LMY PV Minor Injury,"South (MD, DE, DC, WV, VA, KY, TN, NC, SC, GA,...",Rural Area,January,2,Reported as Unknown,Clear,Angle,Motor Vehicle In-Transport,On Roadway
2,201901176667,Stratum 8 - NLMY PV Minor Injury,"South (MD, DE, DC, WV, VA, KY, TN, NC, SC, GA,...",Urban Area,January,5,Alcohol Involved,Clear,Angle,Motor Vehicle In-Transport,On Roadway


In [7]:
# Person table: keep the case/vehicle/person keys plus sex, age, drinking,
# air-bag and restraint-use columns.
cols_to_keep = [
    'CASENUM',
    'VEH_NO',
    'PER_NO',
    'SEXNAME',
    'AGE',
    'DRINKINGNAME',
    'AIR_BAGNAME',
    'REST_USENAME',
]
person_subset = dfs['person'].loc[:, cols_to_keep]
person_subset.head(10)

Unnamed: 0,CASENUM,VEH_NO,PER_NO,SEXNAME,AGE,DRINKINGNAME,AIR_BAGNAME,REST_USENAME
0,201901174219,1,1,Male,39,No (Alcohol Not Involved),Not Deployed,Shoulder and Lap Belt Used
1,201901176655,1,1,Male,20,Not Reported,Not Deployed,Shoulder and Lap Belt Used
2,201901176655,1,2,Male,19,Not Reported,Not Deployed,None Used/Not Applicable
3,201901176655,1,3,Male,999,Not Reported,Not Deployed,Reported as Unknown
4,201901176655,1,4,Male,999,Not Reported,Not Deployed,Reported as Unknown
5,201901176655,2,1,Female,42,Not Reported,Not Deployed,Shoulder and Lap Belt Used
6,201901176655,2,2,Male,47,Not Reported,Not Deployed,Shoulder and Lap Belt Used
7,201901176667,1,1,Male,37,Yes (Alcohol Involved),Not Deployed,Shoulder and Lap Belt Used
8,201901176667,2,1,Male,50,Not Reported,Not Deployed,Shoulder and Lap Belt Used
9,201901176694,1,1,Male,26,No (Alcohol Not Involved),Deployed- Front,None Used/Not Applicable


In [8]:
# Distraction table: keep the case/vehicle keys and the distraction description,
# exposing the latter under the friendlier column name "Distraction".
cols_to_keep = [
    'CASENUM',
    'VEH_NO',
    'MDRDSTRDNAME',
]
distract_subset = (
    dfs['distract']
    .loc[:, cols_to_keep]
    .rename(columns={"MDRDSTRDNAME": "Distraction"})
)
distract_subset.head(3)

Unnamed: 0,CASENUM,VEH_NO,Distraction
0,201901174219,1,Not Distracted
1,201901176655,1,Not Reported
2,201901176655,2,Not Reported


In [28]:
# Write each subset to its own CSV file. The DataFrame index is written as the
# first (unnamed) column because to_csv is called with its defaults.
subset_exports = (
    ("Accidents_data/accidents_subset.csv", accident_subset),
    ("Accidents_data/vehicle_subset.csv", vehicle_subset),
    ("Accidents_data/distract_subset.csv", distract_subset),
    ("Accidents_data/person_subset.csv", person_subset),
)
for csv_path, subset in subset_exports:
    subset.to_csv(csv_path)

In [None]:
# Setup the Vehicle SQL Table

# SQL command to create the vehicle table (a no-op if it already exists).
# Rows are keyed by (caseNumber, vehicleNumber).
# NOTE(review): the leading `index` column presumably receives the unnamed
# index column written by to_csv - confirm against the import step.
create_table = """
    CREATE TABLE IF NOT EXISTS vehicle(
        index INTEGER,
        caseNumber BIGINT NOT NULL,
        vehicleNumber INTEGER NOT NULL,
        make TEXT,
        makeModel TEXT,
        modelYear TEXT,
        vehicleSpeed TEXT,
        PRIMARY KEY (caseNumber, vehicleNumber)
    )
    """

# Connecting to DB
conn, cursor = connect()
try:
    # Execute SQL Command and commit to DB
    cursor.execute(create_table)
    conn.commit()
finally:
    # Disconnect from DB even when the statement fails, so the connection is
    # not leaked in the kernel.
    conn.close()

In [39]:
# Setup the Distraction SQL Table

# SQL command to create the distract table (a no-op if it already exists).
# Unlike the other tables in this notebook, no primary key is declared here.
create_table = """
    CREATE TABLE IF NOT EXISTS distract(
        index INTEGER,
        caseNumber BIGINT NOT NULL,
        vehicleNumber INTEGER NOT NULL,
        Distraction TEXT
    )
    """

# Connecting to DB
conn, cursor = connect()
try:
    # Execute SQL Command and commit to DB
    cursor.execute(create_table)
    conn.commit()
finally:
    # Disconnect from DB even when the statement fails, so the connection is
    # not leaked in the kernel.
    conn.close()

Connected!


In [47]:
# Setup the Person SQL Table

# SQL command to create the person table (a no-op if it already exists).
# Rows are keyed by (caseNumber, vehicleNumber, personNumber).
create_table = """
    CREATE TABLE IF NOT EXISTS person(
        index INTEGER,
        caseNumber BIGINT NOT NULL,
        vehicleNumber Integer NOT NULL,
        personNumber INTEGER NOT NULL,
        sex TEXT,
        age TEXT,
        alcoholStatus TEXT,
        airBagStatus TEXT,
        restraintUseStatus TEXT,
        PRIMARY KEY (caseNumber, vehicleNumber, personNumber)        
    )
    """

# Connecting to DB
conn, cursor = connect()
try:
    # Execute SQL Command and commit to DB
    cursor.execute(create_table)
    conn.commit()
finally:
    # Disconnect from DB even when the statement fails, so the connection is
    # not leaked in the kernel.
    conn.close()

Connected!


In [44]:
# Setup the Accident SQL Table

# SQL command to create the accident table (a no-op if it already exists).
# One row per case, keyed by the case number.
create_table = """
    CREATE TABLE IF NOT EXISTS accident(
        Index INTEGER,
        CaseNumber BIGINT NOT NULL,
        Stratum TEXT,
        Region TEXT,
        CityType TEXT,
        Month TEXT,
        Hour INT,
        Alcohol TEXT,
        Weather TEXT,
        CollisionType TEXT,
        Event TEXT,
        RoadType TEXT,
        PRIMARY KEY (caseNumber)
    )
    """

# Connecting to DB
conn, cursor = connect()
try:
    # Execute SQL Command and commit to DB
    cursor.execute(create_table)
    conn.commit()
finally:
    # Disconnect from DB even when the statement fails, so the connection is
    # not leaked in the kernel.
    conn.close()

Connected!


In [13]:
from sqlalchemy import create_engine

In [14]:
# Build the SQLAlchemy engine from the same config values connect() uses, so
# both code paths target the same server/database and no endpoint is hard-coded
# in the notebook. `create_engine` is already imported in the imports cell.
# The user and password are percent-encoded because characters such as "@",
# ":" or "/" would otherwise be parsed as URL delimiters.
db_string = (
    f"postgresql://{quote(creds.PGUSER, safe='')}:{quote(creds.PGPASSWORD, safe='')}"
    f"@{creds.PGHOST}:5432/{creds.PGDATABASE}"
)
engine = create_engine(db_string)

Once the tables have been created in PostgreSQL, import the data into them from the CSV files written above.

In [15]:
# Read the `vehicle` table back through the SQLAlchemy engine and display it.
test_data = pd.read_sql_table(table_name="vehicle", con=engine)
test_data

Unnamed: 0,casenumber,vehiclenumber,make,makemodel,modelyear,vehiclespeed
0,201901174219,1,Ford,Ford Ranger,2006,Not Reported
1,201901176655,1,Dodge,Dodge Dart (2013 on. See model 001 for 1960-19...,2015,Not Reported
2,201901176655,2,Honda,Honda CR-V,2006,Not Reported
3,201901176667,1,Nissan/Datsun,Nissan/Datsun Xterra,2000,040 MPH
4,201901176667,2,Chevrolet,"Chevrolet C, K, R, V-series pickup/Silverado",1979,Not Reported
...,...,...,...,...,...,...
96712,201902285945,1,Chrysler,Chrysler PT Cruiser,2004,010 MPH
96713,201902285953,1,Ford,Ford F-Series pickup,2015,015 MPH
96714,201902285953,2,Honda,Honda Pilot,2004,015 MPH
96715,201902285965,1,KIA,KIA Rio/Rio5,2019,035 MPH


In [16]:
# Read the `vehicledist` table back through the SQLAlchemy engine and display it.
# NOTE(review): no cell visible in this notebook creates `vehicledist`;
# presumably it is built in the database outside the notebook - TODO confirm.
test_data = pd.read_sql_table(table_name="vehicledist", con=engine)
test_data

Unnamed: 0,casenumber,vehiclenumber,make,makemodel,modelyear,vehiclespeed,distraction
0,201901176655,1,Dodge,Dodge Dart (2013 on. See model 001 for 1960-19...,2015,Not Reported,Not Reported
1,201901176702,2,Toyota,Toyota Corolla,2018,005 MPH,Reported as Unknown if Distracted
2,201901180044,2,Ford,Ford F-Series pickup,2005,Not Reported,Not Reported
3,201901180165,2,Toyota,Toyota Prius *,2013,Stopped Motor Vehicle In- Transport,Not Distracted
4,201901180224,1,Ford,Ford Transit,2016,Not Reported,Not Distracted
...,...,...,...,...,...,...,...
96746,201902285693,1,Chevrolet,Chevrolet S-10/T-10 Pickup,2002,050 MPH,Not Distracted
96747,201902285713,1,Toyota,Toyota Tundra,2001,065 MPH,Reported as Unknown if Distracted
96748,201902285766,1,Ford,Ford Unknown (automobile),2011,Not Reported,Not Reported
96749,201902285809,1,Toyota,Toyota Yaris,2007,015 MPH,Not Distracted
