In [4]:
# Importing modules
import pandas as pd
import sqlite3

# Lookup table from a state name (as spelled in the STATENAME column) to one
# of four region labels: 'South', 'West', 'Northeast' or 'Midwest'.
# The table is written grouped by region for readability and then inverted,
# so the final dict is keyed by state name.
state_to_region = {
    state_name: region_label
    for region_label, member_states in {
        'South': (
            'Maryland', 'Delaware', 'District of Columbia', 'West Virginia',
            'Virginia', 'Kentucky', 'Tennessee', 'North Carolina',
            'South Carolina', 'Georgia', 'Florida', 'Alabama', 'Mississippi',
            'Louisiana', 'Arkansas', 'Oklahoma', 'Texas',
        ),
        'West': (
            'Montana', 'Idaho', 'Washington', 'Oregon', 'California',
            'Nevada', 'New Mexico', 'Arizona', 'Utah', 'Colorado', 'Wyoming',
            'Alaska', 'Hawaii',
        ),
        'Northeast': (
            'Pennsylvania', 'New Jersey', 'New York', 'New Hampshire',
            'Vermont', 'Rhode Island', 'Massachusetts', 'Maine',
            'Connecticut',
        ),
        'Midwest': (
            'Ohio', 'Indiana', 'Illinois', 'Michigan', 'Wisconsin',
            'Minnesota', 'North Dakota', 'South Dakota', 'Nebraska', 'Iowa',
            'Missouri', 'Kansas',
        ),
    }.items()
    for state_name in member_states
}

# Cleaning the DataFrame
def clean_dataframe(dataframe):
    """Return a trimmed copy of an accident table with region columns added.

    A fixed list of columns is dropped, then two columns are appended:
    ``REGION``, obtained by looking up each ``STATENAME`` value in the
    module-level ``state_to_region`` mapping, and ``REGIONNAME``, which holds
    the same values as ``REGION``.

    The input frame is not modified; ``DataFrame.drop`` returns a new object
    and the new columns are written to that object only.

    Args:
        dataframe: A pandas DataFrame containing every column in the drop
            list below plus a ``STATENAME`` column.

    Returns:
        The new DataFrame. State names missing from ``state_to_region``
        yield NaN in both region columns.

    Raises:
        KeyError: If a column in the drop list, or ``STATENAME``, is absent.
    """
    unwanted_columns = [
        'ST_CASE', 'COUNTY', 'CITY', 'CITYNAME', 'DAYNAME',
        'HOURNAME', 'MINUTENAME', 'TWAY_ID2', 'MILEPTNAME',
        'NHSNAME', 'SP_JURNAME', 'RELJCT1', 'RELJCT1NAME',
        'RELJCT2', 'RELJCT2NAME', 'ARR_HOURNAME', 'ARR_MINNAME',
        'LATITUDENAME', 'LONGITUDNAME',
    ]
    trimmed = dataframe.drop(columns=unwanted_columns)

    # Compute the region lookup once and reuse it for both output columns.
    region_by_row = trimmed['STATENAME'].map(state_to_region)
    trimmed['REGION'] = region_by_row
    trimmed['REGIONNAME'] = region_by_row
    return trimmed

# Lift pandas' column-display limit so wide frames print every column
pd.set_option('display.max_columns', None)

# Read each yearly accident CSV (newest first) and clean it; the files are
# decoded as ISO-8859-1 rather than pandas' default UTF-8.
df_2021, df_2020, df_2019 = [
    clean_dataframe(pd.read_csv(csv_path, encoding='ISO-8859-1'))
    for csv_path in (
        '../data/accident2021.csv',
        '../data/accident2020.csv',
        '../data/accident2019.csv',
    )
]

# Stack the three cleaned years into one frame (original row labels are kept)
all_years_df = pd.concat([df_2021, df_2020, df_2019])

# Round-trip the combined data through SQLite, then export the map subset.
# The connection is released in a `finally` clause so that a failure while
# writing or querying does not leave the database file handle open.
conn = sqlite3.connect('../data/accident_database.db')
try:
    # Replace any existing 'accidents' table; the DataFrame index is not stored
    all_years_df.to_sql('accidents', conn, if_exists='replace', index=False)

    # Query to select data needed for map
    query = "SELECT LATITUDE, LONGITUD, STATE, REGION, YEAR FROM accidents"
    df = pd.read_sql_query(query, conn)
finally:
    # Closing connection
    conn.close()

# Exporting to JSON (one object per row); `df` is already in memory, so the
# database connection is no longer needed at this point.
df.to_json('../data/mapdata.json', orient='records')

  df_2019 = clean_dataframe(pd.read_csv('../data/accident2019.csv', encoding='ISO-8859-1'))
