In [8]:
# Importing modules
import pandas as pd
import sqlite3

# Lookup from full state name (plus 'District of Columbia') to one of four
# region labels: 'South', 'West', 'Northeast' or 'Midwest'.
# Built from per-region tuples so each region's membership is readable at a glance.
state_to_region = {
    state: region
    for region, states in (
        ('South', (
            'Maryland', 'Delaware', 'District of Columbia', 'West Virginia',
            'Virginia', 'Kentucky', 'Tennessee', 'North Carolina',
            'South Carolina', 'Georgia', 'Florida', 'Alabama', 'Mississippi',
            'Louisiana', 'Arkansas', 'Oklahoma', 'Texas',
        )),
        ('West', (
            'Montana', 'Idaho', 'Washington', 'Oregon', 'California', 'Nevada',
            'New Mexico', 'Arizona', 'Utah', 'Colorado', 'Wyoming', 'Alaska',
            'Hawaii',
        )),
        ('Northeast', (
            'Pennsylvania', 'New Jersey', 'New York', 'New Hampshire',
            'Vermont', 'Rhode Island', 'Massachusetts', 'Maine', 'Connecticut',
        )),
        ('Midwest', (
            'Ohio', 'Indiana', 'Illinois', 'Michigan', 'Wisconsin', 'Minnesota',
            'North Dakota', 'South Dakota', 'Nebraska', 'Iowa', 'Missouri',
            'Kansas',
        )),
    )
    for state in states
}

# Cleaning the DataFrame
def clean_dataframe(dataframe):
    """Return a trimmed copy of *dataframe* with region columns appended.

    The columns listed below are dropped, then two columns are added:
    'REGION', looked up from 'STATENAME' through the module-level
    ``state_to_region`` mapping, and 'REGIONNAME', which holds the same
    values as 'REGION'.

    The input must contain every column in the drop list as well as
    'STATENAME'; the input DataFrame itself is not modified because
    ``drop`` returns a new object.
    """
    unwanted_columns = [
        'ST_CASE', 'COUNTY', 'CITY', 'CITYNAME', 'DAYNAME',
        'HOURNAME', 'MINUTENAME', 'TWAY_ID2', 'MILEPTNAME',
        'NHSNAME', 'SP_JURNAME', 'RELJCT1', 'RELJCT1NAME',
        'RELJCT2', 'RELJCT2NAME', 'ARR_HOURNAME', 'ARR_MINNAME',
        'LATITUDENAME', 'LONGITUDNAME',
    ]

    trimmed = dataframe.drop(columns=unwanted_columns)

    # State names missing from the mapping produce NaN in both new columns.
    trimmed['REGION'] = trimmed['STATENAME'].map(state_to_region)
    trimmed['REGIONNAME'] = trimmed['REGION']
    return trimmed

# Read the three yearly accident CSVs (newest first) and run each one through
# clean_dataframe as soon as it is loaded.
df_2021, df_2020, df_2019 = (
    clean_dataframe(pd.read_csv(csv_path, encoding='ISO-8859-1'))
    for csv_path in (
        '../data/accident2021.csv',
        '../data/accident2020.csv',
        '../data/accident2019.csv',
    )
)

# Define a function to map values
def reorganize_lgt_cond(value):
    """Collapse a light-condition code into 'Day', 'Night' or 'Unknown'.

    Codes 1 and 4 map to 'Day'; codes 2, 3, 5 and 6 map to 'Night';
    every other value (including missing values) maps to 'Unknown'.
    """
    day_codes = (1, 4)
    night_codes = (2, 3, 5, 6)

    if value in day_codes:
        return 'Day'
    return 'Night' if value in night_codes else 'Unknown'
    
# Replace the numeric 'LGT_COND' codes with Day/Night/Unknown labels in each
# yearly DataFrame (the column is overwritten in place).
for yearly_df in (df_2021, df_2020, df_2019):
    yearly_df['LGT_COND'] = yearly_df['LGT_COND'].apply(reorganize_lgt_cond)

# Stack the three yearly frames (2021, 2020, 2019) into one table and give the
# descriptive "...NAME" columns the shorter labels used downstream.
all_years_df = pd.concat([df_2021, df_2020, df_2019]).rename(
    columns={
        'ROUTENAME': 'ROADTYPE',
        'RUR_URBNAME': 'POPULATION',
        'HARM_EVNAME': 'ACCIDENTTYPE',
        'TYP_INTNAME': 'INTERSECTIONTYPE',
        'REL_ROADNAME': 'LOCATION',
        'LGT_CONDNAME': 'LIGHTING',
        'WEATHERNAME': 'WEATHERTYPE',
        'NOT_HOURNAME': 'TIME',
    }
)

# Write the combined table to SQLite, then read back the column subsets needed
# by the map and the Day/Night chart and export each subset as JSON records.
conn = sqlite3.connect('../data/accident_database.db')
try:
    # Any existing 'accidents' table is replaced; the DataFrame index is not stored.
    all_years_df.to_sql('accidents', conn, if_exists='replace', index=False)

    # Query to select data needed for map
    mapquery = "SELECT LATITUDE, LONGITUD, YEAR, REGION, POPULATION, LOCATION from accidents"
    mapdf = pd.read_sql_query(mapquery, conn)
    # Exporting to JSON
    mapdf.to_json('../data/mapdata.json', orient='records')

    # Query to select data needed for Day/Night Chart
    daynightquery = "SELECT YEAR, REGION, LGT_COND from accidents"
    daynightdf = pd.read_sql_query(daynightquery, conn)
    # Exporting to JSON
    daynightdf.to_json('../data/daynight.json', orient='records')
finally:
    # Close the connection even if a write, query or export above fails,
    # so the database file handle is never left open.
    conn.close()

  df_2019 = clean_dataframe(pd.read_csv('../data/accident2019.csv', encoding='ISO-8859-1'))
