# Changes in Traffic Accidents by County Over Time
<br>This code analyzes traffic accident data across counties over time.
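<br>It loads the yearly accident CSVs (2017–2021), calculates the year-over-year change in accident counts for each county, and computes a cumulative score per county: +1 for every year in which accidents rose and −1 for every year in which they fell.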
<br>The results are visualized in an interactive choropleth map, color-coding counties based on their accident trend scores and displaying county names and scores on hover.

In [1]:
import pandas as pd
import geopandas as gpd
import plotly.express as px
from scatter import check_cols

In [2]:
# Load one accident CSV per year (2017-2021) and tag every row with its year
frames = []
for year in range(2017, 2022):
    csv_path = f'../../sc_data/sc_loc{year}.csv'
    data = pd.read_csv(csv_path, low_memory=False).assign(year=year)

    # Run the project's lat/lon column check and keep only the returned frame.
    # NOTE(review): check_cols comes from the local `scatter` module; its exact
    # behavior is not visible here -- confirm what the discarded first value is.
    _, data = check_cols('lat', 'lon', data, "")

    frames.append(data)

# Stack the yearly frames into a single dataframe with a fresh 0..n-1 index
df = pd.concat(frames, ignore_index=True)
print(f"Length of the dataset: {len(df):,}")

Length of the dataset: 694,335


In [3]:
# Count the number of accident records per year (most frequent year first)
df['year'].value_counts()

year
2021    147724
2018    142406
2017    141874
2019    141096
2020    121235
Name: count, dtype: int64

In [4]:
# County names listed in code order: position 1 is 'Abbeville', position 46 is 'York'
county_names = [
    'Abbeville', 'Aiken', 'Allendale', 'Anderson', 'Bamberg',
    'Barnwell', 'Beaufort', 'Berkeley', 'Calhoun', 'Charleston',
    'Cherokee', 'Chester', 'Chesterfield', 'Clarendon', 'Colleton',
    'Darlington', 'Dillon', 'Dorchester', 'Edgefield', 'Fairfield',
    'Florence', 'Georgetown', 'Greenville', 'Greenwood', 'Hampton',
    'Horry', 'Jasper', 'Kershaw', 'Lancaster', 'Laurens',
    'Lee', 'Lexington', 'McCormick', 'Marion', 'Marlboro',
    'Newberry', 'Oconee', 'Orangeburg', 'Pickens', 'Richland',
    'Saluda', 'Spartanburg', 'Sumter', 'Union', 'Williamsburg',
    'York',
]

# Numeric county code (1-46) -> county name
county_dict = dict(enumerate(county_names, start=1))

# Swap the numeric codes in 'cty' for names; values without a mapping are left as-is
df['cty'] = df['cty'].replace(county_dict)

In [5]:
# Count accident rows for every (county, year) pair ...
county_year_counts = df.groupby(['cty', 'year']).size()

# ... then pivot the years into columns; pairs with no rows become 0
accidents_by_county_year = county_year_counts.unstack(fill_value=0)
accidents_by_county_year.head()

year,2017,2018,2019,2020,2021
cty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Abbeville,327,346,287,299,355
Aiken,4081,3907,4081,3544,4406
Allendale,119,119,113,105,127
Anderson,5391,5428,5094,4704,5930
Bamberg,185,177,205,194,211


In [6]:
# Year-over-year change: each year column minus the previous year column.
# The first year has no predecessor, so its column is NaN.
yoy_change = accidents_by_county_year.diff(periods=1, axis='columns')
yoy_change.head()

year,2017,2018,2019,2020,2021
cty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Abbeville,,19,-59,12,56
Aiken,,-174,174,-537,862
Allendale,,0,-6,-8,22
Anderson,,37,-334,-390,1226
Bamberg,,-8,28,-11,17


In [7]:
# Score each yearly change: +1 if accidents rose, -1 if they fell, 0 otherwise
# (NaN compares False both ways, so the first year contributes 0)
rose = yoy_change.gt(0).astype(int)
fell = yoy_change.lt(0).astype(int)

# Running total of those +1/-1 marks across the years, per county
cumulative_score = (rose - fell).cumsum(axis=1)
cumulative_score.head()

year,2017,2018,2019,2020,2021
cty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Abbeville,0,1,0,1,2
Aiken,0,-1,0,-1,0
Allendale,0,0,-1,-2,-1
Anderson,0,1,0,-1,0
Bamberg,0,-1,0,-1,0


In [8]:
# The final score is the running total in the last year column
last_year = cumulative_score.columns[-1]
final_score = cumulative_score[last_year]
final_score.head()

cty
Abbeville    2
Aiken        0
Allendale   -1
Anderson     0
Bamberg      0
Name: 2021, dtype: int64

In [9]:
# Turn the score Series into a two-column DataFrame: the county name (taken
# from the index) and its final score, with a default 0..n-1 row index
final_df = final_score.rename_axis('county').reset_index(name='score')

final_df.head()

Unnamed: 0,county,score
0,Abbeville,2
1,Aiken,0
2,Allendale,-1
3,Anderson,0
4,Bamberg,0


In [10]:
# Read the county boundary polygons from the GeoJSON file into a GeoDataFrame
boundaries_path = 'South Carolina County Boundaries.geojson'
counties_gdf = gpd.read_file(boundaries_path)

In [11]:
# Merge accident scores with the county geometries (inner join on county name)
merged_data = counties_gdf.merge(final_df, left_on='name', right_on='county')

# An inner join silently drops any scored county whose name has no exact match
# in the boundary file (e.g. a spelling/case difference or an unmapped numeric
# code), which would leave a hole in the map. Report such rows instead of
# losing them unnoticed; merged_data itself is unchanged.
unmatched = sorted(
    set(final_df['county'].astype(str)) - set(merged_data['county'].astype(str))
)
if unmatched:
    print(
        f"Warning: {len(unmatched)} county score(s) had no matching boundary "
        f"and were dropped by the merge: {unmatched}"
    )

merged_data.head()

Unnamed: 0,name,id,STATE,TYPE,CNTRY,geometry,county,score
0,York,45091,SC,County,USA,"POLYGON ((-80.90200 35.07200, -80.90600 35.050...",York,0
1,Williamsburg,45089,SC,County,USA,"POLYGON ((-79.32500 33.79900, -79.31700 33.780...",Williamsburg,0
2,Union,45087,SC,County,USA,"POLYGON ((-81.71200 34.91300, -81.69500 34.909...",Union,0
3,Sumter,45085,SC,County,USA,"POLYGON ((-80.00100 34.04800, -79.95600 34.020...",Sumter,0
4,Spartanburg,45083,SC,County,USA,"POLYGON ((-81.71200 34.91300, -81.74300 34.882...",Spartanburg,0


In [14]:
# Create the choropleth map: one polygon per county, coloured by its score
fig = px.choropleth_mapbox(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # row index links each row to its geometry
    color='score',
    # Reversed Red-Yellow-Green scale: green for negative scores (accidents
    # mostly falling), yellow for neutral, red for positive (mostly rising)
    color_continuous_scale="RdYlGn_r",
    # Pin the middle of the diverging scale to 0 so yellow really means a
    # neutral score, whatever the min/max scores in the data happen to be
    color_continuous_midpoint=0,
    mapbox_style="carto-positron",
    zoom=6,  # Adjust this value to fit your region
    center={"lat": 33.8361, "lon": -81.1637},
    opacity=0.5,
    labels={'score': 'Cumulative Score', 'county': 'County'},
    hover_data={'county': True, 'score': True},
    title='County-Level Traffic Accident Trends: Cumulative Change Over Time'
)

# In the hovertemplate, %{customdata[0]} displays the county name: 'county' is
# the first (index 0) column passed through hover_data. The score is read from
# %{z}, the value that drives the colour.
fig.update_traces(
    hovertemplate="<b>County:</b> %{customdata[0]}<br><b>Cumulative Score:</b> %{z}<extra></extra>"
)

# Keep a top margin so the figure title has room to render (a top margin of 0
# leaves no space for it); the other margins stay at 0 to maximise the map.
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0});

In [15]:
# Write the interactive figure to an HTML file (it is not displayed inline here)
fig.write_html("../maps/choropleth_trends.html")