# Changes in Traffic Accidents by County Over Time
<br>This code analyzes traffic accident data across counties over time.
<br>It loads one CSV per year (2017–2021), calculates year-over-year changes in accident counts, and computes a cumulative score for each county (+1 for every year-over-year increase, −1 for every decrease).
<br>The results are visualized in an interactive choropleth map, color-coding counties based on their accident trend scores and displaying county names and scores on hover.
<br>Additionally, line charts showing yearly accident counts for each county are embedded in popups.

In [1]:
import pandas as pd
import geopandas as gpd
from scatter import check_cols
import folium
from folium.plugins import MarkerCluster
import branca.colormap as cm
import matplotlib.pyplot as plt
import io
import base64

In [2]:
# Read one CSV per year (2017-2021) and tag every row with its source year
frames = []
for year in range(2017, 2022):
    data = pd.read_csv(f'../../sc_data/sc_loc{year}.csv', low_memory=False).assign(year=year)

    # Project helper from `scatter`; presumably validates/normalises the
    # 'lat'/'lon' columns and hands back the (possibly adjusted) frame as its
    # second return value -- TODO confirm against scatter.check_cols
    _, data = check_cols('lat', 'lon', data, "")

    frames.append(data)

# Stack the yearly frames into one dataset with a fresh 0..n-1 index
df = pd.concat(frames, ignore_index=True)
print(f"Length of the dataset: {len(df):,}")

Length of the dataset: 694,335


In [3]:
# Sanity check: number of accident records per year (most frequent year listed first)
df['year'].value_counts()

year
2021    147724
2018    142406
2017    141874
2019    141096
2020    121235
Name: count, dtype: int64

In [4]:
# County names listed in code order: the name at 1-based position i is the
# label for numeric county code i in the 'cty' column.
_county_names = [
    'Abbeville', 'Aiken', 'Allendale', 'Anderson', 'Bamberg',
    'Barnwell', 'Beaufort', 'Berkeley', 'Calhoun', 'Charleston',
    'Cherokee', 'Chester', 'Chesterfield', 'Clarendon', 'Colleton',
    'Darlington', 'Dillon', 'Dorchester', 'Edgefield', 'Fairfield',
    'Florence', 'Georgetown', 'Greenville', 'Greenwood', 'Hampton',
    'Horry', 'Jasper', 'Kershaw', 'Lancaster', 'Laurens',
    'Lee', 'Lexington', 'McCormick', 'Marion', 'Marlboro',
    'Newberry', 'Oconee', 'Orangeburg', 'Pickens', 'Richland',
    'Saluda', 'Spartanburg', 'Sumter', 'Union', 'Williamsburg',
    'York',
]
county_dict = dict(enumerate(_county_names, start=1))

# Swap each numeric code for its county name; values not in the mapping are left as-is
df['cty'] = df['cty'].replace(county_dict)

In [5]:
# Count accident rows for every (county, year) pair ...
county_year_counts = df.groupby(['cty', 'year']).size()

# ... then pivot years into columns; a county with no rows in a year gets 0
accidents_by_county_year = county_year_counts.unstack(fill_value=0)
accidents_by_county_year.head()

year,2017,2018,2019,2020,2021
cty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Abbeville,327,346,287,299,355
Aiken,4081,3907,4081,3544,4406
Allendale,119,119,113,105,127
Anderson,5391,5428,5094,4704,5930
Bamberg,185,177,205,194,211


In [6]:
# Year-over-year change: each year's count minus the previous year's, taken
# across the year columns. The first year has no predecessor, so it is NaN.
yoy_change = accidents_by_county_year.diff(periods=1, axis='columns')
yoy_change.head()

year,2017,2018,2019,2020,2021
cty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Abbeville,,19,-59,12,56
Aiken,,-174,174,-537,862
Allendale,,0,-6,-8,22
Anderson,,37,-334,-390,1226
Bamberg,,-8,28,-11,17


In [7]:
# Compute cumulative score: each year contributes +1 if accidents rose, -1 if
# they fell, and 0 if unchanged. NaN (the first year) fails both comparisons,
# so it also contributes 0.
went_up = (yoy_change > 0).astype(int)
went_down = (yoy_change < 0).astype(int)
yearly_direction = went_up - went_down

# Running total of the yearly directions, accumulated across the year columns
cumulative_score = yearly_direction.cumsum(axis=1)
cumulative_score.head()

year,2017,2018,2019,2020,2021
cty,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Abbeville,0,1,0,1,2
Aiken,0,-1,0,-1,0
Allendale,0,0,-1,-2,-1
Anderson,0,1,0,-1,0
Bamberg,0,-1,0,-1,0


In [8]:
# The final score is the running total as of the most recent year,
# i.e. the last column of cumulative_score
latest_year = cumulative_score.columns[-1]
final_score = cumulative_score[latest_year]
final_score.head()

cty
Abbeville    2
Aiken        0
Allendale   -1
Anderson     0
Bamberg      0
Name: 2021, dtype: int64

In [9]:
# Create a DataFrame with one row per county: its name and its final score
final_df = (
    final_score
    .rename('score')         # values column
    .rename_axis('county')   # the county index becomes the 'county' column
    .reset_index()
)
final_df.head()

Unnamed: 0,county,score
0,Abbeville,2
1,Aiken,0
2,Allendale,-1
3,Anderson,0
4,Bamberg,0


In [10]:
# Load county boundaries
counties_gdf = gpd.read_file('South Carolina County Boundaries.geojson')

# Merge accident data with geospatial data (inner join on the county name)
merged_data = counties_gdf.merge(final_df, left_on='name', right_on='county')

# An inner join silently drops every county whose name is spelled differently
# in the two sources, which would leave a hole in the map with no explanation.
# Report such counties instead of losing them quietly.
unmatched = final_df.loc[~final_df['county'].isin(merged_data['county']), 'county'].tolist()
if unmatched:
    print(f"Warning: no boundary matched for counties: {', '.join(map(str, unmatched))}")

merged_data.head()

Unnamed: 0,name,id,STATE,TYPE,CNTRY,geometry,county,score
0,York,45091,SC,County,USA,"POLYGON ((-80.90200 35.07200, -80.90600 35.050...",York,0
1,Williamsburg,45089,SC,County,USA,"POLYGON ((-79.32500 33.79900, -79.31700 33.780...",Williamsburg,0
2,Union,45087,SC,County,USA,"POLYGON ((-81.71200 34.91300, -81.69500 34.909...",Union,0
3,Sumter,45085,SC,County,USA,"POLYGON ((-80.00100 34.04800, -79.95600 34.020...",Sumter,0
4,Spartanburg,45083,SC,County,USA,"POLYGON ((-81.71200 34.91300, -81.74300 34.882...",Spartanburg,0


In [11]:
# Prepare yearly data for each county: reshape the county x year table into
# long form, one row per (county, year) with the accident count
yearly_data = (
    accidents_by_county_year
    .reset_index()
    .melt(id_vars=['cty'], var_name='year', value_name='accidents')
    .astype({'year': int})  # year labels come from column headers; force integers
)
yearly_data.head()

Unnamed: 0,cty,year,accidents
0,Abbeville,2017,327
1,Aiken,2017,4081
2,Allendale,2017,119
3,Anderson,2017,5391
4,Bamberg,2017,185


In [12]:
def create_line_chart(county):
    """Render a county's yearly accident counts as a base64-encoded PNG line chart.

    Reads the module-level ``yearly_data`` frame (columns ``cty``, ``year``,
    ``accidents``) and plots one point per year for the requested county.

    Args:
        county: County name to match against ``yearly_data['cty']``.

    Returns:
        str: The PNG bytes encoded as a base64 string, ready to embed in an
        HTML ``data:image/png;base64,...`` URI.
    """
    county_data = yearly_data[yearly_data['cty'] == county]  # Filter data for the county

    # Use an explicit Figure/Axes rather than pyplot's implicit "current figure",
    # so this function never draws on, or closes, a figure it did not create.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.plot(county_data['year'], county_data['accidents'], marker='o')
        ax.set_title(f"{county} Accidents Over Time")
        ax.set_xlabel("Year")
        ax.set_ylabel("Number of Accidents")

        # One tick per year present in the data, labelled as a whole number
        ax.set_xticks(county_data['year'].unique())
        ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f"{int(x)}"))
        fig.tight_layout()

        # Save the chart to a PNG in memory
        img = io.BytesIO()
        fig.savefig(img, format='png')
    finally:
        # Release the figure even if plotting/saving fails; this function is
        # called once per county, so leaked figures would pile up in memory.
        plt.close(fig)

    # getvalue() returns the whole buffer regardless of the stream position
    return base64.b64encode(img.getvalue()).decode()  # Encode the image as a base64 string

In [13]:
# Pre-render every county's chart once, keyed by county name, so the popups
# can look the image up instead of re-plotting
county_charts = {}
for county in yearly_data['cty'].unique():
    county_charts[county] = create_line_chart(county)

In [14]:
# Create the map, centred on a fixed [latitude, longitude] at an initial zoom of 7
map_center = [33.8361, -81.1637]
m = folium.Map(location=map_center, zoom_start=7)

In [15]:
# Create a red -> yellow -> green colormap spanning the observed score range.
# NOTE(review): in the code visible here this colormap is never added to the
# map (the choropleth uses fill_color='YlOrRd'); confirm whether it is used elsewhere.
score_min = merged_data['score'].min()
score_max = merged_data['score'].max()
colormap = cm.LinearColormap(
    colors=['red', 'yellow', 'green'],
    vmin=score_min,
    vmax=score_max,
)

In [16]:
# Add the choropleth layer: each county polygon is shaded by its cumulative score.
# merged_data supplies both the geometry and the values, joined on 'county'.
choropleth = folium.Choropleth(
    geo_data=merged_data,
    name='choropleth',
    data=merged_data,
    columns=['county', 'score'],
    key_on='feature.properties.county',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Cumulative Score'
).add_to(m)

# Show the county name and score when hovering over a county polygon.
# The tooltip is attached to the GeoJson layer that the Choropleth wraps,
# reading the 'county' and 'score' properties carried over from merged_data.
folium.GeoJsonTooltip(
    fields=['county', 'score'],
    aliases=['County:', 'Cumulative Score:'],
).add_to(choropleth.geojson)

<folium.features.Choropleth at 0x32000bb90>

In [17]:
# Add one marker per county at the polygon's centroid; its popup shows the
# county name, the cumulative score and the pre-rendered yearly line chart
for idx, row in merged_data.iterrows():
    county = row['county']
    centroid = row.geometry.centroid  # computed once, used for both lat and lon

    popup_content = f"""
    <h4>{county} County</h4>
    <p>Cumulative Score: {row['score']}</p>
    <img src="data:image/png;base64,{county_charts[county]}" width="300" height="200">
    """

    marker = folium.Marker(
        location=[centroid.y, centroid.x],  # folium expects [lat, lon]
        popup=folium.Popup(popup_content, max_width=350),
        icon=folium.Icon(color='blue', icon='info-sign'),
    )
    marker.add_to(m)

In [18]:
# Save the map as a standalone HTML file
output_path = "../maps/choropleth_with_popups.html"
m.save(output_path)