In [3]:
import pandas as pd

# Read a small sample of accident coordinates: only the first 10,000 rows
# and only the two columns needed for plotting.
file_path = "US_Accidents_March23.csv"
coordinate_columns = ['Start_Lat', 'Start_Lng']
data = pd.read_csv(
    file_path,
    usecols=coordinate_columns,
    nrows=10000,
)

# Preview the first five rows as plain text.
print(data.head())

   Start_Lat  Start_Lng
0  39.865147 -84.058723
1  39.928059 -82.831184
2  39.063148 -84.032608
3  39.747753 -84.205582
4  39.627781 -84.188354


In [None]:
import plotly.express as px

# Plot every sampled accident as a red marker on a USA-only map.
# `data` is the coordinate sample loaded in the first cell.
geo_options = dict(
    lat='Start_Lat',
    lon='Start_Lng',
    scope='usa',                      # restrict the base map to the USA
    title='Traffic Accident Hotspots in the US',
    projection='albers usa',          # Albers USA projection
    hover_name='Start_Lat',           # latitude is shown on hover
    color_discrete_sequence=['red'],  # draw all accidents in red
)
fig = px.scatter_geo(data, **geo_options)

fig.show()


In [3]:
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN
import folium
from folium.plugins import HeatMap

# Step 1: Load the data (first 100,000 rows of the accident CSV)
df = pd.read_csv("US_Accidents_March23.csv", nrows=100000)

# Names of the coordinate columns in the accident CSV
latitude = 'Start_Lat'
longitude = 'Start_Lng'

# Step 2: Preprocess the data
# DBSCAN rejects NaN input, so only rows that have both coordinates are
# clustered. The rows themselves stay in `df`.
has_coords = df[[latitude, longitude]].notna().all(axis=1).to_numpy()

# Step 3: Clustering
# Convert location data to a numpy array of [lat, lon] pairs for DBSCAN
coords = df.loc[has_coords, [latitude, longitude]].to_numpy()

# The haversine metric takes [lat, lon] in radians and also measures distance
# in radians, so the neighbourhood radius has to be given as km / Earth radius.
# A bare eps=0.01 would mean 0.01 rad (~64 km), which lumps whole metro areas
# into a single cluster and makes every neighbourhood very large.
EARTH_RADIUS_KM = 6371.0088
EPS_KM = 1.0        # neighbourhood radius of a hotspot, in kilometres
MIN_SAMPLES = 10    # minimum accidents within EPS_KM to form a cluster

labels = DBSCAN(
    eps=EPS_KM / EARTH_RADIUS_KM,
    min_samples=MIN_SAMPLES,
    algorithm='ball_tree',
    metric='haversine',
).fit_predict(np.radians(coords))

# One label per row of `df`; rows without coordinates are treated as noise (-1)
db = np.full(len(df), -1, dtype=int)
db[has_coords] = labels

# Add cluster labels to the dataframe
df['cluster'] = db

# Filter out noise (-1 labels)
df_clustered = df[df['cluster'] != -1]

# Step 4: Mapping with Folium
# Create a map centered on the mean accident location
map_us = folium.Map(location=[df[latitude].mean(), df[longitude].mean()], zoom_start=5)

# One heatmap point per cluster: the mean location of the cluster's accidents
clusters = df_clustered.groupby('cluster')[[latitude, longitude]].mean().values

# Add a heat map layer for accident hotspots
HeatMap(clusters).add_to(map_us)

# Save the map to an HTML file
map_us.save('us_traffic_accident_hotspots.html')


In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Step 1: Read the accident records (first 100,000 rows only).
# NOTE: both file paths in this cell are placeholders to be replaced.
df = pd.read_csv('path_to_your_csv_file.csv', nrows=100000)
# Coerce both coordinate columns to float
for coord_col in ('latitude', 'longitude'):
    df[coord_col] = df[coord_col].astype(float)

# Step 2: Build point geometries (x = longitude, y = latitude) and wrap the
# DataFrame in a GeoDataFrame
accident_points = gpd.points_from_xy(df['longitude'], df['latitude'])
gdf = gpd.GeoDataFrame(df, geometry=accident_points)

# Step 3: Read a shapefile of US state boundaries (it must include Texas).
# Such shapefiles are published by e.g. the US Census Bureau or Natural Earth Data.
states = gpd.read_file('path_to_shapefile_of_US_states.shp')

# Keep only the row(s) whose NAME is Texas
is_texas = states['NAME'] == 'Texas'
texas = states[is_texas]

# Step 4: Test every accident point against the first Texas geometry.
# The result is a boolean Series, True where the point lies within it.
texas_row = texas.iloc[0]
points_in_texas = gdf.within(texas_row.geometry)

# Apply the mask so only the Texas points remain
gdf_texas = gdf[points_in_texas]

# gdf_texas now holds only the points that are within Texas


In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Load the accident data (coordinate columns only, first 100,000 rows).
# NOTE(review): only the head of the file is read, and the sample printed at
# the top of this notebook shows Ohio coordinates, so this slice may contain
# few or no Texas accidents. Confirm, or raise/remove `nrows`.
df = pd.read_csv("US_Accidents_March23.csv", usecols=['Start_Lat', 'Start_Lng'], nrows=100000)

# Convert DataFrame to GeoDataFrame (x = longitude, y = latitude).
# The coordinates are plain latitude/longitude degrees; this assumes they are
# WGS84 (EPSG:4326). TODO: confirm against the dataset documentation.
gdf_accidents = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.Start_Lng, df.Start_Lat),
    crs="EPSG:4326",
)

# Load the Texas shapefile (one polygon per county subdivision)
gdf_texas = gpd.read_file("tl_2016_48_cousub.shp")

# Ensure both GeoDataFrames use the same CRS. Reproject the points into the
# shapefile's CRS rather than overwriting `.crs`, which would only relabel the
# coordinates without transforming them.
if gdf_texas.crs is not None:
    gdf_accidents = gdf_accidents.to_crs(gdf_texas.crs)

# Filter accidents to include only those within Texas.
# `predicate=` replaces the `op=` keyword, which is deprecated and has been
# removed from current geopandas releases.
texas_accidents = gpd.sjoin(gdf_accidents, gdf_texas, how="inner", predicate='intersects')

# texas_accidents now contains only the accidents that intersect a Texas polygon
print(texas_accidents.head())

# Optional: Save the filtered data to a new CSV file
texas_accidents.to_csv("Texas_Accidents.csv", index=False)
