In [6]:
# Edward Pasco ML Model

# General Imports for Project
import pandas as pd
import numpy as np
import folium
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from geopy.distance import geodesic

try:
    food = pd.read_csv('rows.csv') # data for the food/grocery access (https://data.virginia.gov/gl/dataset/usda-foodenvironmentatlas-access-and-proximity-to-grocery-store)
    traffic = pd.read_csv('Traffic_Volume_2021.csv') # data for VA traffic in 2021 (https://open-data-pittsylvania.hub.arcgis.com/datasets/VDOT::vdot-bidirectional-traffic-volume-2021/explore?location=37.671466%2C-78.788203%2C6.23)
except FileNotFoundError:
    print("One or both of the CSV files were not found. Please ensure they are in the current directory.")
    exit()

# First I will visualize the data and then use a cleaning and prep pipeline
print("Food columns:", food.columns)
print("Traffic columns:", traffic.columns)

food.head(3)
traffic.head(3)

# Base map centered on Virginia (approximate center coordinates)
va_map = folium.Map(location=[37.4316, -78.6569], zoom_start=7)

# Plot food access points in red.
for idx, row in food.iterrows():
    try:
        lat, lon = float(food['latitude']), float(food['longitude'])
        folium.CircleMarker(
            location=[lat, lon],
            radius=2,
            color='red',
            fill=True,
            fill_color='red'
        ).add_to(va_map)
    except Exception as e:
        continue

# Plot traffic points in blue
# Change 'Traffic_Volume' to your actual column name if different.
for idx, row in traffic.iterrows():
    try:
        lat, lon = float(traffic['latitude']), float(traffic['longitude'])
        folium.CircleMarker(
            location=[lat, lon],
            radius=2,
            color='blue',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        ).add_to(va_map)
    except Exception as e:
        continue

# Display the map
va_map


# Clustering Attempt

# TODO: figure out how to apply to the attributes in the datasets
food_coords = food[['Latitude', 'Longitude']].dropna().astype(float)

# Plotting, use the updated names:
for idx, row in food.iterrows():
    try:
        lat, lon = float(row['Latitude']), float(row['Longitude'])
        folium.CircleMarker(
            location=[lat, lon],
            radius=2,
            color='red',
            fill=True,
            fill_color='red'
        ).add_to(va_map)
    except Exception as e:
        continue
food_coords = food[['latitude', 'longitude']].dropna().astype(float)

# Set number of clusters
n_clusters = 5
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(food_coords)
centers = kmeans.cluster_centers_

# Plot candidate centers on the map using green markers.
for center in centers:
    folium.Marker(
        location=[center[0], center[1]],
        icon=folium.Icon(color='green')
    ).add_to(va_map)

# Update map with candidate locations
va_map

# Scatter plot for additional visualization.
plt.figure(figsize=(10,8))
plt.scatter(food_coords['longitude'], food_coords['latitude'], c='red', s=10, label='Food Access Points')
plt.scatter(centers[:,1], centers[:,0], c='green', marker='x', s=200, label='Candidate Locations')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Food Access Points and Proposed Grocery Store Locations')
plt.legend()
plt.show()

# Candidate Locations with Traffic Data

# helper function to compute the geodesic distance in meters between two (lat, lon) pairs.
def distance_meters(coord1, coord2):
    return geodesic(coord1, coord2).meters

# Search radius (meters) for nearby traffic data.
radius_meters = 5000  # 5 km radius

# Evaluate each candidate location (summing the traffic volume of traffic points within the radius)
# Adjust 'Traffic_Volume' to match your traffic dataset's column name if needed.
candidate_traffic_info = []

for center in centers:
    center_coord = (center[0], center[1])
    total_volume = 0
    count = 0
    for idx, row in traffic.iterrows():
        try:
            traffic_coord = (float(row['latitude']), float(row['longitude']))
            dist = distance_meters(center_coord, traffic_coord)
            if dist <= radius_meters:
                volume = row.get('Traffic_Volume', 0)
                total_volume += float(volume)
                count += 1
        except Exception as e:
            continue
    candidate_traffic_info.append({
        'latitude': center[0],
        'longitude': center[1],
        'total_traffic_volume': total_volume,
        'num_traffic_points': count
    })
    print(f"Candidate at {center_coord}: Total Traffic Volume = {total_volume}, Traffic Points = {count}")

# Define a traffic volume threshold for selecting promising candidate locations.
# This threshold is arbitrary—adjust based on your data and project needs.
traffic_threshold = 10000

final_candidates = [candidate for candidate in candidate_traffic_info if candidate['total_traffic_volume'] >= traffic_threshold]

print("\nFinal Candidate Locations (meeting traffic volume threshold):")
for candidate in final_candidates:
    print(f"Latitude: {candidate['latitude']:.4f}, Longitude: {candidate['longitude']:.4f}, Total Traffic Volume: {candidate['total_traffic_volume']}")

# Plot Candidate Locations on the Map

# Purple markers for these spots
for candidate in final_candidates:
    folium.Marker(
        location=[candidate['latitude'], candidate['longitude']],
        icon=folium.Icon(color='purple', icon='star')
    ).add_to(va_map)

# Display the final map
va_map

Food columns: Index(['FIPS', 'State', 'County', 'LACCESS_POP10', 'LACCESS_POP15',
       'PCH_LACCESS_POP_10_15', 'PCT_LACCESS_POP10', 'PCT_LACCESS_POP15',
       'LACCESS_LOWI10', 'LACCESS_LOWI15', 'PCH_LACCESS_LOWI_10_15',
       'PCT_LACCESS_LOWI10', 'PCT_LACCESS_LOWI15', 'LACCESS_HHNV10',
       'LACCESS_HHNV15', 'PCH_LACCESS_HHNV_10_15', 'PCT_LACCESS_HHNV10',
       'PCT_LACCESS_HHNV15', 'LACCESS_SNAP15', 'PCT_LACCESS_SNAP15',
       'LACCESS_CHILD10', 'LACCESS_CHILD15', 'LACCESS_CHILD_10_15',
       'PCT_LACCESS_CHILD10', 'PCT_LACCESS_CHILD15', 'LACCESS_SENIORS10',
       'LACCESS_SENIORS15', 'PCH_LACCESS_SENIORS_10_15',
       'PCT_LACCESS_SENIORS10', 'PCT_LACCESS_SENIORS15', 'LACCESS_WHITE15',
       'PCT_LACCESS_WHITE15', 'LACCESS_BLACK15', 'PCT_LACCESS_BLACK15',
       'LACCESS_HISP15', 'PCT_LACCESS_HISP15', 'LACCESS_NHASIAN15',
       'PCT_LACCESS_NHASIAN15', 'LACCESS_NHNA15', 'PCT_LACCESS_NHNA15',
       'LACCESS_NHPI15', 'PCT_LACCESS_NHPI15', 'LACCESS_MULTIR15',
       'PC

KeyError: "None of [Index(['Latitude', 'Longitude'], dtype='object')] are in the [columns]"