#

In [37]:
import pandas as pd
import numpy as np

# Load every point layer through one helper so the cleaning rules stay identical
def _load_layer(csv_path, numeric_cols=('lon', 'lat')):
    """Read a CSV layer, coerce ``numeric_cols`` to numbers and drop rows lacking any of them."""
    layer = pd.read_csv(csv_path)
    for col in numeric_cols:
        # errors='coerce' turns unparseable text into NaN so the row is dropped below
        layer[col] = pd.to_numeric(layer[col], errors='coerce')
    return layer.dropna(subset=list(numeric_cols))


rain_gardens = _load_layer('rain_gardens.csv')
trash_cans = _load_layer('trash_cans.csv')
bus_stops = _load_layer('bus_stops.csv')
# Pedestrian rows additionally need a usable 'count' value
pedestrians = _load_layer('pedestrians.csv', numeric_cols=('lon', 'lat', 'count'))
flood_zones = _load_layer('flood_zones.csv')

# Fixed haversine function
def haversine_vectorized(coords1, coords2):
    """
    Compute pairwise haversine (great-circle) distances between two point sets.

    Parameters
    ----------
    coords1, coords2 : array-like
        Points given as ``[[lon, lat], ...]`` in decimal degrees. A single
        ``[lon, lat]`` pair and an empty sequence are accepted as well.

    Returns
    -------
    numpy.ndarray
        Matrix of shape ``(len(coords1), len(coords2))``; entry ``[i, j]`` is
        the distance in meters between ``coords1[i]`` and ``coords2[j]`` on a
        sphere of radius 6,371,000 m.
    """
    def _lon_lat_radians(coords):
        # Normalise to a 2-D float array so the column slicing below always works.
        points = np.asarray(coords, dtype=float)
        if points.size == 0:
            points = points.reshape(0, 2)      # empty input -> zero rows
        elif points.ndim == 1:
            points = points.reshape(1, -1)     # single [lon, lat] pair -> one row
        points = np.radians(points)
        return points[:, 0], points[:, 1]

    lon1, lat1 = _lon_lat_radians(coords1)
    lon2, lat2 = _lon_lat_radians(coords2)

    # Broadcast to an (n1, n2) grid of coordinate differences.
    dlon = lon2[np.newaxis, :] - lon1[:, np.newaxis]
    dlat = lat2[np.newaxis, :] - lat1[:, np.newaxis]

    a = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat1[:, np.newaxis]) * np.cos(lat2[np.newaxis, :]) * np.sin(dlon / 2) ** 2
    )
    # Rounding can push `a` marginally outside [0, 1]; clip so arcsin never returns NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    return 6371000 * c

def nearest_distance(garden_coords, feature_coords):
    """Return, for each garden, the distance in meters to its closest feature."""
    # Rows are gardens and columns are features, so reduce across the columns.
    return np.min(haversine_vectorized(garden_coords, feature_coords), axis=1)

def count_within_radius(garden_coords, feature_coords, radius_meters=100):
    """Count, for each garden, the features no farther than ``radius_meters`` away."""
    pairwise = haversine_vectorized(garden_coords, feature_coords)
    is_near = pairwise <= radius_meters
    # Summing the boolean mask along the feature axis yields one count per garden.
    return np.sum(is_near, axis=1)

# Coordinate matrices, one [lon, lat] row per point, for every layer
garden_coords = rain_gardens[['lon', 'lat']].to_numpy(dtype=float)
trash_coords = trash_cans[['lon', 'lat']].to_numpy(dtype=float)
bus_coords = bus_stops[['lon', 'lat']].to_numpy(dtype=float)
flood_coords = flood_zones[['lon', 'lat']].to_numpy(dtype=float)

# Distance from each garden to the closest feature of each kind
for column, coords in (
    ('dist_to_trash', trash_coords),
    ('dist_to_bus', bus_coords),
    ('dist_to_flood', flood_coords),
):
    rain_gardens[column] = nearest_distance(garden_coords, coords)

# Number of features of each kind within 100 m of each garden
for column, coords in (
    ('trash_count_100m', trash_coords),
    ('bus_count_100m', bus_coords),
):
    rain_gardens[column] = count_within_radius(garden_coords, coords, 100)

def weighted_pedestrian_score(garden_coords, ped_df, radius=1000):
    """
    Sum the pedestrian counts recorded within ``radius`` meters of each garden.

    Despite the name, no distance weighting is applied: every pedestrian
    location inside the radius contributes its full ``'count'`` value.

    Parameters
    ----------
    garden_coords : array-like
        Garden positions as ``[[lon, lat], ...]``.
    ped_df : pandas.DataFrame
        Pedestrian observations with ``'lon'``, ``'lat'`` and ``'count'`` columns.
    radius : float, default 1000
        Search radius in meters (inclusive).

    Returns
    -------
    numpy.ndarray
        One total per garden; 0 where no pedestrian location is in range.
    """
    ped_coords = ped_df[['lon', 'lat']].values.astype(float)
    distances = haversine_vectorized(garden_coords, ped_coords)

    # Missing counts contribute nothing, mirroring pandas' NaN-skipping sum.
    counts = ped_df['count'].fillna(0).to_numpy()

    # Mask out-of-range locations and total each row in one vectorised pass
    # instead of looping over gardens in Python.
    within_radius = distances <= radius
    return np.where(within_radius, counts, 0).sum(axis=1)

# Total pedestrian count around each garden, using a 1000 m search radius
rain_gardens['pedestrian_score'] = weighted_pedestrian_score(
    garden_coords, pedestrians, radius=1000
)

def normalize(series, inverse=False):
    """
    Min-max scale ``series`` onto [0, 1].

    A constant series cannot be scaled, so every element becomes 0.5.
    With ``inverse=True`` the scale is flipped: the smallest input maps to 1
    and the largest to 0.
    """
    lo, hi = series.min(), series.max()

    # Degenerate range: return a neutral mid-point for every row.
    if hi == lo:
        return pd.Series(0.5, index=series.index)

    scaled = (series - lo) / (hi - lo)
    return 1 - scaled if inverse else scaled

# Scale every raw feature onto [0, 1]; inverse=True flips it so small raw values score high
_SCORE_SPECS = (
    # (score column, raw column, inverse)
    ('trash_lack_score', 'dist_to_trash', False),         # far from trash cans = HIGH score (likely dumping ground)
    ('bus_proximity_score', 'dist_to_bus', True),         # close to a bus stop = HIGH score
    ('flood_proximity_score', 'dist_to_flood', True),     # close to a flood zone = HIGH score
    ('trash_density_score', 'trash_count_100m', True),    # few trash cans nearby = HIGH score
    ('pedestrian_norm_score', 'pedestrian_score', False), # more pedestrians = HIGH score
)
for score_col, raw_col, flip in _SCORE_SPECS:
    rain_gardens[score_col] = normalize(rain_gardens[raw_col], inverse=flip)

def calculate_priority_score(df, weights):
    """
    Combine the five normalised score columns of ``df`` into one weighted sum.

    ``weights`` maps 'trash_lack', 'bus_proximity', 'flood_proximity',
    'trash_density' and 'pedestrian' to their weights, which must sum to 1.0
    (checked with ``np.isclose``); otherwise ValueError is raised.
    """
    total = sum(weights.values())
    if not np.isclose(total, 1.0):
        raise ValueError(f"Weights must sum to 1.0, got {total}")

    # (weight key, score column) pairs, accumulated in this order
    components = (
        ('trash_lack', 'trash_lack_score'),            # rewards distance FROM trash cans
        ('bus_proximity', 'bus_proximity_score'),
        ('flood_proximity', 'flood_proximity_score'),
        ('trash_density', 'trash_density_score'),      # low density = high priority
        ('pedestrian', 'pedestrian_norm_score'),
    )

    score = None
    for weight_key, column in components:
        term = weights[weight_key] * df[column]
        score = term if score is None else score + term

    return score

def rank_with_custom_weights(df, trash_lack, bus_prox,
                             flood_prox, trash_dens, ped):
    """
    Rank rain gardens under a custom weighting of the five score columns.

    The weights are rescaled to sum to 1 before scoring, so only their
    ratios matter (e.g. ``1, 1, 0, 0, 0`` equals ``50, 50, 0, 0, 0``).

    Parameters:
    - df: frame holding the normalised score columns
    - trash_lack: Weight for being FAR from trash cans (dumping risk)
    - bus_prox: Weight for being CLOSE to bus stops (high traffic)
    - flood_prox: Weight for being CLOSE to flood zones (critical for flooding)
    - trash_dens: Weight for having FEW trash cans nearby (dumping risk)
    - ped: Weight for pedestrian traffic (wear and tear)

    Returns:
    A copy of ``df`` with 'priority_score' and 'rank' columns added, sorted by
    descending priority. Rank 1 is the highest score; ties share the lowest
    rank (``method='min'``). The input frame is not modified.

    Raises:
    ValueError if the weights sum to zero, since they cannot be rescaled.
    """
    raw_weights = {
        'trash_lack': trash_lack,
        'bus_proximity': bus_prox,
        'flood_proximity': flood_prox,
        'trash_density': trash_dens,
        'pedestrian': ped
    }

    total = sum(raw_weights.values())
    # Guard the rescaling: a zero total would otherwise surface as an opaque
    # ZeroDivisionError (or NaN weights with numpy floats).
    if total == 0:
        raise ValueError("Weights sum to 0; at least one weight must be non-zero")

    # Rescale so the weights always sum to 1
    weights = {k: v / total for k, v in raw_weights.items()}

    result_df = df.copy()
    result_df['priority_score'] = calculate_priority_score(result_df, weights)
    result_df['rank'] = result_df['priority_score'].rank(ascending=False, method='min')

    return result_df.sort_values('priority_score', ascending=False)

In [38]:
def Top_Rain_Gardens(count, trash_score, bus_score,
                     flood_score, trash_density_score, ped_score):
    """
    Print and return the ``count`` highest-priority rain gardens for a weighting.

    Reads the module-level ``rain_gardens`` frame and ranks it with
    ``rank_with_custom_weights``; the weights are rescaled there, so only
    their ratios matter.

    Parameters:
    - count: number of top-ranked gardens to report
    - trash_score: weight for being far from trash cans
    - bus_score: weight for being close to bus stops
    - flood_score: weight for being close to flood zones
    - trash_density_score: weight for having few trash cans nearby
    - ped_score: weight for pedestrian traffic

    Returns:
    The printed DataFrame (report columns only), so callers can keep the
    result instead of receiving None.
    """
    scenario = rank_with_custom_weights(
        rain_gardens,
        trash_lack=trash_score,
        bus_prox=bus_score,
        flood_prox=flood_score,
        trash_dens=trash_density_score,
        ped=ped_score,
    )

    report_columns = [
        'id', 'lon', 'lat',
        'dist_to_flood', 'dist_to_trash', 'dist_to_bus',
        'trash_count_100m', 'pedestrian_score',
        'priority_score', 'rank',
    ]
    top = scenario[report_columns].head(count)
    print(top)
    return top

In [None]:
# Scenario: put all weight on bus-stop proximity (weights are rescaled to sum
# to 1 downstream, so 10000 vs. 0 simply means "bus proximity only").
# Run it once: a second identical call would only print the same table again.
model_out = Top_Rain_Gardens(
    15,
    trash_score=0,
    bus_score=10000,
    flood_score=0,
    trash_density_score=0,
    ped_score=0,
)

           id        lon        lat  dist_to_flood  dist_to_trash  \
146  160989.0 -73.830925  40.764979     527.876784     153.223430   
167  149619.0 -73.852586  40.784047    1986.179981     555.148465   
244  120450.0 -73.831128  40.764926     533.054841     158.559528   
166  186016.0 -73.810420  40.724310     409.728513     157.022021   
93   158169.0 -73.837385  40.742871    1012.131606     955.863112   
45   158172.0 -73.818471  40.759819     992.271455     455.471194   
134  188316.0 -73.927975  40.770610     177.393480      90.755949   
182  188317.0 -73.928445  40.770350     143.456000      68.318720   
52   188147.0 -73.806828  40.694929     197.816898      73.775414   
108  188146.0 -73.806828  40.694929     197.816898      73.775414   
120  188315.0 -73.928264  40.770780     192.139582     110.223041   
78   160887.0 -73.862254  40.728084     494.545883      91.419769   
90   187262.0 -73.833895  40.748842    1233.263520     651.711161   
107  149624.0 -73.816080  40.73617