In [None]:
!pip install requests pandas tqdm



In [None]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, sqrt, atan2


In [None]:
# Input CSV locations: the station table plus one previously prepared file per feature type
stations_csv = "/content/drive/MyDrive/manhattan_stations_.csv"
parks_csv = "/content/drive/MyDrive/Formatted_Manhattan_Parks_Dataset.csv"
libraries_csv = "/content/drive/MyDrive/Updated_Manhattan_Libraries_Dataset.csv"
colleges_csv = "/content/drive/MyDrive/Formatted_Manhattan_Colleges_Dataset.csv"
attractions_csv = "/content/drive/MyDrive/Formatted_Manhattan_Tourist_Attractions_Dataset.csv"
athletics_csv = "/content/drive/MyDrive/Formatted_Manhattan_Athletic_Facilities_Dataset.csv"

# Station dataset
main_df = pd.read_csv(stations_csv)

# Formatted feature datasets, one DataFrame per feature type
parks_df = pd.read_csv(parks_csv)
libraries_df = pd.read_csv(libraries_csv)
colleges_df = pd.read_csv(colleges_csv)
attractions_df = pd.read_csv(attractions_csv)
athletics_df = pd.read_csv(athletics_csv)


In [None]:
def haversine_np(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two sets of points.

    Applies the haversine formula on a sphere of radius 6,371,000 m. All
    coordinates are in decimal degrees. Each argument may be a scalar or an
    array-like (list, NumPy array); the arguments are combined by NumPy
    broadcasting, so a single point can be compared against an array of
    points in one call.

    Parameters
    ----------
    lat1, lon1 : latitude and longitude of the first point(s), in degrees.
    lat2, lon2 : latitude and longitude of the second point(s), in degrees.

    Returns
    -------
    Distance(s) in meters, with the broadcast shape of the inputs. NaN
    coordinates produce NaN distances.
    """
    R = 6371000  # Earth radius in meters
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    # np.subtract instead of the `-` operator so plain Python lists are accepted too
    d_phi = np.radians(np.subtract(lat2, lat1))
    d_lambda = np.radians(np.subtract(lon2, lon1))

    a = np.sin(d_phi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(d_lambda / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt(1 - a) return NaN. Clamp it.
    a = np.minimum(1.0, np.maximum(0.0, a))
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c  # returns distance in meters


In [None]:
# Build a label -> coordinate-array lookup; each array has one row per location
# and two columns taken from START_LAT and START_LNG, in that order
coord_columns = ['START_LAT', 'START_LNG']
feature_frames = {
    'Parks': parks_df,
    'Libraries': libraries_df,
    'Colleges': colleges_df,
    'Attractions': attractions_df,
    'Athletics': athletics_df,
}
datasets = {
    label: frame[coord_columns].values
    for label, frame in feature_frames.items()
}


In [None]:
# Search radius in meters. Used for both the distance threshold and the
# output column suffix, so keep it an integer to preserve names like 'Parks_500m'.
RADIUS_M = 500

# Extract main station coordinates as rows of (station id, latitude, longitude)
main_coords = main_df[['start_station_id', 'start_lat', 'start_lng']].values
n_stations = len(main_coords)

# Prepare result list: one dict per station, holding a count per feature type
results = []

for idx, (station_id, lat1, lng1) in enumerate(main_coords):
    row = {'station_id': station_id}
    for label, points in datasets.items():
        # Distance from this station to every location of the current feature type
        distances = haversine_np(lat1, lng1, points[:, 0], points[:, 1])
        row[f'{label}_{RADIUS_M}m'] = int(np.count_nonzero(distances <= RADIUS_M))
    results.append(row)

    # Report every 50th station, and always the last one so the log shows completion
    if idx % 50 == 0 or idx + 1 == n_stations:
        print(f"Processed {idx + 1} / {n_stations} stations...")

Processed 1 / 1715 stations...
Processed 51 / 1715 stations...
Processed 101 / 1715 stations...
Processed 151 / 1715 stations...
Processed 201 / 1715 stations...
Processed 251 / 1715 stations...
Processed 301 / 1715 stations...
Processed 351 / 1715 stations...
Processed 401 / 1715 stations...
Processed 451 / 1715 stations...
Processed 501 / 1715 stations...
Processed 551 / 1715 stations...
Processed 601 / 1715 stations...
Processed 651 / 1715 stations...
Processed 701 / 1715 stations...
Processed 751 / 1715 stations...
Processed 801 / 1715 stations...
Processed 851 / 1715 stations...
Processed 901 / 1715 stations...
Processed 951 / 1715 stations...
Processed 1001 / 1715 stations...
Processed 1051 / 1715 stations...
Processed 1101 / 1715 stations...
Processed 1151 / 1715 stations...
Processed 1201 / 1715 stations...
Processed 1251 / 1715 stations...
Processed 1301 / 1715 stations...
Processed 1351 / 1715 stations...
Processed 1401 / 1715 stations...
Processed 1451 / 1715 stations...
Pro

In [None]:
# Step 6: Save Results
print(" Saving results to DataFrame and exporting to CSV...")

# One variable for the output path so the log line always names the file actually written
output_csv = "proximity_summary_by_station.csv"

# One row per station, one count column per feature type
summary_df = pd.DataFrame(results)
summary_df.to_csv(output_csv, index=False)

print(" Proximity analysis complete.")
print(f" Output file: {output_csv}")
print(summary_df.head())


 Saving results to DataFrame and exporting to CSV...
 Proximity analysis complete.
 Output file: summary_by_station_using_urban_features.csv
  station_id  Parks_500m  Libraries_500m  Colleges_500m  Attractions_500m  \
0    4818.03           0               0              0                 0   
1    4821.03           0               0              0                 0   
2     4821.1           0               0              0                 0   
3    4829.01           0               0              0                 1   
4    4832.07           0               0              0                 0   

   Athletics_500m  
0               0  
1               0  
2               0  
3               0  
4               0  
