# In [None]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from tqdm import tqdm

# Read the heavy-snow event records from disk
weather_df = pd.read_csv('hs/Heavy_Snow.csv')

# Derive trimmed, lower-cased copies of the county and state names
weather_df = weather_df.assign(
    county_clean=weather_df['CZ_NAME'].str.strip().str.lower(),
    state_clean=weather_df['STATE'].str.strip().str.lower(),
)

# Reduce to one row per (county, state) combination
unique_locations = weather_df.loc[:, ['county_clean', 'state_clean']].drop_duplicates()
total_counties = len(unique_locations)

# Persist the distinct combinations before any lookups are made
unique_locations.to_csv('distinct_county_state_combinations.csv', index=False)

# Report how many pairs will be geocoded, then list them
print(f"Total distinct county-state pairs to geocode: {total_counties}")
print("\nUnique county-state pairs:", unique_locations, sep="\n")

# Build the Nominatim client and wrap its lookup so that successive calls
# are spaced at least one second apart (respects the API limits)
geolocator = Nominatim(user_agent="county-geocoder")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Function to get coordinates
def get_lat_lng(row, geocoder=None):
    """Look up the latitude/longitude of one county-state pair.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the
            'county_clean' and 'state_clean' values.
        geocoder: Optional callable that takes a query string and returns
            an object with ``latitude`` and ``longitude`` attributes, or a
            falsy value when nothing is found. Defaults to the module-level
            rate-limited ``geocode``.

    Returns:
        pd.Series of ``[latitude, longitude]``, or ``[None, None]`` when
        either name is missing/blank or the geocoder returns no match.
    """
    county = row['county_clean']
    state = row['state_clean']

    # A missing value would otherwise be formatted as the literal text
    # "nan" and spend a rate-limited request on a meaningless query.
    for part in (county, state):
        if pd.isna(part) or not str(part).strip():
            return pd.Series([None, None])

    lookup = geocode if geocoder is None else geocoder
    location = lookup(f"{county} County, {state}, USA")
    if not location:
        return pd.Series([None, None])
    return pd.Series([location.latitude, location.longitude])

# Enable progress_apply on pandas objects, labelled for this run
tqdm.pandas(desc="Geocoding counties")

# Geocode each distinct pair row by row and store the results as lat/lng
coordinates = unique_locations.progress_apply(get_lat_lng, axis=1)
unique_locations[['lat', 'lng']] = coordinates

# Write out the distinct pairs together with their coordinates
unique_locations.to_csv('distinct_counties_with_coordinates.csv', index=False)

# Attach the coordinates to every original record via a left join on the
# cleaned county/state keys
weather_df = pd.merge(
    weather_df,
    unique_locations,
    how='left',
    on=['county_clean', 'state_clean'],
)

# Write out the enriched records
weather_df.to_csv('heavy_snow_with_coordinates_geopy.csv', index=False)

print("\n✅ Geocoding completed and saved.")