In [None]:
# ============================================================
# 1. Load and preprocess smoking zone data (Yongsan + Yeongdeungpo)
# ============================================================

import pandas as pd
from geopy.distance import geodesic
import folium
import webbrowser
import os

# ---------------------- Config ----------------------
# Source CSV of smoking zones; main() reads it with cp949 encoding.
INPUT_PATH = r"C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Data\smoking_zone.csv"
# Destination of the rendered folium map (HTML); main() creates its parent
# directory if it does not exist.
OUTPUT_PATH = r"C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Output\merged_smoking_zones_map.html"

# Reference point per district as (latitude, longitude). remove_outliers()
# measures each zone's geodesic distance to its district's point.
DISTRICT_CENTERS = {
    '용산구': (37.5325, 126.9900),
    '영등포구': (37.5219, 126.9244),
}

# Marker colour per district; plot_folium_map() falls back to gray for any
# district not listed here.
DISTRICT_COLORS = {
    '용산구': 'green',
    '영등포구': 'blue',
}


# ---------------------- Helpers ----------------------
def remove_outliers(df, centers, max_distance=5000):
    """Drop rows lying more than ``max_distance`` meters from their district center.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns and,
            optionally, a ``자치구`` (district) column. It is not modified.
        centers: Mapping of district name to a ``(latitude, longitude)`` tuple.
        max_distance: Largest allowed geodesic distance in meters.

    Returns:
        A new DataFrame with only the rows that are within range. Rows whose
        district has no entry in ``centers`` are always kept.
    """
    if df.empty:
        # Nothing to measure; also sidesteps apply() not yielding a boolean
        # Series for an empty frame.
        return df.copy()

    def is_far(row):
        center = centers.get(row.get('자치구'))
        if not center:
            # Unknown district: there is no reference point, so keep the row.
            return False
        return geodesic((row['latitude'], row['longitude']), center).meters > max_distance

    # Keep the mask in a local Series instead of writing a helper column into
    # the caller's DataFrame.
    outlier_mask = df.apply(is_far, axis=1).astype(bool)
    return df[~outlier_mask]


def compute_avg_nearest_distances(df):
    """Report, per district, the mean distance from each zone to its closest neighbour.

    For every ``자치구`` group the geodesic distance (in meters) from each point
    to the nearest other point of the same group is measured, and those
    distances are averaged. Groups with fewer than two points are reported and
    skipped.

    Returns:
        A ``defaultdict`` mapping district name to the average distance;
        looking up a district that was skipped or absent yields ``None``.
    """
    from collections import defaultdict

    averages = defaultdict(lambda: None)

    for district, members in df.groupby("자치구"):
        points = members[['latitude', 'longitude']].values
        if len(points) < 2:
            print(f"⚠️ {district}: Not enough data (less than 2 points)")
            continue

        closest = []
        for idx, origin in enumerate(points):
            # Distance to every other point of the district, smallest wins.
            best = min(
                geodesic(origin, other).meters
                for other_idx, other in enumerate(points)
                if other_idx != idx
            )
            closest.append(best)

        mean_gap = sum(closest) / len(closest)
        averages[district] = mean_gap
        print(f"✅ {district}: Average nearest distance = {mean_gap:.2f} meters")

    return averages


def plot_folium_map(df, color_map, output_path):
    """Render one folium marker per row, save the map as HTML and open it.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns; the optional
            ``자치구`` and ``설치 위치`` columns supply the district and the label.
        color_map: Mapping of district name to a folium icon colour; districts
            not in the mapping are drawn in gray.
        output_path: Path of the HTML file to write.
    """
    from pathlib import Path

    # Centre the initial view on the mean coordinate of all plotted points.
    center = [df['latitude'].mean(), df['longitude'].mean()]
    m = folium.Map(location=center, zoom_start=13)

    for _, row in df.iterrows():
        gu = row.get('자치구', '')
        name = row.get('설치 위치', 'Smoking Zone')
        if pd.isna(name):
            # Column exists but the cell is empty: avoid a literal "nan" label.
            name = 'Smoking Zone'
        lat, lon = row['latitude'], row['longitude']
        color = color_map.get(gu, 'gray')

        folium.Marker(
            location=[lat, lon],
            popup=f"{gu} - {name}",
            tooltip=name,
            icon=folium.Icon(color=color, icon='info-sign')  # safer than 'smoking'
        ).add_to(m)

    m.save(output_path)
    print(f"✅ Map saved to: {output_path}")
    # webbrowser.open() expects a URL; passing a bare filesystem path is
    # documented as neither supported nor portable, so hand it a file:// URI.
    webbrowser.open(Path(output_path).resolve().as_uri())


# ---------------------- Main ----------------------
def main():
    """Load the smoking-zone CSV, drop outliers, report spacing and draw the map.

    Reads ``INPUT_PATH``, keeps rows with usable coordinates, removes points
    too far from their district center, prints the average nearest-neighbour
    distance per district and writes the folium map to ``OUTPUT_PATH``.
    """
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    # Load
    df = pd.read_csv(INPUT_PATH, encoding='cp949')
    df = df.rename(columns={'위도': 'latitude', '경도': 'longitude'})
    df = df.dropna(subset=['latitude', 'longitude'])
    df['latitude'] = df['latitude'].astype(float)
    df['longitude'] = df['longitude'].astype(float)

    # Clean
    df = remove_outliers(df, DISTRICT_CENTERS)
    # Called for its printed per-district report; the returned mapping is not
    # needed here.
    compute_avg_nearest_distances(df)

    # Visualize
    plot_folium_map(df, DISTRICT_COLORS, OUTPUT_PATH)


# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

✅ 영등포구: Average nearest distance = 193.97 meters
✅ 용산구: Average nearest distance = 138.87 meters
✅ Map saved to: C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Output\merged_smoking_zones_map.html
