## 1. Load & Preprocess Data: Outlier Filtering and Nearest-Distance Analysis


In [8]:
# ============================================================
# 1. Load and preprocess smoking zone data (Yongsan + Yeongdeungpo)
#    - Removes distant outliers (5km+ from district center)
#    - Computes average nearest distance *within each district*
# ============================================================

import pandas as pd
from geopy.distance import geodesic
import os

# File paths
# NOTE(review): these are absolute, machine-specific paths; anyone else running
# this notebook has to edit them. They are kept as plain strings because later
# cells hand `output_path` to `m.save(...)` and `webbrowser.open(...)`.
file_path = r"C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Data\서울특별시_용산구_영등포구_흡연구역.csv"
output_path = r"C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Output\merged_smoking_zones_map.html"

# Ensure output directory exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Load dataset (the CSV is read with the cp949 codec) and switch the Korean
# coordinate headers to the English names used by the rest of the notebook.
df = pd.read_csv(file_path, encoding='cp949')
df = df.rename(columns={'위도': 'latitude', '경도': 'longitude'})

# Coerce coordinates to numbers *before* dropping missing values: a malformed
# cell (e.g. whitespace or stray text) becomes NaN and is removed together with
# the genuinely missing rows instead of raising during the float conversion.
coord_cols = ['latitude', 'longitude']
df[coord_cols] = df[coord_cols].apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=coord_cols)

# Guarantee float dtype even when every coordinate happened to parse as an int.
df = df.astype({'latitude': float, 'longitude': float})

# ------------------------------------------------------------
# Remove geographic outliers (5km+ from district center)
# ------------------------------------------------------------
# Reference (latitude, longitude) point for each district; a row's distance
# from its district's point decides whether it is treated as an outlier.
district_centers = {
    '용산구': (37.5325, 126.9900),
    '영등포구': (37.5219, 126.9244),
}

def is_outlier(row, max_distance_m=5000):
    """Return True when a row lies too far from its district's reference point.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys '자치구', 'latitude' and 'longitude'.
    max_distance_m : float, optional
        Distance threshold in meters. Defaults to 5000, the cut-off this
        notebook uses; a row is an outlier only when strictly farther away.

    Returns
    -------
    bool
        False for districts that have no entry in ``district_centers``
        (such rows are never filtered out), otherwise whether the geodesic
        distance to the district's reference point exceeds ``max_distance_m``.
    """
    gu = row['자치구']
    center = district_centers.get(gu)
    if not center:
        # Unknown district: nothing to measure against, so keep the row.
        return False
    d = geodesic((row['latitude'], row['longitude']), center).meters
    return d > max_distance_m

# Evaluate `is_outlier` for every row and keep only the rows that pass.
# The mask is built as an explicit boolean Series (aligned on df's index) rather
# than via `df.apply(..., axis=1)` stored in a temporary column: on an empty
# frame `apply` hands back a DataFrame, which cannot be assigned to one column.
outlier_mask = pd.Series(
    [is_outlier(row) for _, row in df.iterrows()],
    index=df.index,
    dtype=bool,
)
# `.copy()` keeps `df` an independent frame, as the original filter+drop did.
df = df.loc[~outlier_mask].copy()

# ------------------------------------------------------------
# Compute average nearest distance *within each district*
# ------------------------------------------------------------
# Maps district name -> mean nearest-neighbour distance in meters
# (None when the district has fewer than two points).
gu_avg_nearest = {}

for gu, group in df.groupby("자치구"):
    coords = group[['latitude', 'longitude']].values
    n_points = len(coords)

    if n_points < 2:
        gu_avg_nearest[gu] = None
        print(f"⚠️ {gu}: Not enough data (less than 2 points)")
        continue

    # nearest_dists[k] holds the smallest distance seen so far from point k to
    # any other point of the same district.
    nearest_dists = [float('inf')] * n_points

    # Each unordered pair is measured once and the result is applied to both
    # endpoints, halving the number of geodesic computations compared with
    # measuring i->j and j->i separately.
    for i in range(n_points - 1):
        for j in range(i + 1, n_points):
            d = geodesic(coords[i], coords[j]).meters
            if d < nearest_dists[i]:
                nearest_dists[i] = d
            if d < nearest_dists[j]:
                nearest_dists[j] = d

    # Note: two rows with identical coordinates contribute a distance of 0.
    avg_nearest = sum(nearest_dists) / n_points
    gu_avg_nearest[gu] = avg_nearest
    print(f"✅ {gu}: Average nearest distance = {avg_nearest:.2f} meters")

✅ 영등포구: Average nearest distance = 193.97 meters
✅ 용산구: Average nearest distance = 138.87 meters


## 2. Visualize Smoking Zones on Interactive Map & Save as HTML


In [10]:
# ============================================================
# 2. Visualize merged smoking zones with Folium
#    - Color-coded by district
#    - Tooltip appears on hover
# ============================================================

import folium
import webbrowser
from pathlib import Path

# Label used when a row has no usable location name.
DEFAULT_ZONE_NAME = 'Smoking Zone'

# Map center: mean coordinate of all remaining points
map_center = [df['latitude'].mean(), df['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=13)

# Marker color per district; districts not listed here fall back to gray.
color_map = {
    '용산구': 'green',
    '영등포구': 'blue',
}

# Add one marker per row, with a click popup and a hover tooltip
for _, row in df.iterrows():
    # Missing cells arrive as NaN, which would otherwise be rendered as the
    # literal text "nan" in the popup/tooltip.
    gu = row.get('자치구', '')
    if pd.isna(gu):
        gu = ''
    name = row.get('설치 위치', DEFAULT_ZONE_NAME)
    if pd.isna(name):
        name = DEFAULT_ZONE_NAME

    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"{gu} - {name}",
        tooltip=name,  # shown on hover
        icon=folium.Icon(color=color_map.get(gu, 'gray'), icon='smoking', prefix='fa')
    ).add_to(m)

# Save and open
m.save(output_path)
print(f"✅ Map saved to: {output_path}")
# Pass a file:// URI rather than a bare filesystem path: the webbrowser docs
# describe opening plain filenames as neither supported nor portable.
webbrowser.open(Path(output_path).resolve().as_uri())

✅ Map saved to: C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Output\merged_smoking_zones_map.html


True