## 1. Load & Preprocess Data: Geocoding and Distance Analysis


In [9]:
# ============================================================
# 1. Load and preprocess smoking zone data (Yongsan-gu)
#    - Geocodes street addresses via Nominatim when the CSV lacks 위도/경도 columns
#    - Computes the average pairwise geodesic distance between zones
# ============================================================

import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import time
import os

# === Path Setup ===
# Everything is resolved relative to the project root: the raw CSV sits in
# Data/, and the rendered map is written to Output/.
base_dir = r"C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis"
file_path = os.path.join(base_dir, "Data", "서울특별시 용산구_흡연구역_20240719.csv")
output_path = os.path.join(base_dir, "Output", "yongsan_smoking_zones_map.html")

# Create the Output/ folder ahead of time (no error if it is already there)
os.makedirs(os.path.join(base_dir, "Output"), exist_ok=True)

# Read the smoking-zone table; the file is decoded as CP949
df = pd.read_csv(file_path, encoding="cp949")

# Geocoding if lat/lon not present
# When the CSV already ships 위도/경도 columns they are simply renamed to
# latitude/longitude; otherwise every street address is looked up via Nominatim.
if '위도' not in df.columns or '경도' not in df.columns:
    geolocator = Nominatim(user_agent="smoking_zone_mapper")
    latitudes, longitudes = [], []

    for address in df['소재지도로명주소']:  # Modify if needed
        # A missing address would otherwise be sent as the literal text "nan";
        # record it as un-geocoded without spending a request on it.
        if pd.isna(address):
            latitudes.append(None)
            longitudes.append(None)
            continue

        location = None
        try:
            # The query is prefixed with the district name (용산구)
            location = geolocator.geocode(f"용산구 {address}")
        except Exception as exc:
            # Best effort: one failed lookup must not abort the whole run, but
            # it should be visible. Catching Exception (not a bare except)
            # still lets KeyboardInterrupt/SystemExit stop the loop.
            print(f"⚠️ Geocoding failed for '{address}': {exc}")

        # None marks rows that could not be resolved
        latitudes.append(location.latitude if location else None)
        longitudes.append(location.longitude if location else None)
        time.sleep(1)  # API rate limit

    df['latitude'] = latitudes
    df['longitude'] = longitudes
else:
    df = df.rename(columns={'위도': 'latitude', '경도': 'longitude'})

# Clean coordinates
# Rows missing either coordinate are discarded first, then both columns are
# cast to float in a single pass.
df = df.dropna(subset=['latitude', 'longitude']).astype(
    {'latitude': float, 'longitude': float}
)

# Pairwise distance calculation
# Every unordered pair of zones is measured exactly once: each row is paired
# only with the rows that follow it. Distances are geodesic, in meters.
coords = df[['latitude', 'longitude']].values
distances = [
    geodesic(start, end).meters
    for idx, start in enumerate(coords)
    for end in coords[idx + 1:]
]

if distances:
    avg_dist = sum(distances) / len(distances)
    print(f"✅ Average distance between zones: {avg_dist:.2f} meters")
else:
    # Fewer than two zones with coordinates: there is no pair to measure,
    # so say so instead of finishing silently.
    print("⚠️ Need at least two zones with coordinates to compute an average distance")


✅ Average distance between zones: 1672.94 meters


## 2. Visualize Smoking Zones on Interactive Map & Save as HTML


In [11]:
# ============================================================
# 2. Visualize Yongsan-gu smoking zones with Folium
#    - Interactive map rendering
#    - Saved as HTML and opened automatically
# ============================================================

import webbrowser
from pathlib import Path

import folium

# Create folium map, centered on the mean coordinate of all zones
map_center = [df['latitude'].mean(), df['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=14)

# Add markers
for _, row in df.iterrows():
    # Use the facility name as popup text; fall back to a generic label when
    # the column is absent or the cell is empty (NaN would render as "nan").
    name = row.get('시설명', 'Smoking Zone')
    if pd.isna(name):
        name = 'Smoking Zone'
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=str(name),
        icon=folium.Icon(color='green', icon='smoking', prefix='fa')
    ).add_to(m)

# Save to HTML
m.save(output_path)
print(f"✅ Map saved to: {output_path}")

# Open in default browser. webbrowser.open() is only specified for URLs, so
# the saved file is handed over as a file:// URI rather than a raw path.
webbrowser.open(Path(output_path).resolve().as_uri())

# Show in notebook: only the last expression of a cell is rendered, so the
# map object must come last (this line is a no-op when run as a plain script).
m

✅ Map saved to: C:\Users\ghwns\HJ_git\Misc-Projects\seoul-smoking-gis\Output\yongsan_smoking_zones_map.html


True