# Import Libraries

In [8]:
import pandas as pd
from geopy.distance import geodesic
import folium
import os

# Configuration

In [9]:
# Source CSV of smoking-zone locations and destination of the rendered HTML map.
INPUT_PATH = "./Data/smoking_zone.csv"
OUTPUT_PATH = "./Output/merged_smoking_zones_map.html"

# Reference (latitude, longitude) point for each district, keyed by district name.
DISTRICT_CENTERS = dict([
    ('용산구', (37.5325, 126.9900)),
    ('영등포구', (37.5219, 126.9244)),
])

# Marker colour used for each district, keyed by district name.
DISTRICT_COLORS = dict([
    ('용산구', 'green'),
    ('영등포구', 'blue'),
])

# Helper Functions

In [10]:
def remove_outliers(df, centers, max_distance=5000):
    """Drop rows that lie farther than ``max_distance`` from their district center.

    The input frame is left untouched; a new DataFrame is returned.

    Args:
        df: DataFrame with 'latitude' and 'longitude' columns and a '자치구'
            column naming the district of each row.
        centers: Mapping from district name to a (latitude, longitude) tuple.
            Rows whose district has no entry are always kept.
        max_distance: Cut-off in meters (geodesic distance).

    Returns:
        A new DataFrame containing only the rows within ``max_distance`` of
        their district center (or without a known center).
    """
    def is_far(row):
        center = centers.get(row.get('자치구'))
        if not center:
            # Unknown district: no reference point, so never treat as outlier.
            return False
        return geodesic((row['latitude'], row['longitude']), center).meters > max_distance

    # A row-wise apply on a zero-row frame does not yield a boolean Series,
    # so short-circuit instead of building a mask from it.
    if df.empty:
        return df.copy()

    # Keep the mask separate rather than writing a helper column into the
    # caller's DataFrame.
    outlier_mask = df.apply(is_far, axis=1).astype(bool)
    return df[~outlier_mask]


def compute_avg_nearest_distances(df):
    """Compute, per district, the mean distance from each point to its closest neighbour.

    Rows are grouped by the '자치구' column. Districts with fewer than two
    points are reported and left out of the result. A summary line is printed
    for every district.

    Returns:
        dict mapping district name to the average nearest-neighbour distance
        in meters.
    """
    averages = {}
    for district, members in df.groupby("자치구"):
        points = members[['latitude', 'longitude']].values
        count = len(points)
        if count < 2:
            print(f"{district}: not enough data (less than 2 points)")
            continue

        nearest = []
        for idx, origin in enumerate(points):
            # Distance to the closest *other* point in the same district.
            closest = min(
                geodesic(origin, other).meters
                for pos, other in enumerate(points)
                if pos != idx
            )
            nearest.append(closest)

        mean_nearest = sum(nearest) / count
        averages[district] = mean_nearest
        print(f"{district}: average nearest distance = {mean_nearest:.2f} m")
    return averages


def plot_folium_map(df, color_map, output_path):
    """Render every row of ``df`` as a marker on a Folium map and save it as HTML.

    Args:
        df: DataFrame with 'latitude', 'longitude' and '자치구' columns. An
            optional '설치 위치' column supplies the marker label; rows where
            it is absent or missing are labelled 'Smoking Zone'.
        color_map: Mapping from district name to a Folium icon colour.
            Districts without an entry are drawn in 'gray'.
        output_path: Destination HTML file. Its parent directory, if any, is
            created when missing.
    """
    # Center the initial view on the mean coordinate of all points.
    center = [df['latitude'].mean(), df['longitude'].mean()]
    m = folium.Map(location=center, zoom_start=13)

    for _, row in df.iterrows():
        gu = row['자치구']
        name = row.get('설치 위치', 'Smoking Zone')
        if pd.isna(name):
            # A missing cell would otherwise be rendered as the text "nan".
            name = 'Smoking Zone'
        lat, lon = row['latitude'], row['longitude']
        color = color_map.get(gu, 'gray')

        folium.Marker(
            location=[lat, lon],
            popup=f"{gu} - {name}",
            tooltip=name,
            icon=folium.Icon(color=color, icon='info-sign')
        ).add_to(m)

    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is named.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    m.save(output_path)
    print(f"Map saved to: {output_path}")

# Load and Preprocess Data

In [11]:
# Read the raw CSV, normalise the coordinate column names, discard rows
# without coordinates and make sure both coordinates are floats.
df = (
    pd.read_csv(INPUT_PATH, encoding='cp949')
    .rename(columns={'위도': 'latitude', '경도': 'longitude'})
    .dropna(subset=['latitude', 'longitude'])
    .astype({'latitude': float, 'longitude': float})
)

# Discard points lying too far from their district's reference center.
df = remove_outliers(df, DISTRICT_CENTERS)
df.head()

Unnamed: 0,자치구,설치 위치,latitude,longitude
0,용산구,서울특별시 용산구 서울역 광장 15번출구,37.553149,126.968881
1,용산구,서울특별시 용산구 서울역 광장 1번출구,37.55376,126.969662
2,용산구,서울특별시 용산구 용산역 광장,37.528404,126.965569
3,용산구,서울특별시 용산구 용산구청 옥상,37.532709,126.99
4,용산구,서울특별시 용산구 용산구청 2층 외부,37.532709,126.99


# Compute Average Distances

In [12]:
# Mean nearest-neighbour distance in meters for each district; the call also
# prints one summary line per district.
avg_dists = compute_avg_nearest_distances(df)
avg_dists

영등포구: average nearest distance = 193.97 m
용산구: average nearest distance = 138.87 m


{'영등포구': 193.96671263075797, '용산구': 138.87295498149152}

# Generate Map

In [13]:
# Draw every remaining smoking zone as a district-coloured marker and write
# the interactive map to OUTPUT_PATH.
plot_folium_map(df, DISTRICT_COLORS, OUTPUT_PATH)

Map saved to: ./Output/merged_smoking_zones_map.html
