# Setup.

In [1]:
# Imports.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# `create_map`: Create a map focused on a specified city, scaled by a zoom ratio.

In [1]:
from mpl_toolkits.basemap import Basemap

def create_map(city, resolution='i', zoom_ratio=0.8):
    """
    Description: Creates a Basemap instance for visualizing different regions of South Korea.
    Args:
        city        = str, the region to focus on. ['Korea', 'Seoul', 'Busan', 'Daejeon'].
        resolution  = Basemap boundary resolution, e.g. 'i', 'h', 'f'. Determines how detailed
                      the map should be. Default is 'i'.
        zoom_ratio  = float > 0, zoom factor applied around the centre of the region.
                      Default is 0.8. The base extent is divided by this value, so
                      values > 1.0 zoom in and values < 1.0 zoom out.
    Returns:
        m = Basemap instance (Mercator projection), configured for the specified region.
    Raises:
        ValueError if `city` is not a known region or `zoom_ratio` is not positive.
    """
    # Base extent of each region as (min lat, max lat, min lon, max lon).
    coordinates = {
        'Korea': (34.0, 38.5, 125.5, 130.0),
        # Manually rescaled; the original extent was (36.5, 38.5, 126.5, 128.0).
        'Seoul': (36.8, 38.2, 126.3, 127.7),
        'Busan': (34.0, 36.0, 127.5, 129.5),
        'Daejeon': (35.5, 37.0, 126.5, 128.5),
    }

    if city not in coordinates:
        raise ValueError("Invalid city. Valid options are 'Korea', 'Seoul', 'Busan', or 'Daejeon'.")

    # A zero ratio would divide by zero and a negative one would swap the corners.
    if zoom_ratio <= 0:
        raise ValueError("zoom_ratio must be a positive number.")

    # Get base coordinates
    llcrnrlat, urcrnrlat, llcrnrlon, urcrnrlon = coordinates[city]

    # Scale the extent around its centre: a larger zoom_ratio gives a smaller span.
    lat_center = (llcrnrlat + urcrnrlat) / 2
    lon_center = (llcrnrlon + urcrnrlon) / 2
    lat_span = (urcrnrlat - llcrnrlat) / zoom_ratio
    lon_span = (urcrnrlon - llcrnrlon) / zoom_ratio

    # Create the Basemap instance from the recalculated corners
    m = Basemap(
        projection='merc',
        llcrnrlat=lat_center - lat_span / 2,
        urcrnrlat=lat_center + lat_span / 2,
        llcrnrlon=lon_center - lon_span / 2,
        urcrnrlon=lon_center + lon_span / 2,
        resolution=resolution
    )

    return m

# `plot_orders`: Plot orders on the Korean map, focusing on the specified `city`.

In [2]:
def _nearest_neighbor_path(points):
    """Order `points` greedily: start at the first one, then keep hopping to the closest unvisited one."""
    remaining = [tuple(pnt) for pnt in points]
    if not remaining:
        return []

    current = remaining.pop(0)
    path = [current]
    while remaining:
        # Straight-line distance in (longitude, latitude) space to every unvisited point.
        offsets = np.array(remaining, dtype=np.float64) - np.array(current, dtype=np.float64)
        nearest = int(np.argmin(np.linalg.norm(offsets, axis=1)))
        current = remaining.pop(nearest)
        path.append(current)
    return path


def plot_orders(df, city='Korea', zoom_ratio=1.0, plot_main_cities=True, plot_Han_river=True, save_image=False,
                color_by_cluster=False, plot_paths=False, resolution='i', cluster_id='cluster',
                figsize=(15,8), title="No Title", cmap='viridis'):
    """
    Description: Simple method to plot orders on a Korean map with optional clustering visualization.
    Args:
        df                = pd.DataFrame, where df['longitude'] and df['latitude'] are set.
        city              = city to focus. ['Korea', 'Seoul', 'Busan', 'Daejeon'].
        zoom_ratio        = float, zoom level for the map. Default is 1.0.
        plot_main_cities  = if 'True', draws some main cities. ONLY works when `city`='Korea'.
        plot_Han_river    = if 'True', draws Han river with coordinates of bridges
                            (read from './data/Han_river_bridge_coords.csv').
        save_image        = if 'True', saves the figure as './tmp/<title>.jpg'.
        color_by_cluster  = if 'True', colors points by the values in df[cluster_id].
        plot_paths        = if 'True', draws one closed nearest-neighbour path per cluster in df[cluster_id].
        resolution        = 'i', 'h', 'f'. Determines how detailed the map should be.
        cluster_id        = name of the cluster column, e.g. 'cluster' for agglomerative,
                            'cluster_kmeans' for kmeans.
        figsize           = (x, y). figsize for pyplot.
        title             = str to specify title of the plot.
        cmap              = str, colormap to use.
    Returns: None.
    """

    # Imports.
    import os
    import matplotlib.pyplot as plt
    import matplotlib.font_manager as fm

    # Set Korean font for pyplot. The path is machine-specific, so keep the
    # current font when the file is not there instead of failing.
    font_path = r'C:\Users\yana\AppData\Local\Microsoft\Windows\Fonts\NanumGothic-Regular.ttf'
    if os.path.isfile(font_path):
        font_prop = fm.FontProperties(fname=font_path)
        plt.rcParams['font.family'] = font_prop.get_name()

    # Create the Basemap first so an invalid `city` fails before a figure is opened.
    m = create_map(city, zoom_ratio=zoom_ratio, resolution=resolution)

    # Map size
    plt.figure(figsize=figsize)

    # Draw map boundaries, coastlines, and country borders
    m.drawcoastlines()
    m.drawcountries()
    m.drawmapboundary(fill_color='lightblue')
    m.fillcontinents(color='lightgreen', lake_color='lightblue')

    # Plot main cities (if applicable)
    if plot_main_cities and city == 'Korea':
        city_data = [
            ("Seoul", 37.5665, 126.9780),
            ("Busan", 35.1796, 129.0756),
            ("Daejeon", 36.3504, 127.3845),
        ]
        for city_name, lat, lon in city_data:
            xx, yy = m(lon, lat)  # Convert latitude and longitude to map coordinates
            # Shift the label left of the marker (offset is in map projection units).
            plt.text(xx - 50000, yy, city_name, fontsize=12, ha='right', color='black')
            m.scatter(xx, yy, marker='x', color='blue', zorder=5, s=100)  # Blue X marker

    # Plot Han river as a line through the bridge coordinates, in file order.
    if plot_Han_river:
        han_river_coords = pd.read_csv('./data/Han_river_bridge_coords.csv', encoding='utf-8-sig')
        river_x, river_y = m(han_river_coords['longitude'].values, han_river_coords['latitude'].values)
        plt.plot(river_x, river_y, color='blue', linewidth=2, zorder=4, label="Han River")

    # Convert latitude and longitude to map coordinates
    x, y = m(df['longitude'].values, df['latitude'].values)

    # Plot orders or clusters
    if color_by_cluster:
        cluster_labels = df[cluster_id]

        # Pin the color scale to the range of cluster labels so the colorbar spans exactly that range.
        scatter = m.scatter(
            x, y,
            c=cluster_labels,
            cmap=cmap,
            s=10,
            alpha=0.7,
            zorder=5,
            vmin=cluster_labels.min(),
            vmax=cluster_labels.max()
        )
        plt.colorbar(scatter, label='Cluster')
    else:
        # Default red color for orders
        m.scatter(x, y, marker='o', color='red', zorder=5, s=10)

    # Plot paths: one closed loop per cluster, numbered in order of first appearance.
    if plot_paths:
        for number, cluster in enumerate(df[cluster_id].unique(), start=1):
            members = df.loc[df[cluster_id] == cluster, ['longitude', 'latitude']].to_numpy().tolist()
            path = _nearest_neighbor_path(members)
            if len(path) <= 1:
                continue  # Nothing to connect.

            # Return to the starting point so the path forms a loop.
            lons, lats = zip(*(path + [path[0]]))
            x_coords, y_coords = m(np.array(lons), np.array(lats))
            plt.plot(x_coords, y_coords, marker='o', linestyle='-', linewidth=2, zorder=6,
                     label=f'Cluster {number}')

    # Title.
    plt.title(title + f' (City: {city})')

    # Save as .JPG.
    if save_image:
        os.makedirs('./tmp', exist_ok=True)  # savefig does not create missing directories.
        plt.savefig(f'./tmp/{title}.jpg', format='jpg', dpi=300, bbox_inches='tight')

    # Plot.
    plt.show()



# `plot_orders_by_month`: Plot orders on the Korean map by month, with one subplot per month.

In [9]:
def plot_orders_by_month(df, city='Korea', zoom_ratio=1.0, is_main_cities=True, resolution='i'):
    """
    Description: Create subplots to plot orders for each month in Korean map.
    Args:
        df              = pd.DataFrame, where df['longitude'], df['latitude'], and df['date'] are set.
                          `df` is not modified.
        city            = city to focus. ['Korea', 'Seoul', 'Busan', 'Daejeon']
        zoom_ratio      = float, zoom level for the map. Default is 1.0.
        is_main_cities  = if 'True', draws some main cities. ONLY works when `city`='Korea'.
        resolution      = 'i', 'h', 'f'. Determines how detailed the map should be. Default is 'i'.
    Returns: None.
    """
    # Derive the month of every order in a separate Series so the caller's frame stays untouched.
    order_months = pd.to_datetime(df['date']).dt.month

    # Create a sorted list of unique months in the data (rows without a date are ignored).
    months = sorted(int(mth) for mth in order_months.dropna().unique())
    if not months:
        print("No dated orders to plot.")
        return

    # Imports. Done after the empty-input check so that case returns without any plotting setup.
    import matplotlib.pyplot as plt

    # Create subplots with 4 columns per row.
    n_cols = 4
    n_rows = -(-len(months) // n_cols)  # Ceiling division
    # squeeze=False always returns a 2-D array of axes, whatever the grid shape.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, n_rows * 5), squeeze=False)
    axes = axes.flatten()  # Flatten axes for easy indexing

    # The map extent is the same for every month, so build the Basemap once and reuse it.
    m = create_map(city, zoom_ratio=zoom_ratio, resolution=resolution)

    city_data = [
        ("Seoul", 37.5665, 126.9780),
        ("Busan", 35.1796, 129.0756),
        ("Daejeon", 36.3504, 127.3845),
    ]

    for ax, month in zip(axes, months):
        # Filter data for the current month.
        month_df = df[order_months == month]

        # Draw map boundaries, coastlines, and country borders.
        m.drawcoastlines(ax=ax)
        m.drawcountries(ax=ax)
        m.drawmapboundary(fill_color='lightblue', ax=ax)
        m.fillcontinents(color='lightgreen', lake_color='lightblue', ax=ax)

        # Plot some main cities if enabled and city is 'Korea'.
        if is_main_cities and city == 'Korea':
            for c, lat, lon in city_data:
                xx, yy = m(lon, lat)
                ax.text(xx - 50000, yy, c, fontsize=10, ha='right', color='black')
                m.scatter(xx, yy, marker='x', color='blue', zorder=5, s=50, ax=ax)

        # Plot orders for the current month.
        x, y = m(month_df['longitude'].values, month_df['latitude'].values)
        m.scatter(x, y, marker='o', color='red', zorder=5, s=10, ax=ax)

        # Set the title for the subplot.
        ax.set_title(f'Orders in Month: {month} (City: {city})')

    # Remove the grid cells that did not receive a month.
    for unused_ax in axes[len(months):]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# `get_Han_river_coords`: Return the coordinates of each bridge on the Han river.

In [8]:
import pandas as pd

def get_Han_river_coords(verbose=False, write_file=False, max_retries=2):
    """
    Description: Use geopy (Nominatim) to look up the coordinates of each listed Han river bridge.
    Args:
        verbose      = if 'True', prints every lookup result and a final summary.
        write_file   = if 'True', writes the results to './data/Han_river_bridge_coords.csv'.
        max_retries  = int, extra attempts made for a bridge whose lookup times out. Default is 2.
    Returns:
        bridge_coords = list of (name, latitude, longitude) tuples, one per bridge that was found.
                        Bridges that were not found or kept timing out are left out.
    """
    from geopy.geocoders import Nominatim
    from geopy.exc import GeocoderTimedOut

    # Initialize geolocator
    geolocator = Nominatim(user_agent="han_river_bridges_locator")

    # List of major bridges along the Han River
    han_river_bridges = [
        "일산대교", "김포대교", "행주대교",
        "방화대교", "마곡대교", "가양대교",
        "월드컵대교", "성산대교", "양화대교",
        "당산철교", "서강대교", "마포대교",
        "원효대교", "한강철교", "한강대교",
        "동작대교", "반포대교", "한남대교",
        "동호대교", "성수대교", "영동대교",
        "청담대교", "잠실대교", "잠실철교",
        "올림픽대교", "천호대교", "광진대교",
        "구리암사대교", "강동대교", "팔당대교"
    ]

    attempts = 1 + max(0, max_retries)

    # Fetch coordinates for each bridge
    bridge_coords = []
    for bridge in han_river_bridges:
        loc = None
        timed_out = False
        for attempt in range(attempts):
            try:
                loc = geolocator.geocode(bridge, timeout=10)  # Add a timeout for geocoding
            except GeocoderTimedOut:
                timed_out = True
                if attempt + 1 < attempts:
                    print(f"Geocoding timed out for {bridge}. Retrying...")
                else:
                    print(f"Geocoding timed out for {bridge}. Giving up.")
            else:
                timed_out = False
                break

        if timed_out:
            continue  # Every attempt timed out; move on to the next bridge.

        if loc:
            bridge_coords.append((bridge, loc.latitude, loc.longitude))
            if verbose:
                print(f"{bridge}: {loc.latitude}, {loc.longitude}")
        elif verbose:
            print(f"Could not find coordinates for {bridge}.")

    # Output the results
    if verbose:
        print("\nBridge Coordinates:")
        for name, lat, lon in bridge_coords:
            print(f"{name}: Latitude {lat}, Longitude {lon}")

    if write_file:
        df_han_coord = pd.DataFrame(bridge_coords, columns=['Name', 'latitude', 'longitude'])
        df_han_coord.to_csv("./data/Han_river_bridge_coords.csv", index=False, encoding="utf-8-sig")

    return bridge_coords


# `sort_orders_with_riders`: Sort orders loaded from a file like './data/20250126_100yrs_results_20250124_rider_LHJ.csv' by time, cluster, and rider name.

In [None]:
def sort_orders_with_riders(df):
    """
    Description: Sort orders by time of day, then by cluster, then by the '기사 명' column.
    Args:
        df = pd.DataFrame with a 'date' column of 'HH:MM' strings plus 'cluster' and '기사 명' columns.
             `df` is not modified; reading and writing the CSV is left to the caller.
    Returns:
        df_sorted = sorted copy of `df` whose 'date' column holds time-of-day values.
                    Entries that do not match '%H:%M' are coerced to missing values.
    """
    # Parse into a separate Series so the caller's 'date' column keeps its original values.
    order_time = pd.to_datetime(df['date'], format='%H:%M', errors='coerce').dt.time

    df_sorted = df.assign(date=order_time).sort_values(by=['date', 'cluster', '기사 명'], ascending=True)

    return df_sorted