In [2]:
# !pip install folium
# !pip install pyproj

In [43]:
import pandas as pd
import folium
import math

from itertools import combinations
from pyproj import Proj, transform
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

from typing import List

In [44]:
def preprocess_data(path: str) -> pd.DataFrame:
    """
    "Note": Modify and use according to your own data.
    Or you don't need to use this code, and you just insert some code about preprocess in "main"

    Explanation
    Data Path is received as inputs and data is purified in the order of "name"/"latitude"/"longitude".

    Arguments
        path: A path of a file in the form of 'xlsx' and 'csv' is input.

    Return
        Pandas Data Frame: Form of Pandas Data Frame with Column in the order of "Name", "Latitude", and "Longitude"
    """

    if "xlsx" in path:
        data = pd.read_excel(path)
        ############ input/output에 따라 수정 ####################
        if "대구분명칭" in data.columns:
            data = data[["대구분명칭", "위도", "경도"]]
        elif "시장명" in data.columns:
            data = data[["시장명", "위도", "경도"]]
        data.columns = ["name", "latitude", "longitude"]
        ##########################################################
    elif "csv" in path:
        data = pd.read_csv(path, header=None)  # header 옵션 역시 본인의 데이터에 맞추어 수정한다.
        ############ input/output에 따라 수정 ####################
        data.columns = ["longitude", "latitude", "name"]
        data = data[["name", "latitude", "longitude"]]
        ##########################################################
    return data

In [45]:
# market: 지역_공공공장소/위도/경도
# population: 지역_유동인구/위도/경도
market_location_path = '../data/울진군예상좌표.xlsx'
population_location_path = '../data/울진_편의점.csv'

market_data = preprocess_data(market_location_path)
population_data = preprocess_data(population_location_path)

In [47]:
def coordinate_change(data: pd.DataFrame, c1: str, c2: str) -> pd.DataFrame:
    """
    Explanation
        The latitude and longitude existing in the data frame are converted from the coordinate c1 to the coordinate c2 to be converted.
    Arguments
        data: The columns are in the form of a Pandas Data Frame in the order of "name", "latitude", and "longitude".
        c1: The original latitude and longitude coordinate system.
        c2: The latitude and longitude coordinate system to convert.
            c1 & c2: something like 'epsg:5178', 'epsg:4326', etc..

    Return
        Pandas Data Frame: Data frame with converted latitude and longitude coordinates.
    """
    proj_c1 = Proj(init=c1)
    proj_c2 = Proj(init=c2)

    for i in tqdm(range(len(data))):
        change_long, change_lat = transform(
            proj_c1, proj_c2, data["longitude"][i], data["latitude"][i]
        )

        data["longitude"][i] = change_long
        data["latitude"][i] = change_lat

    return data

In [48]:
market_data = coordinate_change(market_data, 'epsg:4326', 'epsg:5178')
population_data = coordinate_change(population_data, 'epsg:4326', 'epsg:5178')

100%|██████████| 50/50 [00:01<00:00, 47.88it/s]
100%|██████████| 86/86 [00:01<00:00, 55.09it/s]


In [49]:
def shortest_distance(F: pd.DataFrame, L: pd.DataFrame):
    """
    Explanation
        Create the shortest matrix between public facilities and floating population.

    Arguments
        F: Coordinates of public facilities.
        L: Coordinates of the floating population.

    Return
        Pandas Data Frame: Shortest distance matrix between public facilities and floating population
    """
    F_list = []
    L_list = []
    for i in range(len(F)):
        name = f"F_{i}"
        F_list.append(name)

    for i in range(len(L)):
        name = f"L_{i}"
        L_list.append(name)

    distance = pd.DataFrame(columns=F_list, index=L_list)
    for i in range(len(distance)):
        for j, col in enumerate(distance.columns):
            square_sum = ((F["latitude"][j] - L["latitude"][i]) ** 2) + (
                (F["longitude"][j] - L["longitude"][i]) ** 2
            )
            dist = math.sqrt(square_sum)
            distance[col][i] = dist

    return distance

In [50]:
distance = shortest_distance(market_data, population_data)
distance

Unnamed: 0,F_0,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,F_9,...,F_40,F_41,F_42,F_43,F_44,F_45,F_46,F_47,F_48,F_49
L_0,42401.971504,42122.204806,175.076635,5925.966516,17809.214319,12313.100448,47104.616493,46969.122335,41610.331284,42120.794076,...,12662.06577,39767.597941,6453.096898,47653.412492,47777.647371,15766.089309,12503.037186,16153.117361,47587.749086,12445.88841
L_1,42317.794149,42037.991816,223.459609,5889.006474,17716.077727,12230.917343,47022.164441,46886.232204,41525.975214,42036.641404,...,12581.245882,39687.522521,6413.571543,47568.894753,47692.756526,15687.002047,12420.9633,16061.701944,47504.906942,12367.297923
L_2,27223.901531,26944.356846,15314.179312,10713.322647,4127.794672,2877.55191,31926.723121,31789.359128,26433.454218,26942.593276,...,2578.302516,24622.088602,10253.213991,32477.424116,32605.034271,1197.832725,2690.91146,2608.575097,32408.129911,2920.568863
L_3,42509.889327,42230.120816,114.950411,6017.460628,17914.89024,12421.055722,47212.57281,47077.077678,41718.239608,42228.713109,...,12769.982848,39875.465451,6546.03753,47761.311614,47885.521206,15873.884536,12610.991293,16259.574798,47695.704821,12553.639689
L_4,42153.557074,41873.496831,544.634763,6023.121883,17477.828385,12088.609634,46870.785727,46731.566253,41360.404655,41872.612936,...,12448.831368,39554.06886,6533.659763,47401.725125,47522.668433,15565.270703,12279.331354,15840.541304,47350.571225,12251.192901
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
L_81,56688.286482,56408.214191,14162.419624,19615.289838,31901.090422,26611.319136,61401.657908,61264.285935,55895.033401,56407.350708,...,26961.181794,54065.006115,20165.632733,61936.024612,62056.067569,30061.29461,26801.446771,30316.023555,61883.138855,26739.839168
L_82,24535.389266,24809.752542,66940.884922,62151.751666,49221.276527,54547.255131,20157.683269,20207.877291,25305.786269,24816.584329,...,54230.622731,27672.998959,61639.856917,19295.164348,19131.901648,51216.441813,54360.404285,50756.347681,19614.568113,54507.373989
L_83,56036.026778,55789.031293,31277.147492,35616.195877,35390.368618,35577.695585,60712.563899,60420.336522,55306.771103,55812.347876,...,35992.342463,55387.729885,35901.415773,60329.851361,60275.127525,38039.283545,35691.356814,35223.392245,60971.641322,36250.459116
L_84,56663.130935,56383.067614,14136.640798,19586.333072,31878.047259,26585.538711,61376.029312,61238.779066,55869.925558,56382.186802,...,26935.181146,54038.763758,20136.642162,61910.992293,62031.14876,30034.981343,26775.649308,30292.345168,61857.619696,26713.498265


In [51]:
def p_list_set(distance_data: pd.DataFrame, p: int) -> List[List]:
    """
    Explanation
        Based on F(Public Facilities), '2p' public facilities with the shortest
         distance from the floating population coordinates are selected

    Args
        distance_data: The matrix of distances between F and L. (column = F, row = L)
        p: The number of public facilities to be finally selected.

    return
        p_list_set: A set of p lists tied up in p
        ex) candidate= [1,2,3,4,5,6,7,8,9,10] / p=3
            p_list_set = [
                [1,2,3],
                [2,3,4],
                [3,4,5]
            ]
    """
    # The sum of the distances between the coordinates of the floating population for public facilities.
    col_sum = list(distance_data.sum(axis=0))

    col_sum_tuple = []  # Tie col_sum with index.
    for i in range(len(col_sum)):
        tup = (i, col_sum[i])
        col_sum_tuple.append(tup)

    col_sum_tuple.sort(key=lambda x: x[1])
    col_sum_tuple = col_sum_tuple[: 2 * p]  # Choose the top 2p based on distance.

    p_list_set = [col_sum_tuple[i : i + p] for i in range(p)]

    return p_list_set

In [52]:
def candidate_place(
    pb: pd.DataFrame, distance: pd.DataFrame, p_list_set: List[List]
) -> List:
    """
    Explanation
        Only names are extracted from DataFrame having a minimal distance within each set.

    Args:
        pb: DataFrame for public facilities.
        distance: DataFrame about the distance between F and L

        p_list_set: In distance, a set of p lists grouped by p based on distance

    Return:
        List: List of names of p public facilities.
    """
    min_sum_list = []  # take the minimum values in the pth list.
    for i in range(len(p_list_set)):
        tup_check = []
        for j in p_list_set[i]:
            tup_check.append(f"F_{j[0]}")
        check_df = distance[tup_check]

        check_df["min"] = 0  # generate 'min' column
        for k in range(len(check_df)):
            k_th_row = check_df.iloc[k][:-1]  # exclude 'min' column
            check_df["min"][k] = min(k_th_row)

        min_sum_value = sum(check_df["min"])
        min_sum_list.append(min_sum_value)

    final_index = min_sum_list.index(min(min_sum_list))
    final_set = p_list_set[final_index]
    final_set.sort(key=lambda x: x[0])

    final_idx = [idx for idx, dist in final_set]
    final_market_data = pb.iloc[final_idx, :]
    final_market_data.reset_index(drop=True, inplace=True)

    name_list = [name for name in final_market_data["name"]]
    return name_list

In [53]:
char_list = []
for p in range(3, 11) :
    p_list = p_list_set(distance, p)
    name_list = candidate_place(market_data, distance, p_list)

    char_list.extend(name_list)

In [54]:
def top_value(char_list: List) -> int:
    """Heuristic Method with P-Median
    Explanation
        Get the top three to six. (If there is a duplicate value, bring up to six.)

    Args:
        char_list: A list of the names of the final candidates.

    Return:
        int: The number of final candidates to get
    """
    appearance_candidate = list(pd.Series(char_list).value_counts())

    num_of_candidate = 3
    for first, second in zip(appearance_candidate[2:], appearance_candidate[3:]):
        if first == second:
            num_of_candidate += 1
        else:
            break

    return num_of_candidate

In [55]:
def make_finalset(market_data: pd.DataFrame, char_list: List) -> pd.DataFrame:
    """Heuristic Method with P-Median
    Explanation
        After receiving the char_list, which is the list of the final candidates,
         The final candidates is mapped with public facility data
         to return a DataFrame containing only the final candidates.

    Args:
        market_data: Data containing market location information.
        char_list: The list with the names of the final candidates overlapped.

    Return:
        Pandas DataFrame: The final DataFrame containing the names, latitudes, and longitude of the final candidates.
    """

    num_of_candidate = top_value(char_list)
    final_name_list = pd.Series(char_list).value_counts().index[:num_of_candidate]

    market_index = [
        (market_data[market_data["name"] == name].index)[0] for name in final_name_list
    ]

    market_final = market_data.iloc[market_index, :]
    market_final.reset_index(drop=True, inplace=True)
    return market_final

In [56]:
market_final = make_finalset(market_data, char_list)
market_final = coordinate_change(market_final,'epsg:5178',  'epsg:4326')


100%|██████████| 6/6 [00:00<00:00, 39.97it/s]


In [57]:
market_final

Unnamed: 0,name,latitude,longitude
0,엑스포공원 공영주차장,36.968686,129.401084
1,연지공원,36.994654,129.406559
2,2호관사앞 공영주차장,36.997189,129.403005
3,연호공원 공영주차장,36.994654,129.406559
4,민물고기연구센터 공영주차장,36.966554,129.380989
5,울진종합운동장,36.994654,129.406559


In [23]:
sijang = preprocess_data('../data/울진시장.xlsx')
sijang = coordinate_change(sijang, 'epsg:5178', 'epsg:4326')

100%|██████████| 14/14 [00:00<00:00, 41.96it/s]


In [31]:
def mark(
    m: folium.Map,
    data: pd.DataFrame,
    color: str,
    icon: str,
    size: float,
    popup: bool = True,
):
    """
    Explanation
        Marker is displayed on the map according to the top
         and longitude of the data and its name is displayed on Marker.

    Args:
        data: DataFrame which has name, lattitude, and longitude
        color: Specify the color of the marker to be displayed on the folium map.
        icon:Specify the icon of the marker to be displayed on the folium map.
        size: Specify the size of the marker to be displayed on the folium map.
        popup: Specifies that text is extracted when a marker is clicked (where name is specified).
    """

    for i in range(len(data)):
        long = data["latitude"][i]
        lat = data["longitude"][i]
        name = data["name"][i]
        folium.Marker(
            location=[long, lat],
            popup=name if popup else None,
            icon=folium.Icon(color=color, icon=icon, size=size),
        ).add_to(m)

In [36]:
m = folium.Map(location = [36.85, 129.4004195], zoom_start = 10)
mark(m, sijang, 'yellow', 'star', 0.1)
mark(m, market_final, 'blue', 'flag', 0.1)
m

In [37]:
m