In [1]:
import pandas as pd
import folium
import math
import itertools
from itertools import combinations
from pyproj import Proj, transform
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

from typing import List

In [2]:
def preprocess_data(path: str) -> pd.DataFrame:
    """
    "Note": Modify and use according to your own data.
    Or you don't need to use this code, and you just insert some code about preprocess in "main"

    Explanation
    Data Path is received as inputs and data is purified in the order of "name"/"latitude"/"longitude".

    Arguments
        path: A path of a file in the form of 'xlsx' and 'csv' is input.

    Return
        Pandas Data Frame: Form of Pandas Data Frame with Column in the order of "Name", "Latitude", and "Longitude"
    """

    if "xlsx" in path:
        data = pd.read_excel(path)
        ############ input/output에 따라 수정 ####################
        if "대구분명칭" in data.columns:
            data = data[["대구분명칭", "위도", "경도"]]
        elif "시장명" in data.columns:
            data = data[["시장명", "위도", "경도"]]
        data.columns = ["name", "latitude", "longitude"]
        ##########################################################
    elif "csv" in path:
        data = pd.read_csv(path, header=None)  # header 옵션 역시 본인의 데이터에 맞추어 수정한다.
        ############ input/output에 따라 수정 ####################
        data.columns = ["longitude", "latitude", "name"]
        data = data[["name", "latitude", "longitude"]]
        ##########################################################
    return data

- 분석목표가 전통시장 DTC 입지선정이며, DTC 후보지를 공공시설 후보지로 지정했음
- 따라서 수요지는 편의점 위치 대신 유동인구 위치로 지정함

In [3]:
market_data = pd.read_excel('cu.xlsx')
population_data = pd.read_excel('hwang_living_nodup.xlsx')
market_data.columns = ['add', 'name', 'latitude', 'longitude']
population_data.columns = ['longitude', 'latitude', 'LOG_BLCK_SUM']

In [4]:
def coordinate_change(data: pd.DataFrame, c1: str, c2: str) -> pd.DataFrame:
    """
    Explanation
        The latitude and longitude existing in the data frame are converted from the coordinate c1 to the coordinate c2 to be converted.
    Arguments
        data: The columns are in the form of a Pandas Data Frame in the order of "name", "latitude", and "longitude".
        c1: The original latitude and longitude coordinate system.
        c2: The latitude and longitude coordinate system to convert.
            c1 & c2: something like 'epsg:5178', 'epsg:4326', etc..

    Return
        Pandas Data Frame: Data frame with converted latitude and longitude coordinates.
    """
    proj_c1 = Proj(init=c1)
    proj_c2 = Proj(init=c2)

    for i in tqdm(range(len(data))):
        change_long, change_lat = transform(
            proj_c1, proj_c2, data["longitude"][i], data["latitude"][i]
        )

        data["longitude"][i] = change_long
        data["latitude"][i] = change_lat

    return data

In [5]:
market_data = coordinate_change(market_data, 'epsg:4326', 'epsg:5178')
population_data = coordinate_change(population_data, 'epsg:4326', 'epsg:5178')

100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:02<00:00,  5.59it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 71/71 [00:06<00:00, 10.89it/s]


In [6]:
#위경도 값 변환된 걸 확인할 수 있음
market_data

Unnamed: 0,add,name,latitude,longitude
0,서울특별시 광진구 능동로 16길 35,CU 건국타운점,1949309.0,962646.107372
1,서울특별시 광진구 아차산로 229,CU 건대한림타워점,1948834.0,962113.898262
2,서울특별시 광진구 아차산로 241,CU 건대화양점,1948811.0,962219.809147
3,서울특별시 광진구 광나루로 24길 31-17,CU 건대후문점,1949330.0,962902.387498
4,서울특별시 광진구 광나루로20길 13-10,CU 건대힐스점,1949431.0,962655.490911
5,서울특별시 광진구 능동로 169-1,CU 광진원룸촌점,1949398.0,962487.500149
6,서울특별시 광진구 광나루로 16길 30,CU 광진화양점,1949344.0,962098.562473
7,서울특별시 광진구 아차산로 25길 45,CU 뉴화양본점,1949112.0,961964.98688
8,서울특별시 광진구 동일로 24길 94,CU 화양공원점,1949011.0,962203.050718
9,서울특별시 광진구 능동로 13길 70-1,CU 화양대박점,1949278.0,962133.59361


In [7]:
def shortest_distance(F: pd.DataFrame, L: pd.DataFrame):
    """
    Explanation
        Create the shortest matrix between public facilities and floating population.

    Arguments
        F: Coordinates of public facilities.
        L: Coordinates of the floating population.

    Return
        Pandas Data Frame: Shortest distance matrix between public facilities and floating population
    """
    F_list = []
    L_list = []
    #column 이름 (F0~F12) 생성
    for i in range(len(F)):
        name = f"F_{i}"
        F_list.append(name)
    #row 이름 (l0~l70) 생성
    for i in range(len(L)):
        name = f"L_{i}"
        L_list.append(name)
        

    distance = pd.DataFrame(columns=F_list, index=L_list)
    for i in range(len(distance)):
        for j, col in enumerate(distance.columns):
            square_sum = ((F["latitude"][j] - L["latitude"][i]) ** 2) + (
                (F["longitude"][j] - L["longitude"][i]) ** 2
            )
            dist = math.sqrt(square_sum)
            distance[col][i] = dist

    return distance

1. 행은 블록, 열은 편의점, 안의 value 값은 블록 / 편의점 간 거리 -> Matrix 만드는 것이 목적

In [8]:
#(블록, 편의점)
distance = shortest_distance(market_data, population_data)
distance

Unnamed: 0,F_0,F_1,F_2,F_3,F_4,F_5,F_6,F_7,F_8,F_9,F_10,F_11,F_12,F_13
L_0,410.662766,496.011273,503.793359,667.060508,435.666146,265.405304,140.081854,338.09951,305.494815,108.468031,284.14003,510.787577,168.312214,295.116714
L_1,41.927837,682.037382,628.134255,298.983783,132.841152,147.363849,506.921959,668.593613,499.125641,471.57192,472.179839,830.787629,240.369836,313.969651
L_2,637.831889,208.188154,271.704753,883.30502,706.296073,552.68111,308.592289,128.021422,136.682618,248.987219,584.997578,239.581017,477.798986,618.298949
L_3,655.60077,657.829922,703.576874,901.276973,643.264424,480.814411,163.182572,375.49521,509.35992,238.460873,246.139565,515.914287,376.893812,400.32341
L_4,534.780382,387.981611,422.964307,791.353937,576.212882,408.988726,124.06962,188.720577,227.203736,57.77731,393.850319,361.320201,316.900671,440.913955
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
L_66,803.070249,285.310125,384.039743,1052.233941,862.873795,701.586099,382.535535,114.998881,306.190231,350.356282,669.644664,67.34756,615.326196,739.967817
L_67,823.449977,278.580607,380.741791,1071.15023,885.577845,725.497772,410.701516,142.889604,318.137606,377.410472,697.903853,54.089538,640.565233,766.768652
L_68,789.473001,229.144997,330.373559,1034.282691,856.496099,699.909956,407.358648,143.143283,274.119645,365.688643,694.571779,103.743927,619.408225,751.752808
L_69,666.983197,76.991843,147.739998,895.633361,752.950197,615.921682,433.673172,248.355149,136.263515,367.489596,698.725477,293.136194,557.83243,708.605442


2. p-median에서 p값에 따라 편의점 후보지 생성.
예를 들어 p = 3 이면 [편의점1, 편의점2, 편의점3], [편의점1, 편의점2, 편의점4]

In [9]:
def p_list_set(distance_data: pd.DataFrame, p: int) -> List[List]:
    """
    Explanation
        Based on F(Public Facilities), '2p' public facilities with the shortest
         distance from the floating population coordinates are selected

    Args
        distance_data: The matrix of distances between F and L. (column = F, row = L)
        p: The number of public facilities to be finally selected.

    return
        p_list_set: A set of p lists tied up in p
        ex) candidate= [1,2,3,4,5,6,7,8,9,10] / p=3
            p_list_set = [
                [1,2,3],
                [2,3,4],
                [3,4,5]
            ]
    """
    # The sum of the distances between the coordinates of the floating population for public facilities.
    # 편의점 별 모든 블록의 총거리 합을 도출 -> 편의점 마다 총거리 값 산출
    elements = list(range(len(distance_data.columns)))
    k = p 
    col_sum = list(distance.sum(axis=0))
    col_sum_tuple = []

    combinations = list(itertools.combinations(elements, k))
    for com in combinations:
        temp = []
        for i in range(len(com)):
            idx = com[i]
            tup = (idx, col_sum[idx])
            temp.append(tup)
            
        col_sum_tuple.append(temp)

    return col_sum_tuple

3. p값에 따라 설정한 후보군 중에서 어떤 후보가 가장 모든 블럭에 대해 sumation을 했을 때 최소값을 가지는지

In [10]:
def candidate_place(
    pb: pd.DataFrame, distance: pd.DataFrame, p_list_set: List[List]
) -> List:
    """
    Explanation
        Only names are extracted from DataFrame having a minimal distance within each set.

    Args:
        pb: DataFrame for public facilities.
        distance: DataFrame about the distance between F and L

        p_list_set: In distance, a set of p lists grouped by p based on distance

    Return:
        List: List of names of p public facilities.
    """

    min_sum_list = []  # take the minimum values in the pth list.
    for i in range(len(p_list_set)):
        tup_check = []
        for j in p_list_set[i]:
            tup_check.append(f"F_{j[0]}")
        check_df = distance[tup_check]

        #check_df['min']에는 i번째 후보지 집합 내 p개의 후보지와 k번째 블럭 간 거리를 산출했을 때 최소값을 기입
        check_df["min"] = 0  # generate 'min' column
        min_list = []
        for k in range(len(check_df)):
            k_th_row = check_df.iloc[k][:-1]  # exclude 'min' column
    #         check_df["min"][k] = min(k_th_row) 
            min_list.append(min(k_th_row))
        check_df['min'] = min_list

        check_df['demand'] = list(population_data['LOG_BLCK_SUM'])
#         print(max(check_df['min']))
        if max(check_df['min']) > 250:
            min_sum_value = 1000*list(check_df['demand'])
            min_sum_list.append(sum(min_sum_value))
        else:
            min_sum_value = check_df['min']*check_df['demand']
#             print(max(check_df['min']))
            min_sum_list.append(sum(min_sum_value))
#     print(min_sum_list)

    #     min_sum_value = sum(check_df["min"])
    #     min_sum_list.append(min_sum_value)
    #p개의 후보군 집합 중에서 수요지와의 가리의 합이 가장 작은 후보군 집합을 선택한다.
    if len(set(min_sum_list)) == 1:
        name_list = []
    else:
        final_index = min_sum_list.index(min(min_sum_list))
        final_set = p_list_set[final_index]
        final_set.sort(key=lambda x: x[0])

        final_idx = [idx for idx, dist in final_set]
        final_market_data = pb.iloc[final_idx, :]
        final_market_data.reset_index(drop=True, inplace=True)

        name_list = [name for name in final_market_data["name"]]
    return name_list

우리의 목표<br>
1. 모든 블록과 후보 편의점 간의 거리를 산출했을 떄 우리가 정한 기준값(250m)보다는 작아야 한다.<br>
2. 모든 블록과 후보 편의점 간의 거리를 산출했을 때 250m보다 모두 작다면, 후보군들 중 개별 블록과 편의점 간의 최소거리를 모두 더했을 때의 값이 가장 작은 후보군을 채택한다. 이는 해당 편의점 후보군이 평균적으로 모든 블록과 가까운 것을 의미하기 때문이다.

In [11]:
char_list = []
for p in range(3, 13) :
    p_list = p_list_set(distance, p)
    name_list = candidate_place(market_data, distance, p_list)
    print(name_list)
    #p=3~11 일떄 후보군을 모두 char_list에 저장
    print('########',p,'###########')
    char_list.extend(name_list) 

[]
######## 3 ###########
[]
######## 4 ###########
[]
######## 5 ###########
['CU 건대후문점', 'CU 광진원룸촌점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양문화점']
######## 6 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양문화점', 'CU 화양원룸촌점']
######## 7 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 8 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 9 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 10 ###########
['CU 건국타운점', 'CU 건대한림타워점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 11 ###########
['CU 건국타운점', 'CU 건대한림타워점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점', 'CU 화양하이뷰점']
#######

In [12]:
char_list = []
for p in range(3, 30) :
    p_list = p_list_set(distance, p)
    name_list = candidate_place(market_data, distance, p_list)
    print(name_list)
    #p=3~11 일떄 후보군을 모두 char_list에 저장
    print('########',p,'###########')
    char_list.extend(name_list) 

[]
######## 3 ###########
[]
######## 4 ###########
[]
######## 5 ###########
['CU 건대후문점', 'CU 광진원룸촌점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양문화점']
######## 6 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양문화점', 'CU 화양원룸촌점']
######## 7 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 8 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 9 ###########
['CU 건국타운점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 10 ###########
['CU 건국타운점', 'CU 건대한림타워점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
######## 11 ###########
['CU 건국타운점', 'CU 건대한림타워점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점', 'CU 화양하이뷰점']
#######

ValueError: min() arg is an empty sequence

=================================================================================================================================================================

In [13]:
def top_value(char_list: List) -> int:
    """Heuristic Method with P-Median
    Explanation
        Get the top three to six. (If there is a duplicate value, bring up to six.)

    Args:
        char_list: A list of the names of the final candidates.

    Return:
        int: The number of final candidates to get
    """
    appearance_candidate = list(pd.Series(char_list).value_counts())
    
    #appearance_candidate list에서 3번째(first), 4번째(second)의 값이 같을 경우 num_of_candidate를 1 증가시킨다.
    #first, second의 값은 각 편의점이 p=3~11까지 변화할 때의 후보지 list에 포함된 횟수이다.
    num_of_candidate = 3
    for first, second in zip(appearance_candidate[2:], appearance_candidate[3:]):
        if first == second:
            num_of_candidate += 1 
        else:
            break

    return num_of_candidate

In [14]:
def make_finalset(market_data: pd.DataFrame, char_list: List) -> pd.DataFrame:
    """Heuristic Method with P-Median
    Explanation
        After receiving the char_list, which is the list of the final candidates,
         The final candidates is mapped with public facility data
         to return a DataFrame containing only the final candidates.

    Args:
        market_data: Data containing market location information.
        char_list: The list with the names of the final candidates overlapped.

    Return:
        Pandas DataFrame: The final DataFrame containing the names, latitudes, and longitude of the final candidates.
    """

    num_of_candidate = top_value(char_list)
    final_name_list = pd.Series(char_list).value_counts().index[:num_of_candidate]

    market_index = [
        (market_data[market_data["name"] == name].index)[0] for name in final_name_list
    ]

    market_final = market_data.iloc[market_index, :]
    market_final.reset_index(drop=True, inplace=True)
    return market_final

In [15]:
market_final = make_finalset(market_data, char_list)
market_final = coordinate_change(market_final,'epsg:5178',  'epsg:4326')

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00,  7.37it/s]


In [20]:
market_data = pd.read_csv('gs25 (1).csv',encoding='cp949')
market_data.columns = ['add', 'name', 'latitude', 'longitude']

In [21]:
name_list = ['CU 건국타운점', 'CU 건대후문점', 'CU 건대힐스점', 'CU 광진화양점', 'CU 뉴화양본점', 'CU 화양공원점', 'CU 화양대박점', 'CU 화양문화점', 'CU 화양사랑점', 'CU 화양원룸촌점']
market_final = market_data[market_data['name'].isin(name_list)]
market_final.reset_index(drop=True,inplace=True)
market_final

Unnamed: 0,add,name,latitude,longitude


In [22]:
def mark(
    m: folium.Map,
    data: pd.DataFrame,
    color: str,
    icon: str,
    size: float,
    popup: bool = True,
):
    """
    Explanation
        Marker is displayed on the map according to the top
         and longitude of the data and its name is displayed on Marker.

    Args:
        data: DataFrame which has name, lattitude, and longitude
        color: Specify the color of the marker to be displayed on the folium map.
        icon:Specify the icon of the marker to be displayed on the folium map.
        size: Specify the size of the marker to be displayed on the folium map.
        popup: Specifies that text is extracted when a marker is clicked (where name is specified).
    """

    for i in range(len(data)):
        long = data["latitude"][i]
        lat = data["longitude"][i]
        name = data["name"][i]
        folium.Marker(
            location=[long, lat],
            popup=name if popup else None,
            icon=folium.Icon(color=color, icon=icon, size=size),
        ).add_to(m)

In [23]:
market_data = pd.read_excel('cu.xlsx')
market_data.columns = ['add', 'name', 'latitude', 'longitude']

In [24]:
m = folium.M
ap(location = [37.544793, 127.065894], zoom_start = 10)
mark(m, market_data, 'yellow', 'star', 0.1)
mark(m, market_final, 'blue', 'flag', 0.1)
m

In [None]:
market_final