In [1]:
import warnings
import pandas as pd
import geopandas as gpd
import glob
import numpy as np

# Silence every warning for the rest of the session.
warnings.simplefilter('ignore')

# Show all columns when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

# 1. 데이터 불러오기

In [2]:
# Directory that contains the CSV files to load
directory_path = "../cluster_by_service"

# Collect every CSV path in the directory. glob returns paths in an
# OS-dependent order, so sort them to make the load order (and therefore the
# row order of everything derived from dataframes_dict) reproducible.
csv_files = sorted(glob.glob(f'{directory_path}/*.csv'))

# Fail here with a clear message instead of much later inside pd.concat.
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {directory_path!r}")

dataframes_dict = {}

for file in csv_files:
    # Key = file name without directory (either separator style) and without
    # anything after the first dot.
    key = file.replace('\\', '/').rsplit('/', 1)[-1].split('.')[0]

    # Two files reducing to the same key would silently overwrite each other.
    if key in dataframes_dict:
        raise ValueError(f"Duplicate key {key!r} derived from file {file!r}")

    # Load the CSV into a DataFrame and store it under its key
    dataframes_dict[key] = pd.read_csv(file)

# 2. GeoDataFrame으로 변환

In [3]:
def create_geodata(df, shp_path="../map_data/서울시 상권분석서비스(영역-상권).shp"):
    """Attach trade-area polygons from a shapefile to a feature table.

    The rows of ``df`` are inner-joined to the shapefile on
    ``df['상권_코드_명'] == shapefile['TRDAR_CD_N']``; rows without a matching
    polygon are therefore dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns listed in ``feature_columns`` below.
    shp_path : str, optional
        Path of the shapefile to read. Defaults to the path that was
        previously hard-coded in this function.

    Returns
    -------
    geopandas.GeoDataFrame
        The feature columns followed by the ``geometry`` column.

    Notes
    -----
    The shapefile is re-read on every call. If this is called once per
    service (as in the driver cell), consider reading it once and caching it.
    """
    import pandas as pd
    import geopandas as gpd

    # Single source of truth for the columns carried through the join.
    feature_columns = ['상권_코드_명', '서비스_업종_코드_명', '총_유동인구_수', '총_상주인구_수',
                       '총_직장_인구_수', '유사_업종_점포_수', '집객시설수', '교통시설수',
                       '당월_매출_금액', '영역_면적', 'cluster']

    # NOTE(review): `crs=` is forwarded to the underlying reader as-is, exactly
    # like the original call; whether it has any effect depends on the engine.
    boundaries = gpd.read_file(shp_path, encoding='utf-8', crs="EPSG:5181")

    # Only the join key and the geometry are needed from the shapefile, which
    # also avoids accidental column-name clashes (and `_x`/`_y` suffixes).
    merged = pd.merge(df[feature_columns],
                      boundaries[['TRDAR_CD_N', 'geometry']],
                      left_on='상권_코드_명',
                      right_on='TRDAR_CD_N')

    return gpd.GeoDataFrame(merged[feature_columns + ['geometry']])

def merge_polygon(geo_data, transitive=False):
    """Merge intersecting polygons of one cluster into combined areas.

    Starting from the first remaining row, every row whose geometry intersects
    that row's geometry is grouped with it. The group's geometries are unioned,
    its numeric columns are summed, and one output row is emitted. The grouped
    rows are then removed and the process repeats until no rows remain.

    Parameters
    ----------
    geo_data : DataFrame / GeoDataFrame
        Rows of a single cluster. Must contain ``'서비스_업종_코드_명'``,
        ``'cluster'``, ``'geometry'`` (shapely geometries) and the numeric
        columns listed in ``sum_columns``. It is not modified.
    transitive : bool, optional
        ``False`` (default) reproduces the historical behaviour: only rows that
        intersect the *seed* geometry are grouped (a single pass).
        ``True`` keeps growing the group while any remaining row intersects the
        merged geometry, so chains of touching polygons end up in one area.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per merged area. ``'상권_코드'`` is a running number starting
        at 1; service and cluster are taken from the first input row. An empty
        input yields an empty frame with the same columns.

    Notes
    -----
    The previous implementation wrote the intersection flags via chained
    assignment (``geo_data['result'][row] = ...``), which silently does nothing
    under pandas Copy-on-Write, and it raised ``KeyError`` on an empty input.
    Its inner ``while`` loop could never run twice, which is why the default
    here is the single-pass grouping.
    """
    import geopandas as gpd
    from shapely.ops import unary_union

    out_columns = ['상권_코드', '서비스_업종_코드_명', '총_유동인구_수', '총_상주인구_수',
                   '총_직장_인구_수', '유사_업종_점포_수', '집객시설수', '교통시설수',
                   '당월_매출_금액', '영역_면적', 'cluster', 'geometry']
    sum_columns = ['총_유동인구_수', '총_상주인구_수', '총_직장_인구_수', '유사_업종_점포_수',
                   '집객시설수', '교통시설수', '당월_매출_금액', '영역_면적']

    if len(geo_data) == 0:
        return gpd.GeoDataFrame({column: [] for column in out_columns})

    # Positional access so the function does not depend on the index labels.
    cluster = geo_data['cluster'].iloc[0]
    service = geo_data['서비스_업종_코드_명'].iloc[0]

    # Work on a re-indexed copy; the caller's frame is left untouched.
    remaining = geo_data.reset_index(drop=True)
    records = []
    n = 1
    while len(remaining) > 0:
        shapes = list(remaining['geometry'])
        in_group = [False] * len(shapes)
        in_group[0] = True          # the seed row always belongs to its own group
        probe = shapes[0]
        while True:
            grew = False
            for position, shape in enumerate(shapes):
                if not in_group[position] and probe.intersects(shape):
                    in_group[position] = True
                    grew = True
            probe = unary_union([s for s, member in zip(shapes, in_group) if member])
            # Single pass unless the caller asked for transitive merging.
            if not (transitive and grew):
                break

        members = remaining.loc[in_group]
        record = {'상권_코드': n, '서비스_업종_코드_명': service}
        for column in sum_columns:
            record[column] = members[column].sum()
        record['cluster'] = cluster
        record['geometry'] = probe
        records.append(record)

        remaining = remaining.loc[[not member for member in in_group]].reset_index(drop=True)
        n += 1

    # Build the frame once instead of appending row by row.
    return gpd.GeoDataFrame(records, columns=out_columns, geometry='geometry')

def concat_geodata(merge_polygon_data):
    """Stack the per-cluster frames and renumber the area codes.

    Parameters
    ----------
    merge_polygon_data : dict
        Frames stored under the keys ``'df_0'``, ``'df_1'``, ...,
        ``'df_{k-1}'`` where ``k == len(merge_polygon_data)``.

    Returns
    -------
    DataFrame
        All frames concatenated in key order, with ``'상권_코드'`` overwritten
        by 1..N and a fresh 0..N-1 index.

    Raises
    ------
    ValueError
        If ``merge_polygon_data`` is empty.

    Notes
    -----
    The previous pairwise loop never bound its result when the dict held
    exactly one frame (``UnboundLocalError``); a single frame now works.
    """
    if len(merge_polygon_data) == 0:
        raise ValueError("merge_polygon_data is empty; nothing to concatenate")

    frames = [merge_polygon_data[f'df_{i}'] for i in range(len(merge_polygon_data))]
    df = pd.concat(frames, axis=0)

    # Renumber areas across all clusters (assignment is positional).
    df['상권_코드'] = range(1, len(df) + 1)
    df = df.reset_index(drop=True)

    return df

def evaluation_score(df,score1,score2,score3,score4,score5_1,score5_2,score5_3):
    """Add threshold-based score columns to ``df`` and return it.

    For every scored column a value gets the 1-based position of the first
    threshold it does not exceed (``value <= threshold``); a value larger than
    every threshold gets ``len(thresholds) + 1``. With four thresholds the
    scores therefore range from 1 to 5.

    Parameters
    ----------
    df : DataFrame
        Must contain ``'집객시설수'``, ``'당월_매출_금액'``, ``'유사_업종_점포_수'``,
        ``'교통시설수'``, ``'총_상주인구_수'``, ``'총_유동인구_수'`` and
        ``'총_직장_인구_수'``. It is modified in place.
    score1, score2, score3, score4 : sequence of numbers
        Thresholds for ``'집객시설수'``, ``'당월_매출_금액'``,
        ``'유사_업종_점포_수'`` and ``'교통시설수'`` respectively.
    score5_1, score5_2, score5_3 : sequence of numbers
        Thresholds for ``'총_상주인구_수'``, ``'총_유동인구_수'`` and
        ``'총_직장_인구_수'``; their three scores are averaged.

    Returns
    -------
    DataFrame
        The same ``df`` object with the columns ``'집객력(점수)'``,
        ``'매출액(점수)'``, ``'경쟁업체(점수)'``, ``'편의성(점수)'`` and
        ``'잠재고객(점수)'`` added (or overwritten).

    Notes
    -----
    The previous row loop wrote scores via chained assignment
    (``df[col][i] = ...``), which silently has no effect under pandas
    Copy-on-Write, and it required a 0..n-1 index. This version is vectorised
    and positional. All supplied thresholds are used (the loop read exactly
    the first four).
    """
    def _grade(column, thresholds):
        # 1-based position of the first threshold with value <= threshold.
        values = df[column].to_numpy(dtype=float)
        cuts = np.asarray(thresholds, dtype=float)
        if cuts.size == 0:
            return np.ones(len(values), dtype=int)
        within = values[:, None] <= cuts[None, :]
        # argmax finds the first True per row; rows with no True (value above
        # every threshold, or NaN) fall through to the top score.
        first_hit = np.where(within.any(axis=1), within.argmax(axis=1), cuts.size)
        return first_hit + 1

    df['집객력(점수)'] = _grade('집객시설수', score1)
    df['매출액(점수)'] = _grade('당월_매출_금액', score2)
    df['경쟁업체(점수)'] = _grade('유사_업종_점포_수', score3)
    df['편의성(점수)'] = _grade('교통시설수', score4)

    # Mean of the three population scores.
    df['잠재고객(점수)'] = (
        _grade('총_상주인구_수', score5_1)
        + _grade('총_유동인구_수', score5_2)
        + _grade('총_직장_인구_수', score5_3)
    ) / 3

    return df

In [4]:
import shapely.wkt
from shapely.geometry import LineString, MultiLineString

# Percentiles used as score thresholds for every scored column.
SCORE_PERCENTILES = [20, 40, 60, 80]


def _exterior_lines(geom):
    """Return the outer ring(s) of a (Multi)Polygon as line geometry, else None."""
    if geom.geom_type == 'Polygon':
        return LineString(geom.exterior)
    if geom.geom_type == 'MultiPolygon':
        return MultiLineString([LineString(poly.exterior) for poly in geom.geoms])
    return None


geo_df_dict = {}

for key, value in dataframes_dict.items():
    geo_data = create_geodata(value)

    # Without a real GeoSeries there is nothing to buffer or merge. Skip this
    # entry instead of falling through with cluster_data left over from the
    # previous iteration (or undefined on the first one).
    if not isinstance(geo_data['geometry'], gpd.geoseries.GeoSeries):
        print("It's not a GeoSeries.")
        continue

    # Grow each polygon by 5 CRS units so near-touching areas intersect.
    geo_data['geometry'] = geo_data['geometry'].buffer(5)
    geo_data['result'] = None

    # Use the cluster labels actually present rather than assuming they are
    # exactly 0..k-1; a missing label would otherwise give an empty frame.
    cluster_labels = sorted(geo_data['cluster'].dropna().unique())
    if len(cluster_labels) == 0:
        print(f"No rows with geometry for {key!r}; skipping.")
        continue

    cluster_data = {}         # per-cluster input frames
    merge_polygon_data = {}   # per-cluster merged frames, keyed df_0, df_1, ...

    for position, label in enumerate(cluster_labels):
        cluster_data[f'geo_data_{position}'] = geo_data[geo_data['cluster'] == label].reset_index(drop=True)
        merge_polygon_data[f'df_{position}'] = merge_polygon(cluster_data[f'geo_data_{position}'])

    df_all = concat_geodata(merge_polygon_data)

    score1 = [np.percentile(df_all['집객시설수'], q) for q in SCORE_PERCENTILES]
    score2 = [np.percentile(df_all['당월_매출_금액'], q) for q in SCORE_PERCENTILES]
    score3 = [np.percentile(df_all['유사_업종_점포_수'], q) for q in SCORE_PERCENTILES]
    score4 = [np.percentile(df_all['교통시설수'], q) for q in SCORE_PERCENTILES]
    score5_1 = [np.percentile(df_all['총_상주인구_수'], q) for q in SCORE_PERCENTILES]
    score5_2 = [np.percentile(df_all['총_유동인구_수'], q) for q in SCORE_PERCENTILES]
    score5_3 = [np.percentile(df_all['총_직장_인구_수'], q) for q in SCORE_PERCENTILES]

    df_final = evaluation_score(df_all,score1,score2,score3,score4,score5_1,score5_2,score5_3)
    geo_df_dict[key] = df_final

redefined_df = pd.concat(geo_df_dict.values(), ignore_index = True)

# Declare the source CRS, then reproject to WGS84 longitude/latitude.
redefined_df.crs = "EPSG:5181"
redefined_df = redefined_df.to_crs(epsg = 4326)

# Round-trip through CSV: after reading back, redefined_df is a plain
# DataFrame and the geometry column holds WKT text that is parsed again.
redefined_df.to_csv("../input_csv/redefined_df.csv", index = False)

redefined_df = pd.read_csv("../input_csv/redefined_df.csv")

redefined_df['geometry'] = redefined_df['geometry'].apply(shapely.wkt.loads)

# Outline of every area as line geometry; None for other geometry types.
# Built in one pass instead of per-row chained assignment, which is a silent
# no-op under pandas Copy-on-Write.
redefined_df['geometry2'] = redefined_df['geometry'].apply(_exterior_lines)

redefined_df.to_csv("../input_csv/redefined_df.csv", index = False)

In [5]:
# Display the final table; the recorded output below is pandas' truncated rendering.
redefined_df

Unnamed: 0,상권_코드,서비스_업종_코드_명,총_유동인구_수,총_상주인구_수,총_직장_인구_수,유사_업종_점포_수,집객시설수,교통시설수,당월_매출_금액,영역_면적,cluster,geometry,집객력(점수),매출액(점수),경쟁업체(점수),편의성(점수),잠재고객(점수),geometry2
0,1,PC방,4539089.50,9588,1580,7,0.0,0.0,1.133742e+09,234224,0,POLYGON ((126.93440736942736 37.46765303321722...,1,5,5,1,4.333333,LINESTRING (126.93440736942736 37.467653033217...
1,2,PC방,16273249.00,16458,144420,16,658.0,59.0,1.289435e+09,1562746,0,POLYGON ((127.02248470599385 37.49640036649937...,5,5,5,5,5.000000,LINESTRING (127.02248470599385 37.496400366499...
2,3,PC방,2924851.00,4782,892,6,83.0,9.0,4.014410e+08,275082,0,POLYGON ((127.06601812263857 37.54077443531351...,5,4,4,4,3.666667,LINESTRING (127.06601812263857 37.540774435313...
3,4,PC방,4589353.75,899,1552,10,190.0,28.0,8.129638e+08,267693,0,POLYGON ((127.06000553082049 37.65328168143866...,5,5,5,5,3.000000,LINESTRING (127.06000553082049 37.653281681438...
4,5,PC방,8102497.25,12777,37704,10,322.0,27.0,2.865765e+09,1040535,0,POLYGON ((127.02213215191853 37.51412093435149...,5,5,5,5,5.000000,LINESTRING (127.02213215191853 37.514120934351...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17307,222,화초,1146115.50,1319,17175,6,0.0,0.0,2.985131e+07,149660,3,POLYGON ((127.05624428377789 37.49739863928150...,1,2,3,1,3.333333,LINESTRING (127.05624428377789 37.497398639281...
17308,223,화초,856378.50,73,30610,6,0.0,0.0,2.427971e+08,200306,3,POLYGON ((126.98279757109825 37.56903233158031...,1,5,3,1,2.666667,LINESTRING (126.98279757109825 37.569032331580...
17309,224,화초,416415.75,279,12937,9,0.0,0.0,1.581276e+08,124067,3,POLYGON ((126.98050412101318 37.57615189223551...,1,4,4,1,2.333333,LINESTRING (126.98050412101318 37.576151892235...
17310,225,화초,453583.25,1598,2190,9,0.0,0.0,7.796039e+08,144683,3,"POLYGON ((126.9660870334594 37.56157461963889,...",1,5,4,1,2.000000,LINESTRING (126.9660870334594 37.5615746196388...


In [6]:
# Number of distinct values in the service-type column.
redefined_df['서비스_업종_코드_명'].unique().size

63