In [95]:
import warnings
import pandas as pd
import geopandas as gpd
import glob
import numpy as np

# Silence every warning category for the rest of the session
warnings.simplefilter('ignore')

# Do not truncate the column list when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [96]:
# Directory that holds the CSV files to load
directory_path = "C:/Users/Admin/Desktop/최종프로젝트/cluster"

# Collect every CSV path in the directory. glob returns paths in an
# unspecified order, so sort them to make the load order (and therefore the
# row order of anything concatenated from these frames) reproducible.
csv_files = sorted(glob.glob(f'{directory_path}/*.csv'))

dataframes_dict = {}

for file in csv_files:
    # The path may use '/' or '\\' as separator (glob on Windows joins with
    # a backslash), so strip the directory part on both.
    file_name = file.rpartition('/')[2].rpartition('\\')[2]

    # Use the file name without its LAST extension as the key. Cutting at the
    # first dot would map "a.1.csv" and "a.2.csv" to the same key and silently
    # overwrite one of the frames.
    stem, dot, _ = file_name.rpartition('.')
    key = stem if dot else file_name

    # Load the CSV file into a DataFrame and store it under that key
    dataframes_dict[key] = pd.read_csv(file)

In [97]:
def create_geodata(df, shapefile_path="C:/Users/Admin/Desktop/최종프로젝트/map/서울시 상권분석서비스(영역-상권).shp"):
    """Attach shapefile geometries to the rows of ``df``.

    The shapefile is read with geopandas and inner-joined to ``df`` on
    ``df['상권_코드_명'] == shapefile['TRDAR_CD_N']``, so rows of ``df`` without
    a matching name in the shapefile are dropped.

    Args:
        df: DataFrame containing at least the columns listed in
            ``columns`` below.
        shapefile_path: Path of the shapefile to read. Defaults to the
            location that was previously hard-coded, so existing callers
            are unaffected.

    Returns:
        A GeoDataFrame with the selected columns of ``df`` plus ``geometry``.
    """
    # Single source of truth for the columns carried over from `df`
    columns = ['상권_코드_명', '서비스_업종_코드_명', '총_유동인구_수', '총_상주인구_수',
               '총_직장_인구_수', '유사_업종_점포_수', '집객시설수', '교통시설수',
               '당월_매출_금액', '영역_면적', 'cluster']

    districts = gpd.read_file(shapefile_path, encoding='utf-8')

    # Merge only the join key and the geometry: the other shapefile columns
    # were discarded right after the merge anyway, and leaving them out rules
    # out suffix renaming if a shapefile column name ever collides with `df`.
    merged = pd.merge(df[columns],
                      districts[['TRDAR_CD_N', 'geometry']],
                      left_on='상권_코드_명',
                      right_on='TRDAR_CD_N')

    return gpd.GeoDataFrame(merged[columns + ['geometry']])

def merge_polygon(geo_data):
    """Merge intersecting polygons of one cluster into combined areas.

    Repeatedly takes the first remaining row as a seed, groups it with every
    remaining row whose geometry intersects the seed's geometry, and emits
    one output row for the group: the numeric columns are summed and the
    geometries are unioned. Grouped rows are removed and the process repeats
    until no rows remain.

    NOTE(review): grouping is single-pass — only rows that intersect the
    seed directly are merged, not rows that intersect the merged result.
    The previous loop structure (`while ... sum() != 1`) suggests transitive
    merging may have been intended, but it never iterated more than once, so
    the effective behaviour is kept here. Confirm which is wanted.

    Args:
        geo_data: Frame for a single cluster with a ``geometry`` column of
            shapely geometries plus the columns named in ``summed_columns``,
            ``cluster`` and ``서비스_업종_코드_명``. It is not modified, and a
            pre-existing ``result`` column is no longer required.

    Returns:
        A GeoDataFrame with columns ``상권_코드`` (1-based running number),
        ``서비스_업종_코드_명`` and ``cluster`` (both taken from the first input
        row), the summed numeric columns and the merged ``geometry``. An
        empty input yields an empty frame with the same columns.
    """
    from shapely.ops import unary_union

    summed_columns = ['총_유동인구_수', '총_상주인구_수', '총_직장_인구_수',
                      '유사_업종_점포_수', '집객시설수', '교통시설수',
                      '당월_매출_금액', '영역_면적']
    output_columns = ['상권_코드', '서비스_업종_코드_명', *summed_columns,
                      'cluster', 'geometry']

    # Empty input used to raise KeyError on the `[0]` lookups below
    if len(geo_data) == 0:
        return gpd.GeoDataFrame({column: [] for column in output_columns})

    # Work on a re-indexed copy so the caller's frame is left untouched
    remaining = geo_data.reset_index(drop=True)
    cluster = remaining['cluster'].iloc[0]
    service = remaining['서비스_업종_코드_명'].iloc[0]

    records = []
    while len(remaining) > 0:
        seed = remaining['geometry'].iloc[0]

        # Plain Python mask instead of chained assignment into a helper
        # column, which is a silent no-op under pandas Copy-on-Write.
        touches_seed = [seed.intersects(shape) for shape in remaining['geometry']]
        # The seed always belongs to its own group; this also guarantees the
        # loop makes progress for an empty geometry, which does not
        # intersect itself.
        touches_seed[0] = True

        group = remaining.loc[touches_seed]

        record = {'상권_코드': len(records) + 1, '서비스_업종_코드_명': service}
        for column in summed_columns:
            record[column] = group[column].sum()
        record['cluster'] = cluster
        record['geometry'] = unary_union(list(group['geometry']))
        records.append(record)

        remaining = remaining.loc[[not hit for hit in touches_seed]].reset_index(drop=True)

    # Build the result once (instead of appending row by row) and carry the
    # input CRS over when the input has one.
    return gpd.GeoDataFrame(records, columns=output_columns,
                            crs=getattr(geo_data, 'crs', None))

def concat_geodata(merge_polygon_data):
    """Stack the per-cluster frames into one frame and renumber ``상권_코드``.

    Args:
        merge_polygon_data: Dict whose keys are ``'df_0'``, ``'df_1'``, ...,
            ``'df_{n-1}'`` and whose values are the frames to stack, in that
            order.

    Returns:
        A single frame with a fresh 0..n-1 index whose ``상권_코드`` column is
        overwritten with 1..n.

    Raises:
        ValueError: If ``merge_polygon_data`` is empty.
        KeyError: If an expected ``'df_{i}'`` key is missing.
    """
    if not merge_polygon_data:
        raise ValueError("merge_polygon_data is empty; nothing to concatenate")

    # Concatenate everything in one call. The previous pairwise loop never
    # ran for a single-entry dict and left the result variable unbound.
    frames = [merge_polygon_data[f'df_{i}'] for i in range(len(merge_polygon_data))]
    df = pd.concat(frames, axis=0)

    # Renumber the area code over the whole stacked frame
    df['상권_코드'] = list(range(1, len(df) + 1))

    return df.reset_index(drop=True)

def _band_scores(values, cut_points):
    """Map each value to a 1-5 score using the first four entries of ``cut_points``."""
    observed = np.asarray(values)
    # np.select picks the FIRST condition that holds, which mirrors an
    # if/elif chain: <= cut_points[0] -> 1, ..., <= cut_points[3] -> 4, else 5.
    conditions = [observed <= cut_points[k] for k in range(4)]
    return np.select(conditions, [1, 2, 3, 4], default=5).astype('int64')


def evaluation_score(df,score1,score2,score3,score4,score5_1,score5_2,score5_3):
    """Add five 1-5 score columns to ``df`` and return it.

    Each source column is compared against its four cut points: a value
    ``<= cut[0]`` scores 1, ``<= cut[1]`` scores 2, ``<= cut[2]`` scores 3,
    ``<= cut[3]`` scores 4, and anything else scores 5.

    Args:
        df: Frame with the columns ``집객시설수``, ``당월_매출_금액``,
            ``유사_업종_점포_수``, ``교통시설수``, ``총_상주인구_수``,
            ``총_유동인구_수`` and ``총_직장_인구_수``. Modified in place.
        score1: Cut points for ``집객시설수`` -> ``집객력(점수)``.
        score2: Cut points for ``당월_매출_금액`` -> ``매출액(점수)``.
        score3: Cut points for ``유사_업종_점포_수`` -> ``경쟁업체(점수)``.
        score4: Cut points for ``교통시설수`` -> ``편의성(점수)``.
        score5_1: Cut points for ``총_상주인구_수``.
        score5_2: Cut points for ``총_유동인구_수``.
        score5_3: Cut points for ``총_직장_인구_수``.

    Returns:
        The same ``df`` object. ``잠재고객(점수)`` is the mean of the three
        population scores (their sum divided by 3).
    """
    # Whole-column assignment replaces the per-row chained assignment
    # (`df[col][i] = ...`), which does nothing under pandas Copy-on-Write and
    # also required the index to be exactly 0..n-1.
    df['집객력(점수)'] = _band_scores(df['집객시설수'], score1)
    df['매출액(점수)'] = _band_scores(df['당월_매출_금액'], score2)
    df['경쟁업체(점수)'] = _band_scores(df['유사_업종_점포_수'], score3)
    df['편의성(점수)'] = _band_scores(df['교통시설수'], score4)

    # Potential customers: average of resident, floating and working
    # population scores
    population_total = (
        _band_scores(df['총_상주인구_수'], score5_1)
        + _band_scores(df['총_유동인구_수'], score5_2)
        + _band_scores(df['총_직장_인구_수'], score5_3)
    )
    df['잠재고객(점수)'] = population_total / 3

    return df

In [98]:
geo_df_dict = {}

# For every loaded CSV: attach geometries, merge intersecting polygons per
# cluster, then score the merged areas against percentile cut points.
for key, value in dataframes_dict.items():
    geo_data = create_geodata(value)

    # Without real geometries nothing below can run. Skip this file instead of
    # falling through to a `cluster_data` left over from the previous file
    # (or undefined on the first one).
    if not isinstance(geo_data['geometry'], gpd.GeoSeries):
        print("It's not a GeoSeries.")
        continue

    # Grow every polygon by 5 (in the units of the layer's CRS — presumably
    # metres; TODO confirm) so that near-adjacent areas count as intersecting.
    geo_data['geometry'] = geo_data['geometry'].buffer(5)
    # Helper column expected by merge_polygon
    geo_data['result'] = None

    # Use the cluster labels that are actually present; assuming they are
    # exactly 0..k-1 produces empty frames when a label is missing.
    cluster_labels = sorted(geo_data['cluster'].dropna().unique())
    if not cluster_labels:
        print(f"No rows with geometry for '{key}'; skipped.")
        continue

    cluster_data = {}  # one frame per cluster, keyed 'geo_data_0', 'geo_data_1', ...
    for i, label in enumerate(cluster_labels):
        cluster_data[f'geo_data_{i}'] = geo_data[geo_data['cluster'] == label].reset_index(drop=True)

    merge_polygon_data = {}  # merged frame per cluster, keyed 'df_0', 'df_1', ...
    for i in range(len(cluster_data)):
        merge_polygon_data[f'df_{i}'] = merge_polygon(cluster_data[f'geo_data_{i}'])

    df_all = concat_geodata(merge_polygon_data)

    # 20/40/60/80th percentiles of each indicator are the cut points between
    # scores 1..5
    score1 = [np.percentile(df_all['집객시설수'], q) for q in [20, 40, 60, 80]]
    score2 = [np.percentile(df_all['당월_매출_금액'], q) for q in [20, 40, 60, 80]]
    score3 = [np.percentile(df_all['유사_업종_점포_수'], q) for q in [20, 40, 60, 80]]
    score4 = [np.percentile(df_all['교통시설수'], q) for q in [20, 40, 60, 80]]
    score5_1 = [np.percentile(df_all['총_상주인구_수'], q) for q in [20, 40, 60, 80]]
    score5_2 = [np.percentile(df_all['총_유동인구_수'], q) for q in [20, 40, 60, 80]]
    score5_3 = [np.percentile(df_all['총_직장_인구_수'], q) for q in [20, 40, 60, 80]]

    df_final = evaluation_score(df_all,score1,score2,score3,score4,score5_1,score5_2,score5_3)
    geo_df_dict[key] = df_final

In [99]:
# Stack the scored frames of all input files into one table with a fresh index
redefined_df = pd.concat(list(geo_df_dict.values()), ignore_index=True)

In [100]:
# Write the combined table to the working directory, without the index column
redefined_df.to_csv(path_or_buf="redefined_df.csv", index=False)