In [54]:
import os
import csv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.font_manager as fm

dir = 'D:/han_project03/project'

plt.rcParams['figure.figsize'] = [10, 6]
font_path = 'cluster/NanumFontSetup_TTF_ALL/NanumGothic.ttf'
fontprop = fm.FontProperties(fname=font_path, size=15)


data = {}

def readCsv(dir, data):
    path = os.path.join(dir, 'data', data)
    csvFiles = []
    for idx, f in enumerate(os.listdir(path)):
        csvFiles.append(pd.read_csv(os.path.join(path, f), encoding='utf-8'))
    return pd.concat(csvFiles)

def loadDistrictData(dir):
    print('Load district data...')
    path = os.path.join(dir, 'data', '상권영역')
    csvFiles = []
    for idx, f in enumerate(os.listdir(path)):
        csvFiles.append(pd.read_csv(os.path.join(path, f), encoding='utf-8'))

    df = pd.DataFrame.merge(csvFiles[0], csvFiles[1], on='시군구_코드')[['상권_코드', '상권_코드_명', '시군구명']]
    df = df.drop_duplicates(subset=['상권_코드']).reset_index(drop=True)
    return df

def loadSalesData(dir):
    print('Load sales data...')
    df = readCsv(dir, '상권매출').drop(['상권_구분_코드', '상권_구분_코드_명'], axis=1)
    df = df.sort_values(['기준_년_코드', '기준_분기_코드', '상권_코드', '서비스_업종_코드']).reset_index(drop=True)
    return df

def loadStoresData(dir):
    print('Load stores data...')
    df = readCsv(dir, '상권점포').drop(['상권_구분_코드', '상권_구분_코드_명'], axis=1)
    df = df.sort_values(['기준_년_코드', '기준_분기_코드', '상권_코드', '서비스_업종_코드']).reset_index(drop=True)
    return df

def loadFloatingData(dir, regionDf):
    print('Load floating data...')
    df = readCsv(dir, '상권추정유동인구')
    df = pd.DataFrame.merge(df, regionDf).reset_index(drop=True)
    return df

def loadPeopleData(dir, regionDf):
    print('Load people data...')
    df = readCsv(dir, '상권상주인구')
    df = pd.DataFrame.merge(df, regionDf).reset_index(drop=True)
    return df

def makeServiceData(salesDf):
    print('Make service data...')
    df = salesDf[['서비스_업종_코드', '서비스_업종_코드_명']].drop_duplicates(subset=['서비스_업종_코드']).reset_index(drop=True)
    df = df.sort_values(['서비스_업종_코드'])
    arr1 = []
    arr2 = []
    for i in range(df.shape[0]):
        if df['서비스_업종_코드'][i][2] == '1':
            tmp1 = '음식업'
            tmp2 = '외식업'
        elif df['서비스_업종_코드'][i][2] == '2':
            if int(df['서비스_업종_코드'][i][-2:]) < 4:
                tmp1 = '학문교육'
            elif int(df['서비스_업종_코드'][i][-2:]) < 7:
                tmp1 = '의약의료'
            elif int(df['서비스_업종_코드'][i][-2:]) == 8:
                tmp1 = '부동산'
            elif int(df['서비스_업종_코드'][i][-2:]) < 13:
                tmp1 = '여가오락'
            elif int(df['서비스_업종_코드'][i][-2:]) < 16:
                tmp1 = '생활서비스'
            else:
                tmp1 = '미용'
            tmp2 = '서비스업'
        elif df['서비스_업종_코드'][i][2] == '3':
            if int(df['서비스_업종_코드'][i][-2:]) < 3:
                tmp1 = '소매업'
            elif int(df['서비스_업종_코드'][i][-2:]) < 5:
                tmp1 = 'IT기기'
            elif int(df['서비스_업종_코드'][i][-2:]) < 7:
                tmp1 = '소매업'
            elif int(df['서비스_업종_코드'][i][-2:]) < 9:
                tmp1 = '의류'
            elif int(df['서비스_업종_코드'][i][-2:]) == 9:
                tmp1 = '의약의료'
            elif int(df['서비스_업종_코드'][i][-2:]) == 10:
                tmp1 = '소매업'
            elif int(df['서비스_업종_코드'][i][-2:]) == 11:
                tmp1 = '미용'
            elif int(df['서비스_업종_코드'][i][-2:]) == 12:
                tmp1 = '여가오락'
            elif int(df['서비스_업종_코드'][i][-2:]) == 13:
                tmp1 = '의류'
            elif int(df['서비스_업종_코드'][i][-2:]) == 14:
                tmp1 = '소매업'
            elif int(df['서비스_업종_코드'][i][-2:]) < 16:
                tmp1 = '소매업'
            else:
                tmp1 = 'IT기기'
            tmp2 = '도소매업'
        arr1.append(tmp1)
        arr2.append(tmp2)

    df['중분류_서비스'] = arr1
    df['대분류_서비스'] = arr2
    return df


In [55]:
data['District'] = loadDistrictData(dir)
data['Sales'] = loadSalesData(dir)
data['Service'] = makeServiceData(data['Sales'])
data['Stores'] = loadStoresData(dir)
data['Floating'] = loadFloatingData(dir, data['District'])
data['People'] = loadPeopleData(dir, data['District'])
data['People']
data['MainData'] = pd.DataFrame.merge(data['District'], data['Sales'], on=['상권_코드', '상권_코드_명']).reset_index(drop=True)
data['MainData'] = pd.DataFrame.merge(data['MainData'], data['Stores']).reset_index(drop=True)
data['MainData'] = pd.DataFrame.merge(data['MainData'], data['Service']).reset_index(drop=True)
arr = []
for i in range(data['MainData'].shape[0]):
    if data['MainData']['유사_업종_점포_수'][i] == 0 and data['MainData']['당월_매출_금액'][i] > 0:
        arr.append(1)
    else:
        arr.append(data['MainData']['유사_업종_점포_수'][i])
data['MainData']['유사_업종_점포_수'] = arr
data['MainData']['점포당_매출'] = data['MainData']['당월_매출_금액'] / (data['MainData']['유사_업종_점포_수'])
data['MainData']

Load district data...
Load sales data...
Make service data...
Load stores data...
Load floating data...
Load people data...


Unnamed: 0,상권_코드,상권_코드_명,시군구명,기준_년_코드,기준_분기_코드,서비스_업종_코드,서비스_업종_코드_명,당월_매출_금액,당월_매출_건수,주중_매출_비율,...,점포_수,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수,중분류_서비스,대분류_서비스,점포당_매출
0,1001453,낙성대시장,관악구,2014,1,CS100001,한식음식점,64641007.0,4157,68,...,9,9,0,0,0,0,0,음식업,외식업,7.182334e+06
1,1001453,낙성대시장,관악구,2014,2,CS100001,한식음식점,79979074.0,5380,72,...,9,9,0,0,0,0,0,음식업,외식업,8.886564e+06
2,1001453,낙성대시장,관악구,2014,3,CS100001,한식음식점,138056318.0,7218,67,...,9,10,10,1,0,0,1,음식업,외식업,1.380563e+07
3,1001453,낙성대시장,관악구,2014,4,CS100001,한식음식점,173323601.0,8117,69,...,9,10,0,0,0,0,1,음식업,외식업,1.733236e+07
4,1001453,낙성대시장,관악구,2015,1,CS100001,한식음식점,130943628.0,5272,70,...,9,10,0,0,0,0,1,음식업,외식업,1.309436e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673696,1000383,상계로1길,노원구,2020,1,CS200036,고시원,12234125.0,26,100,...,2,2,0,0,0,0,0,여가오락,도소매업,6.117062e+06
673697,1000383,상계로1길,노원구,2020,2,CS200036,고시원,8757058.0,26,0,...,2,2,50,1,0,0,0,여가오락,도소매업,4.378529e+06
673698,1001263,서울 노원구 노원역_3,노원구,2019,2,CS200036,고시원,1690141.0,12,33,...,2,2,0,0,0,0,0,여가오락,도소매업,8.450705e+05
673699,1001263,서울 노원구 노원역_3,노원구,2019,3,CS200036,고시원,23195005.0,82,94,...,2,2,0,0,0,0,0,여가오락,도소매업,1.159750e+07


In [56]:
arr = []
for i in range(data['MainData'].shape[0]):
    arr.append('{}_{}'.format(data['MainData']['기준_년_코드'][i], data['MainData']['기준_분기_코드'][i]))
data['MainData']['년도_분기'] = arr

In [57]:
data['MainData'].sort_values(['기준_년_코드', '기준_분기_코드'])

Unnamed: 0,상권_코드,상권_코드_명,시군구명,기준_년_코드,기준_분기_코드,서비스_업종_코드,서비스_업종_코드_명,당월_매출_금액,당월_매출_건수,주중_매출_비율,...,유사_업종_점포_수,개업_율,개업_점포_수,폐업_률,폐업_점포_수,프랜차이즈_점포_수,중분류_서비스,대분류_서비스,점포당_매출,년도_분기
0,1001453,낙성대시장,관악구,2014,1,CS100001,한식음식점,64641007.0,4157,68,...,9,0,0,0,0,0,음식업,외식업,7.182334e+06,2014_1
26,1001454,봉천제일종합시장,관악구,2014,1,CS100001,한식음식점,251044944.0,7562,70,...,7,0,0,14,1,0,음식업,외식업,3.586356e+07,2014_1
52,1001452,중부시장,관악구,2014,1,CS100001,한식음식점,400006490.0,12394,68,...,17,0,0,6,1,1,음식업,외식업,2.352979e+07,2014_1
78,1001460,관악신사시장,관악구,2014,1,CS100001,한식음식점,450026611.0,16329,71,...,20,0,0,10,2,0,음식업,외식업,2.250133e+07,2014_1
104,1001461,신림종합시장,관악구,2014,1,CS100001,한식음식점,62782914.0,1465,69,...,2,0,0,50,1,0,음식업,외식업,3.139146e+07,2014_1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673664,1000986,양재대로101길,강동구,2020,2,CS200036,고시원,102000000.0,354,80,...,3,0,0,0,0,0,여가오락,도소매업,3.400000e+07,2020_2
673679,1000445,연희로10길,서대문구,2020,2,CS200036,고시원,209199263.0,370,50,...,7,0,0,0,0,0,여가오락,도소매업,2.988561e+07,2020_2
673687,1001185,서울 서대문구 신촌역,서대문구,2020,2,CS200036,고시원,20861515.0,50,50,...,11,18,2,0,0,0,여가오락,도소매업,1.896501e+06,2020_2
673693,1001186,서울 서대문구 이대역,서대문구,2020,2,CS200036,고시원,7413886.0,11,100,...,6,0,0,17,1,0,여가오락,도소매업,1.235648e+06,2020_2


In [58]:
guArr = np.unique(data['District']['시군구명']) 

In [93]:
def getGap(df, x, target=None, mean=False):
    if mean:
        return pd.DataFrame.mean(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-1])][target]), pd.DataFrame.mean(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-1])][target]) - pd.DataFrame.mean(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-3])][target]), pd.DataFrame.mean(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-3])][target]) - pd.DataFrame.mean(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-5])][target])
    
    else:
        return pd.DataFrame.sum(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-1])][target]), pd.DataFrame.sum(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-1])][target]) - pd.DataFrame.sum(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-3])][target]), pd.DataFrame.sum(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-3])][target]) - pd.DataFrame.sum(df[(df['중분류_서비스'] == x) & (df['년도_분기'] == label[-5])][target])

for selectedReg in guArr:
    print(selectedReg)
    resDir = os.path.join(dir, 'result', selectedReg)
    if not os.path.isdir(resDir):
        os.mkdir(resDir)
    if not os.path.isdir(os.path.join(resDir, 'sales')):
        os.mkdir(os.path.join(resDir, 'sales'))
    if not os.path.isdir(os.path.join(resDir, 'floating')):
        os.mkdir(os.path.join(resDir, 'floating'))
    df = data['MainData'][data['MainData']['시군구명'] == selectedReg]
    serviceArr = list(set(list(data['Service']['중분류_서비스'])))
    serviceArr.remove('부동산')
    serviceArr.remove('생활서비스')
    serviceArr.remove('의류')
    newDf = []
    for x in serviceArr:
        y_data = []
        label = list(set(list(df['년도_분기'])))
        label.sort()
        #label = label[-4:]
        index = np.arange(len(label))
        for y in label:
            tmp = df[(df['중분류_서비스'] == x) & (df['년도_분기'] == y)]
            y_data.append(pd.DataFrame.mean(tmp['점포당_매출']))
        ax = plt.subplot(1, 1, 1)
        ax.bar(index, y_data, width=0.3)
        plt.title(x, fontsize=20, fontproperties=fontprop)
        plt.xlabel('년도_분기', fontsize=18, fontproperties=fontprop)
        plt.ylabel('점포당_매출', fontsize=18, fontproperties=fontprop)
        plt.xticks(index, label, fontsize=10)
        for l in ax.xaxis.get_ticklabels():
            l.set_rotation(45)
        plt.savefig(os.path.join(resDir, 'sales', '{}.png'.format(x)))
        plt.close()
        sales, salesGap, salesGap2 = getGap(df, x, '점포당_매출', True)
        stores, storesGap, storesGap2 = getGap(df, x, '유사_업종_점포_수')
        
        newDf.append([x, sales, salesGap, salesGap2, stores, storesGap, storesGap2])
    newDf = pd.DataFrame(newDf)

    newDf.columns = ['업종', '점포당 매출', '점포당 매출 변화(6개월)_코로나_후', '점포당 매출 변화(6개월)_코로나_전', '점포 수', '점포 수 변화(6개월)_코로나_후', '점포 수 변화(6개월)_코로나_전']
    newDf.to_csv(os.path.join(resDir, 'sales', 'sales.csv'), encoding='euc-kr', index=False)

    df = df[df['년도_분기'] == '2020_2']
    for x in serviceArr:
        tmp = df[df['중분류_서비스'] == x]
        label = ['월', '화', '수', '목', '금', '토', '일']
        index = np.arange(len(label))
        y_data = []
        for w in label:
            y_data.append(pd.DataFrame.mean(tmp['{}요일_매출_금액'.format(w)]))
        plt.bar(index, y_data, width=0.5)
        plt.title(x, fontsize=20, fontproperties=fontprop)
        plt.xlabel('요일', fontsize=18, fontproperties=fontprop)
        plt.ylabel('매출', fontsize=18, fontproperties=fontprop)
        plt.xticks(index, label, fontsize=15, fontproperties=fontprop)
        plt.savefig(os.path.join(resDir, 'sales', '요일별_{}.png'.format(x)))
        plt.close()

        label = ['00~06', '06~11', '11~14', '14~17', '17~21', '21~24']
        index = np.arange(len(label))
        y_data = []
        for w in label:
            y_data.append(pd.DataFrame.mean(tmp['시간대_{}_매출_금액'.format(w)]))
        plt.bar(index, y_data, width=0.5)
        plt.title(x, fontsize=20, fontproperties=fontprop)
        plt.xlabel('시간', fontsize=18, fontproperties=fontprop)
        plt.ylabel('매출', fontsize=18, fontproperties=fontprop)
        plt.xticks(index, label, fontsize=15, fontproperties=fontprop)
        plt.savefig(os.path.join(resDir, 'sales', '시간대별_{}.png'.format(x)))
        plt.close()

        label = [10, 20, 30, 40, 50, '60_이상']
        index = np.arange(len(label))
        y_data = []
        for w in label:
            y_data.append(pd.DataFrame.mean(tmp['연령대_{}_매출_금액'.format(w)]))
        plt.bar(index, y_data, width=0.5)
        plt.title(x, fontsize=20, fontproperties=fontprop)
        plt.xlabel('연령', fontsize=18, fontproperties=fontprop)
        plt.ylabel('매출', fontsize=18, fontproperties=fontprop)
        plt.xticks(index, label, fontsize=15, fontproperties=fontprop)
        plt.savefig(os.path.join(resDir, 'sales', '연령대별_{}.png'.format(x)))
        plt.close()    


    # floating results
    df = data['Floating'].sort_values(['기준_년_코드', '기준_분기_코드']).reset_index(drop=True)
    arr = []
    for i in range(df.shape[0]):
        arr.append('{}_{}'.format(df['기준_년_코드'][i], df['기준_분기_코드'][i]))
    df['년도_분기'] = arr
    df = df[(df['시군구명'] == selectedReg) & (df['년도_분기'] == '2020_1')]
    # 요일별
    bar_width = 0.35
    alpha = 0.5
    g = []
    label = [10, 20, 30, 40, 50, '60_이상']
    index = np.arange(len(label))
    for idx, s in enumerate(['남성', '여성']):
        g.append([])
        for o in label:
            tmp = []
            for w in ['월', '화', '수', '목', '금', '토', '일']:
                for t in range(1, 7):
                    selected = '{}연령대_{}_{}요일시간대_{}_유동인구_수'.format(s, o, w, t)
                    tmp.append(pd.DataFrame.mean(df[selected]))
            g[idx].append(np.average(tmp))
    p1 = plt.bar(index, g[0], 
             bar_width, 
             color='b', 
             alpha=alpha,
             label='Male')
    p2 = plt.bar(index + bar_width, g[1], 
                 bar_width, 
                 color='r', 
                 alpha=alpha,
                 label='Female')
    plt.title('연령별 추정유동인구', fontsize=20, fontproperties=fontprop)
    plt.xlabel('연령대', fontsize=18, fontproperties=fontprop)
    plt.ylabel('추정_유동_인구', fontsize=18, fontproperties=fontprop)
    plt.xticks(index, label, fontsize=15, fontproperties=fontprop)
    plt.legend((p1[0], p2[0]), ('Male', 'Female'), fontsize=15)
    plt.savefig(os.path.join(resDir, 'floating', '연령대별.png'))
    plt.close()

    g = []
    label = ['월', '화', '수', '목', '금', '토', '일']
    index = np.arange(len(label))
    for idx, s in enumerate(['남성', '여성']):
        g.append([])
        for w in label:
            tmp = []
            for o in [10, 20, 30, 40, 50, '60_이상']:
                for t in range(1, 7):
                    selected = '{}연령대_{}_{}요일시간대_{}_유동인구_수'.format(s, o, w, t)
                    tmp.append(pd.DataFrame.mean(df[selected]))
            g[idx].append(np.average(tmp))
    p1 = plt.bar(index, g[0], 
             bar_width, 
             color='b', 
             alpha=alpha,
             label='Male')
    p2 = plt.bar(index + bar_width, g[1], 
                 bar_width, 
                 color='r', 
                 alpha=alpha,
                 label='Female')
    plt.title('요일별 추정유동인구', fontsize=20, fontproperties=fontprop)
    plt.xlabel('연령대', fontsize=18, fontproperties=fontprop)
    plt.ylabel('추정_유동_인구', fontsize=18, fontproperties=fontprop)
    plt.xticks(index, label, fontsize=15, fontproperties=fontprop)
    plt.legend((p1[0], p2[0]), ('Male', 'Female'), fontsize=15)
    plt.savefig(os.path.join(resDir, 'floating', '요일별.png'))
    plt.close()

    g = []
    label = list(range(1, 7))
    index = np.arange(len(label))
    for idx, s in enumerate(['남성', '여성']):
        g.append([])
        for t in label:
            tmp = []
            for o in [10, 20, 30, 40, 50, '60_이상']:
                for w in ['월', '화', '수', '목', '금', '토', '일']:
                    selected = '{}연령대_{}_{}요일시간대_{}_유동인구_수'.format(s, o, w, t)
                    tmp.append(pd.DataFrame.mean(df[selected]))
            g[idx].append(np.average(tmp))
    p1 = plt.bar(index, g[0], 
             bar_width, 
             color='b', 
             alpha=alpha,
             label='Male')
    p2 = plt.bar(index + bar_width, g[1], 
                 bar_width, 
                 color='r', 
                 alpha=alpha,
                 label='Female')
    plt.title('시간대별 추정유동인구', fontsize=20, fontproperties=fontprop)
    plt.xlabel('연령대', fontsize=18, fontproperties=fontprop)
    plt.ylabel('추정_유동_인구', fontsize=18, fontproperties=fontprop)
    plt.xticks(index, label, fontsize=15, fontproperties=fontprop)
    plt.legend((p1[0], p2[0]), ('Male', 'Female'), fontsize=15)
    plt.savefig(os.path.join(resDir, 'floating', '시간대별.png'))
    plt.close()

강남구
강동구
강북구
강서구
관악구
광진구
구로구
금천구
노원구
도봉구
동대문구
동작구
마포구
서대문구
서초구
성동구
성북구
송파구
양천구
영등포구
용산구
은평구
종로구
중구
중랑구


In [111]:
tmp = []
for selectedReg in guArr:
    df = pd.read_csv(os.path.join(dir, 'result', selectedReg, 'sales', 'sales.csv'), encoding='euc-kr')
    df['구'] = [selectedReg] * df.shape[0]
    tmp.append(df)
df = pd.concat(tmp)

df['매출_변화_비율_코로나_전'] = df['점포당 매출 변화(6개월)_코로나_전'] / (df['점포당 매출'] - df['점포당 매출 변화(6개월)_코로나_전'] - df['점포당 매출 변화(6개월)_코로나_후'])
df['점포수_변화_비율_코로나_전'] = df['점포 수 변화(6개월)_코로나_전'] / (df['점포 수'] - df['점포 수 변화(6개월)_코로나_전'] - df['점포 수 변화(6개월)_코로나_후'])
df['매출_변화_비율_코로나_후'] = df['점포당 매출 변화(6개월)_코로나_후'] / (df['점포당 매출'] - df['점포당 매출 변화(6개월)_코로나_후'])
df['점포수_변화_비율_코로나_후'] = df['점포 수 변화(6개월)_코로나_후'] / (df['점포 수'] - df['점포 수 변화(6개월)_코로나_후'])

df = df.sort_values(['매출_변화_비율_코로나_후', '점포수_변화_비율_코로나_후']).reset_index(drop=True)

newDf = df.groupby(['업종']).mean()
newDf= newDf.sort_values(['매출_변화_비율_코로나_후', '점포수_변화_비율_코로나_후'])
newDf.to_csv(os.path.join(dir, 'res.csv'), encoding='euc-kr', index=True)

newDf2 = df.groupby(['구']).mean()
newDf2= newDf2.sort_values(['매출_변화_비율_코로나_후', '점포수_변화_비율_코로나_후'])
newDf2.to_csv(os.path.join(dir, 'res2.csv'), encoding='euc-kr', index=True)


In [146]:
newDir = os.path.join(dir, 'news', '용산')
csvFiles = []
for idx, f in enumerate(os.listdir(newDir)):
    csvFiles.append(pd.read_excel(os.path.join(newDir, f)))
newsDf = pd.concat(csvFiles)['특성추출(가중치순 상위 50개)']
newsDf

0      lg유플러스,방문객,유플러스,일상비일상의틈,소비자,강남역,5g,포토존,휴대폰,방문객...
1      이태원,서울시,용산구의회,용산,구의회,결의안,100만,임대주택,김정재,성장현,서울신...
2      경리단길,전망대,용산구,이태원,코로나19,활성화,세계음식문화거리,서울,단길,연합회,...
3      이태원,용산구,골목상권,활성화,성장현,소상공인,코로나19,연합회,신한은행,시설개선,...
4      이태원,활성화,소상공인,성장현,신한은행,코로나19,용산구,세계음식거리,골목상권,연합...
                             ...                        
202     한샘,리하우스,중국,부동산,10조,중국사업,강승수,논현,인스테리어,서울,직매장,스타트업
203    스케이트장,야외스케이트장,눈썰매장,빙상장,썰매장,지자체,광주시청,겨울철,이용객,시민...
204    팀장,본부장,센터장,도서관장,교육원,부국장,총무부장,4급,연구소장,대학원장,최재경,...
205    본부장,팀장,센터장,지역본부장,보도국,대우,상무이사,총무부장,화승,도서관장,전무이사...
206              서울,부동산,전문가,김학렬,강남,고준석,과천,하남,가능성,권대중,전셋값
Name: 특성추출(가중치순 상위 50개), Length: 207, dtype: object

In [147]:
tmp = []
for i in range(newsDf.shape[0]):
    tmp2 = newsDf[i].split(',')
    for j in range(len(tmp2)):
        tmp.append(tmp2[j])
tmp = pd.DataFrame(tmp)
tmp.columns = ['Word']
tmp = pd.DataFrame(tmp['Word'].value_counts()).reset_index()
tmp.columns = ['Word', 'Freq']
tmp.head(30)

Unnamed: 0,Word,Freq
0,서울,122
1,용산,96
2,용산구,55
3,부동산,48
4,코로나19,45
5,이태원,40
6,서울시,36
7,성장현,29
8,구청장,26
9,소상공인,22
