In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import os
import datetime
pd.options.display.max_rows = 20

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

import warnings
warnings.filterwarnings(action='ignore')

## 아래는 matplotlib 한국어 표시해주는 코드

from matplotlib import font_manager, rc
font_name = font_manager.FontProperties(fname="c:/Windows/Fonts/malgun.ttf").get_name()
rc('font', family=font_name)

# 생활인구

## 코드파일 처리

In [103]:
dcode = pd.read_excel('./data/행정동코드.xlsx',encoding='cp949')
dcode = dcode.iloc[:,0:4]
dcode = dcode[dcode.시도명 =="서울특별시"]
dcode = dcode.drop_duplicates()
dcode.columns = ["code","시도명","구","동"]
dcode['code'] = dcode['code'].map(lambda x : str(x)[:-2])
dcode.head()

Unnamed: 0,code,시도명,구,동
0,11000000,서울특별시,,
1,11110000,서울특별시,종로구,
2,11110515,서울특별시,종로구,청운효자동
12,11110530,서울특별시,종로구,사직동
24,11110540,서울특별시,종로구,삼청동


## 생활인구파일 : 일별로 합산

In [150]:
def people_count_sum(df):    
    df = df.reset_index().iloc[:,0:4]
    df.columns = ["날짜","hour","code","생활인구수"]
    #df.drop(['hour'], axis = 1,inplace = True)
    df_return = df.groupby(['날짜', 'code']).apply(func)
    df2 = df_return.reset_index()
    df2['code'] = df2['code'].map(lambda x : str(x))
    res = pd.merge(df2,dcode,on="code")
    res.drop(['code'],axis = 1, inplace = True)
    return res

In [151]:
def func(x):
    d={}
    d['생활인구수'] = (x['생활인구수'].sum())

    return pd.Series(d, index=['생활인구수'])

In [152]:
list_ = []
for k in range(2,9):
    data = pd.read_csv(f'./data/LOCAL_PEOPLE_DONG_20200{k}.csv',encoding ='utf-8')
    df = people_count_sum(data)
    list_.append(df)

In [153]:
for k in range(2,9):
    data = pd.read_csv(f'./data/LOCAL_PEOPLE_DONG_20190{k}.csv',encoding ='utf-8')
    df = people_count_sum(data)
    list_.append(df)

## '구별'로 묶기

In [154]:
def func_dong(x):
    d={}
    d['생활인구수'] = (x['생활인구수'].sum())

    return pd.Series(d, index=['생활인구수'])

In [155]:
res = pd.concat(list_)
res.head()

Unnamed: 0,날짜,생활인구수,시도명,구,동
0,20200201,367962.2054,서울특별시,종로구,청운효자동
1,20200202,357848.5414,서울특별시,종로구,청운효자동
2,20200203,384159.4535,서울특별시,종로구,청운효자동
3,20200204,388443.1466,서울특별시,종로구,청운효자동
4,20200205,396399.9476,서울특별시,종로구,청운효자동


In [156]:
res_return = res.groupby(['날짜', '구']).apply(func_dong)
res1 = res_return.reset_index()

## 저장

In [158]:
res.to_csv('생활인구_전처리.csv',encoding='cp949') #동포함

In [160]:
res1.to_csv('생활인구_전처리_구.csv',encoding='cp949') #구까지만

# 유동인구

In [161]:
def yd_sum(yd):
    yd.drop(['시간(1시간단위)','연령대(10세단위)','성별'],axis = 1, inplace = True)
    yd.columns = ["날짜","시","구","유동인구수"]
    yd_return = yd.groupby(['날짜', '구']).apply(func_yd)
    yd2 = yd_return.reset_index()
    return yd2

In [162]:
def func_yd(x):
    d={}
    d['유동인구수'] = (x['유동인구수'].sum())

    return pd.Series(d, index=['유동인구수'])

In [163]:
list_yd = []
for k in range(2,9):
    data = pd.read_csv(f'./data/Floating_Population_200{k}.csv',encoding ='utf-8')
    yd = yd_sum(data)
    list_yd.append(yd)

In [164]:
for k in range(3,9):
    data = pd.read_csv(f'./data/FLT_SEOUL_0{k}MONTH.csv',encoding='utf-8')
    yd = yd_sum(data)
    list_yd.append(yd)

In [165]:
res2 = pd.concat(list_yd)
res2

Unnamed: 0,날짜,구,유동인구수
0,20200201,강남구,12636840
1,20200201,강동구,8339770
2,20200201,강북구,5966500
3,20200201,강서구,10864880
4,20200201,관악구,9966300
...,...,...,...
770,20190831,용산구,5143930
771,20190831,은평구,7981020
772,20190831,종로구,4977300
773,20190831,중구,4532980


In [149]:
res2.to_csv('유동인구_전처리.csv',encoding='cp949')

# 생활/유동 합치기

In [166]:
sum_all = pd.merge(res1, res2, how='outer', on = ["날짜","구"])
sum_all

Unnamed: 0,날짜,구,생활인구수,유동인구수
0,20190201,강남구,2.045038e+07,
1,20190201,강동구,1.181991e+07,
2,20190201,강북구,7.271637e+06,
3,20190201,강서구,1.338179e+07,
4,20190201,관악구,1.187355e+07,
...,...,...,...,...
10620,20200831,용산구,6.772697e+06,5104870.0
10621,20200831,은평구,1.079006e+07,7987210.0
10622,20200831,종로구,6.549824e+06,5137120.0
10623,20200831,중구,6.420853e+06,5528340.0


In [167]:
sum_all.isnull().sum()

날짜         0
구          0
생활인구수      0
유동인구수    725
dtype: int64

In [168]:
sum_all.to_csv('생활유동합침.csv',encoding='cp949')