# 수원 격자단위 분석
### 📌4개시군구 분석에서 추출했던 특성들을 같은 방식으로 격자에서 추출해보자.

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString
from tqdm import tqdm 
import folium as f
import warnings 
warnings.filterwarnings('ignore')
tqdm.pandas()

  shapely_geos_version, geos_capi_version_string


In [2]:
import matplotlib.font_manager as fm
import matplotlib as mat
## 폰트 경로
font_path = './data/NanumBarunGothic.ttf' 
## FontProperties 인스턴스 생성
font_prop = fm.FontProperties(fname=font_path)
## 폰트 이름
font_name = font_prop.get_name()
plt.rcParams['font.family']= 'NanumBarunGothic'

In [3]:
# 수원 구 단위 영역
kk_area = gpd.GeoDataFrame.from_file('data/LARD_ADM_SECT_SGG_41.shp', encoding='cp949')
kk_area = kk_area.to_crs({'init':'epsg:4326'}) # 좌표계 epsg : 4326
suwon_area = kk_area[kk_area['SGG_NM'].str.contains('수원')]
suwon_area

Unnamed: 0,ADM_SECT_C,SGG_NM,SGG_OID,COL_ADM_SE,GID,geometry
38,41115,수원시팔달구,340,41110,242,"MULTIPOLYGON (((126.99527 37.27383, 126.99529 ..."
39,41111,수원시장안구,1649,41110,243,"POLYGON ((126.96335 37.30029, 126.96338 37.300..."
40,41117,수원시영통구,2610,41110,244,"POLYGON ((127.03216 37.30956, 127.03229 37.309..."
41,41113,수원시권선구,1313,41110,245,"POLYGON ((126.92697 37.27208, 126.92731 37.272..."


In [4]:
# 수원시 표준격자 250x250
suwon_grid = gpd.read_file('./data/32.수원시_표준격자(250M_x_250M).geojson')
suwon_grid['total_area'] =suwon_grid['geometry'].to_crs({'init': 'epsg:6933'})\
               .map(lambda p: p.area / 10**6 )
suwon_grid.head(3)

Unnamed: 0,gid,geometry,total_area
0,다사49aa19ab,"MULTIPOLYGON (((126.92477 37.27077, 126.92475 ...",0.062546
1,다사49aa19ba,"MULTIPOLYGON (((126.92475 37.27302, 126.92473 ...",0.062546
2,다사49aa20aa,"MULTIPOLYGON (((126.92472 37.27753, 126.92470 ...",0.062546


# 1. 격자단위 인구/112 신고 데이터 전처리
---
## 📢 격자별 인구 특성과 112 신고수를 추출
- compas에서 제공된 데이터 중 인구정보, 112 신고수 (격자기준)를 포함한 데이터
- 30.성연령별거주인구격자 / 1.112신고이력 외 4개시군구 feature 추출 시 사용했던 데이터

In [5]:
suwon_pop = gpd.read_file('./data/30.수원시_성연령별_거주인구격자(250M_X_250M).geojson')
report_log = pd.read_csv('./data/1.수원시_112신고이력(격자_관할경계매핑).csv')

### <1> 인구

In [6]:
suwon_pop.fillna(0, inplace=True) # 인구가 비어있는 격자에는 fillNa
# total pop (총 인구수) 구하기
suwon_pop['total_pop'] = suwon_pop['m_20g_pop'] + suwon_pop['w_20g_pop'] + suwon_pop['m_30g_pop'] + suwon_pop['w_30g_pop'] + suwon_pop['m_40g_pop'] + \
    suwon_pop['w_40g_pop'] + suwon_pop['m_50g_pop'] + suwon_pop['w_50g_pop'] + suwon_pop['m_60g_pop'] +suwon_pop['w_60g_pop'] + \
       suwon_pop['m_70g_pop'] + suwon_pop['w_70g_pop'] + suwon_pop['m_80g_pop'] + suwon_pop['w_80g_pop'] + suwon_pop['m_90g_pop'] + \
       suwon_pop['w_90g_pop'] + suwon_pop['m_100_pop'] + suwon_pop['w_100g_pop']
# 0ld pop(60 ~ 노령 인구) 구하기
suwon_pop['old_pop'] = suwon_pop['m_60g_pop'] +suwon_pop['w_60g_pop'] + \
       suwon_pop['m_70g_pop'] + suwon_pop['w_70g_pop'] + suwon_pop['m_80g_pop'] + suwon_pop['w_80g_pop'] + suwon_pop['m_90g_pop'] + \
       suwon_pop['w_90g_pop'] + suwon_pop['m_100_pop'] + suwon_pop['w_100g_pop']

### <2> 112신고

#### 격자 별 TOTAL 신고 수를 count

In [7]:
# 신고수 total
total_report = report_log.groupby('gid').count().reset_index()[['gid','case_type']]
total_report = total_report.rename(columns={'case_type' : 'total_cnt'})
total_report.sort_values('total_cnt', ascending=False).head()

Unnamed: 0,gid,total_cnt
673,다사55bb18bb,4729
1055,다사58ab18ba,3447
672,다사55bb18ba,3044
708,다사56aa19aa,2676
818,다사56bb21ab,2560


### case_type 분류
<li>*** case_type을 <strong>5가지 category</strong>로 분류 해봄 ***</li>
<li>1. 교통 관련 접수 : '교통불편', '교통사고','교통위반', '무전취식승차' </li>
<li>2. 시비, 폭력 관련 접수 : '시비', '가정폭력', '폭력', '행패소란'</li>
<li>3. 절도, 기타 경범죄 관련 접수 : '절도', '기타경범'. '도박', '기타형사범'</li>
<li>4. 서비스 관련 접수 : '서비스요청', '상담문의', '보호조치', '분실습득'</li>
<li>5. 기타 접수 : '비상벨', '위험방지', '소음', '기타_타기관', '주취자', '실종(실종아동 등)', '청소년비행' </li>

In [8]:
traffic_list = ['교통불편', '교통사고','교통위반', '무전취식승차']
vio_list = [ '시비', '가정폭력', '폭력', '행패소란']
crime_list = [ '절도', '기타경범', '도박','기타형사범' ]
serv_list = ['서비스요청', '상담문의', '보호조치', '분실습득']
etc_list = ['비상벨', '위험방지', '소음', '기타_타기관', '주취자', '실종(실종아동 등)' , '청소년비행']

In [9]:
report_log['case_cat'] = report_log['case_type'].apply(lambda x : '교통' if x in traffic_list else x)
report_log['case_cat'] = report_log['case_cat'].apply(lambda x : '시비/폭력' if x in vio_list else x)
report_log['case_cat'] = report_log['case_cat'].apply(lambda x : '절도/경범' if x in crime_list else x)
report_log['case_cat'] = report_log['case_cat'].apply(lambda x : '서비스' if x in serv_list else x)
report_log['case_cat'] = report_log['case_cat'].apply(lambda x : '기타' if x in etc_list else x)
report_log['case_cat'].value_counts()

기타       56431
서비스      54253
시비/폭력    43852
절도/경범    38748
교통       30480
Name: case_cat, dtype: int64

In [10]:
report_case_cnt = report_log.groupby(['gid','case_cat']).count().reset_index()[['gid','case_cat','case_type']]
report_case_cnt.head(3)

Unnamed: 0,gid,case_cat,case_type
0,다사49ab20aa,기타,1
1,다사49ba19ba,교통,2
2,다사49ba19ba,기타,4


In [11]:
temp_data = total_report[['gid', 'total_cnt']].drop_duplicates()
for i in report_case_cnt['case_cat'].unique() :
    temp_data[i+ '_case_cnt'] = 0
for i in range(len(report_case_cnt)) :
    temp_name = report_case_cnt.loc[i, 'case_cat']
    temp = temp_data[temp_data['gid']==report_case_cnt['gid'].iloc[i]]
    temp[temp_name] = report_case_cnt['case_type'].iloc[i]
    temp_data.loc[temp.index , temp_name+'_case_cnt'] = report_case_cnt[report_case_cnt['case_cat']==temp_name].loc[i, 'case_type']
temp_data.head(3)

Unnamed: 0,gid,total_cnt,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt
0,다사49ab20aa,1,1,0,0,0,0
1,다사49ba19ba,11,4,2,2,3,0
2,다사49ba19bb,12,6,2,1,1,2


#### 격자인구데이터와 112신고데이터를 Merge합니다.

In [12]:
report_pop = pd.merge(suwon_pop, temp_data, how='outer')
report_pop.fillna(0, inplace=True)
report_pop.head(3)

Unnamed: 0,gid,m_20g_pop,w_20g_pop,m_30g_pop,w_30g_pop,m_40g_pop,w_40g_pop,m_50g_pop,w_50g_pop,m_60g_pop,...,w_100g_pop,geometry,total_pop,old_pop,total_cnt,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt
0,다사49aa19ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,"POLYGON ((949000.000 1919250.000, 949000.000 1...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,다사49aa19ba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,"POLYGON ((949000.000 1919500.000, 949000.000 1...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,다사49aa20aa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,"POLYGON ((949000.000 1920000.000, 949000.000 1...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### *** % 단위 피쳐를 추가합니다
1. 교통신고비율 : 전체 신고 수 / 교통 신고 수 * 100
2. 기타및서비스비율 : 전체 신고 수 / 기타및서비스 신고 수  * 100
3. 절도폭력및경범비율 : 전체 신고 수 / 절도폭력 신고 수  * 100

In [13]:
report_pop['교통신고비율'] =  report_pop['교통_case_cnt'] / report_pop['total_cnt'] * 100
report_pop['기타및서비스비율'] =  (report_pop['기타_case_cnt'] + report_pop['서비스_case_cnt']) / report_pop['total_cnt'] * 100
report_pop['절도폭력및경범비율'] =  (report_pop['절도/경범_case_cnt'] + report_pop['시비/폭력_case_cnt']) / report_pop['total_cnt'] * 100
report_pop.fillna(0, inplace=True)
report_pop.head(3)

Unnamed: 0,gid,m_20g_pop,w_20g_pop,m_30g_pop,w_30g_pop,m_40g_pop,w_40g_pop,m_50g_pop,w_50g_pop,m_60g_pop,...,old_pop,total_cnt,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt,교통신고비율,기타및서비스비율,절도폭력및경범비율
0,다사49aa19ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,다사49aa19ba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,다사49aa20aa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# report_pop.columns 컬럼확인
report_pop_ = report_pop.copy()
report_pop_[['gid','total_pop','total_cnt', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율']].to_csv('./data/격자기준신고데이터.csv')

# 2. 격자단위 용도지역도, 토지이용계획도, 주택노후도 데이터 전처리
---
## 📢 격자별 토지,주택 특성을 추출
- compas에서 제공된 데이터 중 상단의 정보를 포함한 데이터
- 9.용도지역도 / 41.토지이용계획도 / 7.주택노후도
- 코드정의서 파일을 활용하여 용도지역지구코드를 매칭

## <1> 용도지역도

In [15]:
use_area = gpd.read_file('./data/9.4개시도_용도지역도.geojson')
use_area = use_area[use_area['COL_ADM_SE']=='41110'] # 41110 : 수원시
# 실면적 구하기
# use_area['area'] =use_area['geometry'].to_crs({'init': 'epsg:6933'})\
#                .map(lambda p: p.area / 10**6 ) 
# use_area['SGG_OID'] =  use_area['SGG_OID'].astype('int64')
use_area.head()

Unnamed: 0,MNUM,ALIAS,REMARK,NTFDATE,SGG_OID,COL_ADM_SE,geometry
6003,37400004111020090126UQA2200011000,,,,1132.0,41110,"MULTIPOLYGON (((127.02267 37.25099, 127.02263 ..."
6004,37400004111020160314UQA1220403000,,,,27217.0,41110,"MULTIPOLYGON (((127.04568 37.23551, 127.04570 ..."
6005,16110004111020120001UQA1220001000,,,,1596.0,41110,"MULTIPOLYGON (((127.05192 37.28382, 127.05201 ..."
6006,16110004111020120001UQA1220026000,,,,1597.0,41110,"MULTIPOLYGON (((127.06355 37.27469, 127.06436 ..."
6007,16110004111020120001UQA1220005000,,,,1598.0,41110,"MULTIPOLYGON (((127.06826 37.29386, 127.06878 ..."


In [16]:
grid_use_area = gpd.overlay(use_area, suwon_grid, how='intersection')

In [17]:
grid_use_area['use_cd'] = grid_use_area['MNUM'].apply(lambda x : x[-13:-7]) # 용도지역지구코드
code_nm = pd.read_excel('./data/코드정의서.xlsx', header=960) # 용도지역지구구분코드
drop_idx = np.arange(1164, len(code_nm))
code_nm.drop(drop_idx, inplace=True)

In [18]:
# 코드정의서에 따라 용도지역구분코드명을 도출해 봅시다
code_dict = code_nm.set_index('코드').to_dict()['코드명'] # code name to dictionary
grid_use_area['use_nm'] = grid_use_area['use_cd'].apply(lambda x: code_dict[x] if x in code_dict.keys() else x)
grid_use_area['use_nm'].value_counts()

add_code = {
    'UQA500' : '도시지역미지정',
    'UQB000' : '도시지역기타',
    'UQA999' : '관리지역미분류'
}
grid_use_area['use_nm'] = grid_use_area['use_nm'].apply(lambda x: add_code[x] if x in add_code.keys() else x)

In [19]:
len(grid_use_area)

5552

In [20]:
# grid_use_area.drop_duplicates(subset='geometry')
grid_use_area['area'] = gpd.GeoDataFrame(grid_use_area)['geometry'].to_crs({'init': 'epsg:6933'})\
               .map(lambda p: p.area / 10**6 ) # 실면적
grid_use_area.head()

Unnamed: 0,MNUM,ALIAS,REMARK,NTFDATE,SGG_OID,COL_ADM_SE,gid,total_area,geometry,use_cd,use_nm,area
0,37400004111020090126UQA2200011000,,,,1132.0,41110,다사57ab16bb,0.062547,"POLYGON ((127.01932 37.24992, 127.01929 37.249...",UQA220,일반상업지역,0.017128
1,37400004111020090126UQA1300015000,,,,1134.0,41110,다사57ab16bb,0.062547,"POLYGON ((127.01795 37.25090, 127.01947 37.250...",UQA130,준주거지역,0.011286
2,37400004111020140117UQA4300002001,,,,1948.0,41110,다사57ab16bb,0.062547,"MULTIPOLYGON (((127.01805 37.24880, 127.01807 ...",UQA430,자연녹지지역,0.002418
3,37400004111020140117UQA1220060000,,,,1945.0,41110,다사57ab16bb,0.062547,"POLYGON ((127.01891 37.24877, 127.01942 37.248...",UQA122,제2종일반주거지역,0.000329
4,37400004111020140117UQA2200012000,,,,1947.0,41110,다사57ab16bb,0.062547,"POLYGON ((127.01891 37.24877, 127.01893 37.248...",UQA220,일반상업지역,0.01766


### 용지 종류가 상세하게 되어 있어서(총 25종 !) 편의상 비슷한 용지 끼리 묶어서 재분류 해 보았다.
1. <strong>녹지지역</strong> : '계획관리지역', '자연환경보전지역', '보전관리지역', '~ 녹지지역'
2. <strong>주거지역</strong> : '~ 주거지역'
3. <strong>농경지역</strong> : '농림지역', '생산관리지역'
4. <strong>상업지역</strong> : '~ 상업지역'
5. <strong>공업지역</strong> : '~ 공업지역'
6. <strong>기타</strong> : 미지정, 기타

In [21]:
# grid_use_area = grid_use_area.groupby(['gid','use_nm']).sum().reset_index()[['gid','use_nm','area']]
grid_use_area['use_cat'] = grid_use_area['use_nm'].apply(lambda x : '녹지지역' if '녹지' in x or x in ['계획관리지역', '자연환경보전지역', '보전관리지역' ] else x)
grid_use_area['use_cat'] = grid_use_area['use_cat'].apply(lambda x : '주거지역' if '주거' in x else x)
grid_use_area['use_cat'] = grid_use_area['use_cat'].apply(lambda x : '농경지역' if x in ['농림지역', '생산관리지역'] else x)
grid_use_area['use_cat'] = grid_use_area['use_cat'].apply(lambda x : '상업지역' if '상업' in x else x)
grid_use_area['use_cat'] = grid_use_area['use_cat'].apply(lambda x : '공업지역' if '공업' in x else x)
grid_use_area['use_cat'] = grid_use_area['use_cat'].apply(lambda x : '기타' if x in ['도시지역미지정', '관리지역미분류', '도시지역기타'] else x)
grid_use_area.drop_duplicates(subset='geometry', inplace=True)
use_area_sum = grid_use_area.groupby(['gid','use_cat']).sum().reset_index()[['gid', 'use_cat',  'area']]
use_area_sum.head()

Unnamed: 0,gid,use_cat,area
0,다사49aa19ab,녹지지역,0.007545
1,다사49aa19ba,녹지지역,0.001092
2,다사49aa20aa,녹지지역,0.000756
3,다사49ab18ab,녹지지역,0.002962
4,다사49ab18ba,녹지지역,0.017015


In [22]:
# suwon_grid['gid'].nunique()

### 총 면적 대비 용도 면적의 비율을 산정하자
- 용도면적 비율(use_ratio) => 총면적(area) / 전체면적(total_area) 으로 계산

In [23]:
# 용도지역도 비율
area_con = pd.merge(use_area_sum, suwon_grid[['gid','total_area']], how='outer')
area_con['use_ratio'] = area_con['area'] / area_con['total_area']
area_con.head(3)

Unnamed: 0,gid,use_cat,area,total_area,use_ratio
0,다사49aa19ab,녹지지역,0.007545,0.062546,0.120638
1,다사49aa19ba,녹지지역,0.001092,0.062546,0.017453
2,다사49aa20aa,녹지지역,0.000756,0.062546,0.012093


In [24]:
# ratio > 1 값 변환
temp = area_con.groupby(['gid']).sum().reset_index()
temp = temp[temp['use_ratio'] > 1]
temp['sub_ratio'] = temp['use_ratio'] - 1
temp_dict = temp.set_index('gid').to_dict()['sub_ratio']
area_con['use_ratio'] = area_con.apply(lambda x : x['use_ratio'] - temp_dict[x['gid']] if x['gid'] in temp_dict.keys() and x['use_ratio'] > 1 else x['use_ratio'] ,axis=1)

In [25]:
area_con['use_cat'].unique()

array(['녹지지역', '주거지역', '상업지역', '공업지역'], dtype=object)

In [26]:
# gid 별 용도지역도 ratio
temp_data = area_con[['gid','total_area']].drop_duplicates(subset='gid')
for i in area_con['use_cat'].unique() :
    temp_data[i+ '_use_ratio'] = 0
for i in range(len(area_con)) :
    temp_name = area_con.loc[i, 'use_cat']
    temp = temp_data[temp_data['gid']==area_con['gid'].iloc[i]]
    temp[temp_name] = area_con['use_ratio'].iloc[i]
    temp_data.loc[temp.index , temp_name+'_use_ratio'] = area_con[area_con['use_cat']==temp_name].loc[i, 'use_ratio']
temp_data.head(3)

Unnamed: 0,gid,total_area,녹지지역_use_ratio,주거지역_use_ratio,상업지역_use_ratio,공업지역_use_ratio
0,다사49aa19ab,0.062546,0.120638,0.0,0.0,0.0
1,다사49aa19ba,0.062546,0.017453,0.0,0.0,0.0
2,다사49aa20aa,0.062546,0.012093,0.0,0.0,0.0


In [27]:
area_con_grid = temp_data.copy()
area_con_grid.sort_values(by='녹지지역_use_ratio', ascending=False).head(10)

Unnamed: 0,gid,total_area,녹지지역_use_ratio,주거지역_use_ratio,상업지역_use_ratio,공업지역_use_ratio
1322,다사55ab25ba,0.062547,1.0,0.0,0.0,0.0
971,다사54ab18aa,0.062547,1.0,0.0,0.0,0.0
2169,다사58aa25bb,0.062547,1.0,0.0,0.0,0.0
3096,다사61ba20ab,0.062548,1.0,0.0,0.0,0.0
972,다사54ab18ab,0.062547,1.0,0.0,0.0,0.0
973,다사54ab18ba,0.062547,1.0,0.0,0.0,0.0
1464,다사56aa15ba,0.062547,1.0,0.0,0.0,0.0
2168,다사58aa25ba,0.062547,1.0,0.0,0.0,0.0
2167,다사58aa25ab,0.062547,1.0,0.0,0.0,0.0
2166,다사58aa25aa,0.062547,1.0,0.0,0.0,0.0


## <2> 토지이용계획도

In [28]:
land_plan = gpd.read_file('./data/41.4개시도_토지이용계획도.geojson')
land_plan['sgg_cd'] = land_plan['ZONECODE'].apply(lambda x: x[:5])
land_plan['sgg_cd'].unique()

array(['41111', '41115', '41113', '41463', '48110', '48120', '48160',
       '48190', '48170', '11200', '11260', '11350', '11305', '11320',
       '41310', '11380', '41281', '11440', '11470', '11500', '28245',
       '41210', '11530', '41190', '41197', '11620', '11650', '41290',
       '11680', '11710', '11740', '41450'], dtype=object)

In [29]:
land_plan_ = land_plan[land_plan['sgg_cd'].isin(['41111','41113', '41115', '41117'])] # 수원
suwon_plan = gpd.overlay(land_plan_, suwon_grid, how='intersection')
suwon_plan.drop_duplicates(subset='geometry',inplace=True)
suwon_plan['area'] = gpd.GeoDataFrame(suwon_plan)['geometry'].to_crs({'init': 'epsg:6933'})\
               .map(lambda p: p.area / 10**6 ) 
suwon_plan.head(3)

Unnamed: 0,ORG_GID,BLOCKTYPE,BLOCKNAME,ZONECODE,ZONENAME,sgg_cd,gid,total_area,geometry,area
0,9104.0,완충녹지,완,41111KL1994002,수원천천2,41111,다사53ab22bb,0.062547,"POLYGON ((126.97332 37.30315, 126.97324 37.303...",0.001233
1,9922.0,학교,학,41111KL1994002,수원천천2,41111,다사53ab22bb,0.062547,"POLYGON ((126.97526 37.30372, 126.97509 37.303...",0.013156
2,13769.0,단독주택,단독,41111KL1994002,수원천천2,41111,다사53ab22bb,0.062547,"POLYGON ((126.97367 37.30411, 126.97350 37.304...",0.001288


### 이용계획도 용지가 상세하게 되어 있어서(총147종 !) 편의상 비슷한 용지 끼리 묶어서 재분류 해 보았다.
<sub>(각 카테고리에 해당하는 값은 하단 코드에서 정리)</sub>
1. <strong>주거용지</strong>
2. <strong>녹지</strong> 
3. <strong>공원용지</strong> 
4. <strong>학교용지</strong> 
5. <strong>도로용지</strong> 
6. <strong>상업용지</strong> 
7. <strong>공공용지</strong>
8. <strong>업무용지</strong>
9. <strong>시설용지</strong>

In [30]:
# 주거용지 분류
house_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['공동주택', '단독주택', '단독주택기타', '도시형생활주택용지', '다세대주택', '연립주택', '아파트', '아파트기타'])]
house_area['area_nm'] = '주거용지'
# 녹지 분류
green_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['완충녹지', '녹지', '경관녹지', '연결녹지', '녹지기타', '공원녹지'])]
green_area['area_nm'] = '녹지'
# 공원용지분류
park_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['근린공원', '공원', '어린이공원', '수변공원', '소공원', '문화공원', '체육공원', '도시자연공원',
       '역사공원', '공원녹지', '공원기타'])]
park_area['area_nm'] = '공원용지'
# 학교용지분류
school_area  = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['학교', '고등학교', '초등학교', '중학교', '학교기타', '교육시설', '청소년수련시설'])]
school_area['area_nm'] = '학교용지'
# 도로용지분류
load_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['자전거전용도로', '보행자전용도로', '도로'])]
load_area['area_nm'] = '도로용지'
# 상업용지분류
comm_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['상업용지', '상업시설', '근린상업', '일반상업', '중심상업', '숙박시설', '숙박시설기타', '판매및영업시설', '종합의료시설'])]
comm_area['area_nm'] = '상업용지'
# 공공용지분류
public_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['공공청사', '공공공지', '공공공지기타', '공공문화체육기타', '이전공공기관', '근린공공시설', '공공보육시설',
       '공공청사기타', '사회복지시설', '사회복지시설기타'])]
comm_area['area_nm'] = '공공용지'
# 업무,산업,공업용지분류
business_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['업무시설', '유통업무시설', '산업용지', '산업시설기타', '벤처및공업용지'])]
business_area['area_nm'] = '업무용지'
# 시설용지분류
facility_area = suwon_plan[suwon_plan['BLOCKTYPE'].isin(['유수시설', '자원재활용시설','폐기물처리시설','하수종말처리시설','폐수종말처리시설',
       '연구시설기타', '저류시설', '배수시설', '위험물저장및처리시설', '변전시설'])]
facility_area['area_nm'] = '시설용지'

In [31]:
plan_area = pd.concat([house_area, green_area, park_area, school_area, load_area, comm_area, public_area, business_area, facility_area])
plan_area_sum = plan_area.groupby(['gid','area_nm']).sum().reset_index()[['gid','area_nm', 'area']].rename(columns={'gid' : 'gid'})
plan_area_sum.head(3)

Unnamed: 0,gid,area_nm,area
0,다사49bb19ba,도로용지,0.000978
1,다사49bb19ba,학교용지,0.000246
2,다사49bb19bb,공원용지,0.003472


### 총 면적 대비 용도 면적의 비율을 산정하자
- 계획용도면적 비율(plan_area_ratio) => 계획용도별 면적(total_area) / 계획용도지역전체면적(plan_total_area) 으로 계산

In [32]:
plan_area_total = plan_area.groupby(['gid']).sum().reset_index()[['gid','area']].rename(columns={'gid' : 'gid', 'area' : 'plan_total_area' })
plan_area_total_df = pd.merge(plan_area_total, plan_area_sum, how='outer')
plan_area_total_df.dropna(subset=['area_nm'] , inplace=True)
plan_area_total_df['plan_total_area'].fillna(0, inplace=True)
plan_area_total_df['plan_area_ratio'] = plan_area_total_df['area'] / plan_area_total_df['plan_total_area']  * 100
plan_area_total_df['plan_area_ratio'].fillna(0, inplace=True)
plan_area_total_df.head()

Unnamed: 0,gid,plan_total_area,area_nm,area,plan_area_ratio
0,다사49bb19ba,0.001224,도로용지,0.000978,79.890911
1,다사49bb19ba,0.001224,학교용지,0.000246,20.109089
2,다사49bb19bb,0.02485,공원용지,0.003472,13.969839
3,다사49bb19bb,0.02485,도로용지,0.005074,20.417193
4,다사49bb19bb,0.02485,학교용지,0.014544,58.526908


In [33]:
temp_data = plan_area_total_df[['gid','plan_total_area']].drop_duplicates(subset='gid')
for i in plan_area_total_df['area_nm'].unique() :
    temp_data[i+ '_plan_use_ratio'] = 0
temp_data.head(3)

Unnamed: 0,gid,plan_total_area,도로용지_plan_use_ratio,학교용지_plan_use_ratio,공원용지_plan_use_ratio,녹지_plan_use_ratio,주거용지_plan_use_ratio,공공용지_plan_use_ratio,업무용지_plan_use_ratio,시설용지_plan_use_ratio
0,다사49bb19ba,0.001224,0,0,0,0,0,0,0,0
2,다사49bb19bb,0.02485,0,0,0,0,0,0,0,0
5,다사50aa18bb,0.002017,0,0,0,0,0,0,0,0


In [34]:
temp_data.reset_index(drop=True, inplace=True)
plan_area_total_df.reset_index(drop=True, inplace=True)
for i in range(len(plan_area_total_df)) :
    temp_name = plan_area_total_df.loc[i, 'area_nm']
    temp = temp_data[temp_data['gid']==plan_area_total_df['gid'].iloc[i]]
    temp[temp_name] = plan_area_total_df['plan_area_ratio'].iloc[i]
    temp_data.loc[temp.index , temp_name+'_plan_use_ratio'] = plan_area_total_df[plan_area_total_df['area_nm']==temp_name].loc[i, 'plan_area_ratio']
temp_data.head(3)

Unnamed: 0,gid,plan_total_area,도로용지_plan_use_ratio,학교용지_plan_use_ratio,공원용지_plan_use_ratio,녹지_plan_use_ratio,주거용지_plan_use_ratio,공공용지_plan_use_ratio,업무용지_plan_use_ratio,시설용지_plan_use_ratio
0,다사49bb19ba,0.001224,79.890911,20.109089,0.0,0.0,0.0,0.0,0.0,0.0
1,다사49bb19bb,0.02485,20.417193,58.526908,13.969839,0.0,0.0,0.0,0.0,0.0
2,다사50aa18bb,0.002017,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
plan_use_grid = temp_data.copy()

## <3> 주택노후도

In [36]:
house_old_con = gpd.read_file('./data/7.4개시도_주택노후도.geojson')
house_old_con.head()

Unnamed: 0,ORG_GID,BLD_NM,DONG_NM,PNU,STRCT_NM,USE_NM,USEAPR_DAY,OLD_YEAR,OLD_LEVEL,geometry
0,704627.0,,,1111017500107040000,기타강구조,단독주택,19910902,30.0,4.0,"MULTIPOLYGON (((127.02166 37.57731, 127.02165 ..."
1,704628.0,,,1111017500100560024,벽돌구조,단독주택,19670528,54.0,4.0,"MULTIPOLYGON (((127.01600 37.57725, 127.01596 ..."
2,704629.0,,,1111017500100560054,벽돌구조,단독주택,19621221,58.0,4.0,"MULTIPOLYGON (((127.01633 37.57723, 127.01633 ..."
3,704630.0,,,1111017500100560053,벽돌구조,단독주택,19790601,42.0,4.0,"MULTIPOLYGON (((127.01651 37.57717, 127.01646 ..."
4,704632.0,,,1111016500100280041,철근콘크리트구조,제2종근린생활시설,19940425,27.0,3.0,"MULTIPOLYGON (((127.00532 37.57728, 127.00534 ..."


In [37]:
# 법정동코드 매칭
dong_cd = pd.read_excel('./data/행정구역코드(법정동코드).xls')
house_old_con['법정동코드'] = house_old_con['PNU'].apply(lambda x : int(x[:10]))
house_old_df = pd.merge(dong_cd[['법정동코드','법정동명']], house_old_con)
house_old_df.head(2)

Unnamed: 0,법정동코드,법정동명,ORG_GID,BLD_NM,DONG_NM,PNU,STRCT_NM,USE_NM,USEAPR_DAY,OLD_YEAR,OLD_LEVEL,geometry
0,1111010100,서울특별시 종로구 청운동,706700.0,,,1111010100100550014,벽돌구조,제2종근린생활시설,19890919,31.0,4.0,"MULTIPOLYGON (((126.96626 37.58669, 126.96616 ..."
1,1111010100,서울특별시 종로구 청운동,725027.0,,,1111010100100890015,철근콘크리트구조,단독주택,19970820,24.0,3.0,"MULTIPOLYGON (((126.97198 37.58599, 126.97209 ..."


In [38]:
house_old_df['sgg_nm'] = house_old_df['법정동명'].apply(lambda x: x.split(' ')[1])
house_old_df['sgg_nm'].unique()

array(['종로구', '중구', '용산구', '성동구', '광진구', '동대문구', '중랑구', '성북구', '강북구',
       '도봉구', '노원구', '은평구', '서대문구', '마포구', '양천구', '강서구', '구로구', '금천구',
       '영등포구', '동작구', '관악구', '서초구', '강남구', '송파구', '강동구', '수원시', '창원시',
       '진주시'], dtype=object)

In [39]:
suwon_house_old = house_old_df[house_old_df['sgg_nm']=='수원시']
suwon_house_old = gpd.GeoDataFrame(suwon_house_old, crs='epsg:4326')
suwon_house_grid = gpd.overlay(suwon_house_old, suwon_grid, how='intersection')
suwon_house_grid.drop_duplicates(inplace=True, subset=['geometry'])
# 격자(gid)와 주택노후도 데이터 매칭
suwon_house_grid['area'] = suwon_house_grid['geometry'].to_crs({'init': 'epsg:6933'})\
               .map(lambda p: p.area / 10**6 ) 
suwon_house_grid['geometry'] = suwon_house_grid.centroid
suwon_house_grid.head(3)

Unnamed: 0,법정동코드,법정동명,ORG_GID,BLD_NM,DONG_NM,PNU,STRCT_NM,USE_NM,USEAPR_DAY,OLD_YEAR,OLD_LEVEL,sgg_nm,gid,total_area,geometry,area
0,4111112900,경기도 수원시 장안구 파장동,3217821.0,쌍용빌라,,4111112900102030009,벽돌구조,공동주택,19910329,30.0,4.0,수원시,다사55ab23ba,0.062547,POINT (126.99512 37.31140),0.000117
1,4111112900,경기도 수원시 장안구 파장동,3223690.0,청구빌라,,4111112900103850004,벽돌구조,공동주택,19901110,30.0,4.0,수원시,다사55ab23ba,0.062547,POINT (126.99782 37.30990),1e-06
2,4111112900,경기도 수원시 장안구 파장동,3226693.0,,,4111112900102740000,일반철골구조,노유자시설,20030630,18.0,2.0,수원시,다사55ab23ba,0.062547,POINT (126.99610 37.30996),0.000172


### 주택노후도 level별 area 구하기
#### 시,군,구 별 old_level(1~4단계)에 대한 총 면적을 산출합니다.

In [40]:
old_level_sum = suwon_house_grid.groupby(['gid','OLD_LEVEL']).sum().reset_index()[['gid','OLD_LEVEL','area']]
old_level_sum.head()

Unnamed: 0,gid,OLD_LEVEL,area
0,다사49ba19ba,3.0,0.000914
1,다사49ba19bb,3.0,0.000555
2,다사49ba19bb,4.0,9.6e-05
3,다사49ba20ab,4.0,0.000188
4,다사49ba20ba,4.0,1.2e-05


In [41]:
old_level_sum['gid'].nunique()

1355

In [42]:
temp_data = old_level_sum[['gid']].drop_duplicates()
for i in old_level_sum['OLD_LEVEL'].unique() :
    temp_data['area_old_level_'+str(i)[0]] = 0
for i in range(len(old_level_sum)) :
    temp_name = old_level_sum.loc[i, 'OLD_LEVEL']
    temp = temp_data[temp_data['gid']==old_level_sum['gid'].iloc[i]]
    temp[temp_name] = old_level_sum['area'].iloc[i]
    temp_data.loc[temp.index , 'area_old_level_'+str(temp_name)[0]] = old_level_sum[old_level_sum['OLD_LEVEL']==temp_name].loc[i, 'area']
temp_data.head(3)

Unnamed: 0,gid,area_old_level_3,area_old_level_4,area_old_level_1,area_old_level_2
0,다사49ba19ba,0.000914,0.0,0.0,0.0
1,다사49ba19bb,0.000555,9.6e-05,0.0,0.0
3,다사49ba20ab,0.0,0.000188,0.0,0.0


In [43]:
old_level_total = temp_data.copy()

### 주택노후년도 평균
#### 시,군,구 별 old_year의 평균을 산출합니다.

In [44]:
old_year_mean = suwon_house_grid.groupby('gid').mean().reset_index()[['gid','OLD_YEAR']]
old_year_mean['OLD_YEAR'] = old_year_mean['OLD_YEAR'].apply(lambda x: round(x, 1))
old_year_mean.head()

Unnamed: 0,gid,OLD_YEAR
0,다사49ba19ba,21.8
1,다사49ba19bb,27.7
2,다사49ba20ab,31.0
3,다사49ba20ba,31.0
4,다사49bb18bb,15.7


### 최종
#### 상단에서 추출한 시,군,구 별 OLD_YEAR 평균 및 old_level 별 면적 총 합 데이터를 Merge

In [45]:
house_total = pd.merge(old_year_mean, old_level_total, how='outer')
house_total.head(3)

Unnamed: 0,gid,OLD_YEAR,area_old_level_3,area_old_level_4,area_old_level_1,area_old_level_2
0,다사49ba19ba,21.8,0.000914,0.0,0.0,0.0
1,다사49ba19bb,27.7,0.000555,9.6e-05,0.0,0.0
2,다사49ba20ab,31.0,0.0,0.000188,0.0,0.0


# 4. 격자단위 시설물 데이터 전처리
---
## 격자별 시설물 특성을 추출
- compas에서 제공된 데이터 중 시설물 정보를 포함한 데이터
- 22.보안등현황 / 23.비상벨현황 / 24.cctv설치현황 / 27.공원현황
- 42.편의점현황 / 40.어린이놀이시설현황 / 8. 유흥업소현황

In [46]:
def geo_transform(DataFrame) :
    # csv to geopandas
    # lon, lat data를 geometry로 변경
    DataFrame['lat'] = DataFrame['lat'].astype(float)
    DataFrame['lon'] = DataFrame['lon'].astype(float)
    DataFrame['geometry'] = DataFrame.apply(lambda row : Point([row['lon'], row['lat']]), axis=1)
    DataFrame = gpd.GeoDataFrame(DataFrame, geometry='geometry')
    DataFrame.crs = {'init':'epsg:4326'}
    DataFrame = DataFrame.to_crs({'init':'epsg:4326'}) # 좌표계 epsg : 4326
    return DataFrame

In [47]:
slight_con = pd.read_csv('./data/22.4개시도_보안등현황.csv')
slight_con = geo_transform(slight_con)
bell_con = gpd.read_file('./data/23.4개시도_안전비상벨현황.geojson')
# bell_con = geo_transform(bell_con)
cctv_con = pd.read_csv('./data/24.4개시도_CCTV설치현황.csv')
cctv_con = geo_transform(cctv_con)

In [48]:
play_con = pd.read_csv('./data/40.4개시도_어린이놀이시설현황.csv')
play_con = geo_transform(play_con)
park_con = pd.read_csv('./data/27.4개시도_공원현황.csv')
park_con = geo_transform(park_con)

In [49]:
conv_con = gpd.read_file('./data/42.4개시도_편의점현황.geojson')
pub_con = gpd.read_file('./data/8.4개시도_유흥업소현황.geojson')

#### 1. 보안등

In [50]:
slight_con['sido'] = slight_con['address'].apply(lambda x: x.split(' ')[0] if ' ' in str(x) else x)
slight_con = slight_con[slight_con['sido']=='경기도']
len(slight_con)

13920

In [51]:
slight_grid = gpd.overlay(slight_con, suwon_grid, how='intersection')
# slight_grid.drop_duplicates(subset=['geometry'] , inplace=True)
# slight_grid.head(3)
slight_sum = slight_grid.groupby('gid').sum().reset_index()[['gid','securitylight_cnt']]
slight_sum.head(3)

Unnamed: 0,gid,securitylight_cnt
0,다사49ba21ab,1
1,다사49bb18bb,1
2,다사49bb19aa,4


#### 2. 비상벨

In [52]:
bell_grid = gpd.overlay(bell_con, suwon_grid, how='intersection')
# bell_grid.drop_duplicates(subset=['geometry'] , inplace=True)
# slight_grid.head(3)
bell_sum = bell_grid.groupby('gid').count().reset_index()[['gid','bell_us']]
bell_sum.head(3)

Unnamed: 0,gid,bell_us
0,다사50aa19ba,1
1,다사50ba21ab,1
2,다사51ba17bb,1


#### 3. cctv

In [53]:
cctv_grid = gpd.overlay(cctv_con, suwon_grid, how='intersection')
# cctv_grid.drop_duplicates(subset=['geometry'] , inplace=True)
# slight_grid.head(3)
cctv_sum = cctv_grid.groupby('gid').sum().reset_index()[['gid','cctv_cnt']]
cctv_sum.head(3)

Unnamed: 0,gid,cctv_cnt
0,다사49ab19ab,4
1,다사49ba19ab,3
2,다사49ba19bb,2


In [54]:
# 보안등 ,비상벨, cctv
temp = pd.merge(slight_sum, bell_sum, how='outer')
total1 = pd.merge(temp, cctv_sum, how='outer')
total1.fillna(0, inplace=True)
total1.head(3)

Unnamed: 0,gid,securitylight_cnt,bell_us,cctv_cnt
0,다사49ba21ab,1.0,0.0,0.0
1,다사49bb18bb,1.0,0.0,0.0
2,다사49bb19aa,4.0,0.0,4.0


#### 4. 어린이놀이시설

In [55]:
play_grid = gpd.overlay(play_con, suwon_grid, how='intersection')
# play_grid.drop_duplicates(subset=['geometry'] , inplace=True)
play_grid = play_grid.rename(columns={'fac_no' : 'play_cnt'})
# slight_grid.head(3)
play_sum = play_grid.groupby('gid').count().reset_index()[['gid','play_cnt']]
play_sum.head(3)

Unnamed: 0,gid,play_cnt
0,다사49bb19ba,2
1,다사50aa19aa,3
2,다사50aa19ab,2


#### 5. 공원

In [56]:
park_grid = gpd.overlay(park_con, suwon_grid, how='intersection')
# park_grid.drop_duplicates(subset=['geometry'] , inplace=True)
park_grid = park_grid.rename(columns={'park_nm' : 'park_cnt'})
# slight_grid.head(3)
park_sum = park_grid.groupby('gid').count().reset_index()[['gid','park_cnt']]
park_sum.head(3)

Unnamed: 0,gid,park_cnt
0,다사49bb21ba,1
1,다사50aa19ba,1
2,다사50aa19bb,1


In [57]:
total2 = pd.merge(play_sum, park_sum, how='outer')
total2.fillna(0, inplace=True)

#### 6. 편의점

In [58]:
conv_grid = gpd.overlay(conv_con, suwon_grid, how='intersection')
# conv_grid.drop_duplicates(subset=['geometry'] , inplace=True)
conv_grid = conv_grid.rename(columns={'STORE_ID' : 'conv_cnt'})
# slight_grid.head(3)
conv_sum = conv_grid.groupby('gid').count().reset_index()[['gid','conv_cnt']]
conv_sum.head(3)

Unnamed: 0,gid,conv_cnt
0,다사50aa19aa,2
1,다사50aa19bb,3
2,다사50aa21ba,2


#### 7. 유흥업소

In [59]:
pub_grid = gpd.overlay(pub_con, suwon_grid, how='intersection')
# pub_grid.drop_duplicates(subset=['geometry'] , inplace=True)
pub_grid = pub_grid.rename(columns={'STORE_ID' : 'pub_cnt'})
# slight_grid.head(3)
pub_sum = pub_grid.groupby('gid').count().reset_index()[['gid','pub_cnt']]
pub_sum.head(3)

Unnamed: 0,gid,pub_cnt
0,다사50ab21ba,1
1,다사50ba21ba,1
2,다사51ab19ba,2


In [60]:
total3 = pd.merge(conv_sum, pub_sum, how='outer')
total3.fillna(0, inplace=True)

#### 8. 종합

In [61]:
temp = pd.merge(total1, total2, how='outer')
facility_grid = pd.merge(temp, total3, how='outer')
facility_grid.fillna(0, inplace=True)
facility_grid.head(3)

Unnamed: 0,gid,securitylight_cnt,bell_us,cctv_cnt,play_cnt,park_cnt,conv_cnt,pub_cnt
0,다사49ba21ab,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,다사49bb18bb,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,다사49bb19aa,4.0,0.0,4.0,0.0,0.0,0.0,0.0


In [62]:
facility_grid_ = pd.merge(facility_grid, suwon_grid, how='outer')
facility_grid_.fillna(0, inplace=True)
facility_grid_ = facility_grid_.rename(columns={'bell_us' : 'bell_cnt'})

In [63]:
## -- 밀도 : 시설 수 / 면적(km2)
facility_grid_['cctv밀도'] = facility_grid_['cctv_cnt'] / facility_grid_['total_area']
facility_grid_['보안등밀도'] = facility_grid_['bell_cnt'] / facility_grid_['total_area']
facility_grid_['유흥업소밀도'] = facility_grid_['pub_cnt'] / facility_grid_['total_area']
facility_grid_['편의점밀도'] = facility_grid_['conv_cnt'] / facility_grid_['total_area']
facility_grid_['공원및놀이시설밀도'] = (facility_grid_['park_cnt'] + facility_grid_['play_cnt']) / facility_grid_['total_area']
facility_grid_.fillna(0, inplace=True)
facility_grid_.head(3)

Unnamed: 0,gid,securitylight_cnt,bell_cnt,cctv_cnt,play_cnt,park_cnt,conv_cnt,pub_cnt,geometry,total_area,cctv밀도,보안등밀도,유흥업소밀도,편의점밀도,공원및놀이시설밀도
0,다사49ba21ab,1.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((126.93027 37.28882, 126.93026 ...",0.062546,0.0,0.0,0.0,0.0,0.0
1,다사49bb18bb,1.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((126.93326 37.26630, 126.93324 ...",0.062546,0.0,0.0,0.0,0.0,0.0
2,다사49bb19aa,4.0,0.0,4.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((126.93324 37.26855, 126.93323 ...",0.062546,63.95277,0.0,0.0,0.0,0.0


## 5. 화재발생, 자살율

In [64]:
fire_sui_con = pd.read_csv('./data/4개시군구_자살율_화재발생통계.csv')
del fire_sui_con['Unnamed: 0']
suwon_fire_sui_con =  fire_sui_con[fire_sui_con['sgg_nm'].isin(['수원시','권선구','영통구','장안구','팔달구'])]
suwon_fire_sui_con.head()

Unnamed: 0,sgg_nm,acc_num,tot_vic_num,total_dmg,sido,인구10만명당자살율
0,수원시,547,30,9354298,경기도,24.9
2,권선구,166,6,3031434,경기도,25.3
3,영통구,122,4,3334068,경기도,17.7
4,장안구,145,8,1276466,경기도,23.0
5,팔달구,114,12,1712330,경기도,33.6


In [65]:
suwon_fire_sui_con['sgg_nm'].unique()
suwon_area['sgg_nm'] = suwon_area['SGG_NM'].apply(lambda x: x.replace('수원시', ''))

In [66]:
suwon_ = pd.merge(suwon_fire_sui_con, suwon_area[['sgg_nm','geometry']])
suwon_ = gpd.GeoDataFrame(suwon_, crs='epsg:4326')
fire_sui_grid = gpd.overlay(suwon_, suwon_grid, how='intersection')
fire_sui_grid.drop_duplicates(subset=['gid'], inplace=True)

In [67]:
gid_cnt = fire_sui_grid.groupby('sgg_nm').count().reset_index()[['sgg_nm','acc_num']].rename(columns={'acc_num' : 'gid_cnt'})

In [68]:
fire_sui_grid_ = pd.merge(fire_sui_grid ,gid_cnt)
grid_merge_ = pd.merge(fire_sui_grid_.drop(['geometry'], axis=1), report_pop, on ='gid')
# grid_merge_[['sgg_nm','gid', 'acc_num', 'tot_vic_num', 'total_dmg','total_cnt']]

In [69]:
# 시군구별 격자 1당 신고수 비율
sgg_total = grid_merge_.groupby('sgg_nm').sum()[['total_cnt', 'total_pop']].reset_index()
sgg_total.columns = ['sgg_nm' , 'sgg_total_cnt', 'sgg_total_pop']
sgg_grid_merge = pd.merge(grid_merge_[['sgg_nm','gid', 'acc_num', 'tot_vic_num', 'total_dmg','total_cnt','인구10만명당자살율']], sgg_total)

In [70]:
sgg_grid_merge['grid_sgg_ratio'] =  sgg_grid_merge['total_cnt'] / sgg_grid_merge['sgg_total_cnt'] * 100
sgg_grid_merge.head(3)

Unnamed: 0,sgg_nm,gid,acc_num,tot_vic_num,total_dmg,total_cnt,인구10만명당자살율,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio
0,권선구,다사49aa19ab,166,6,3031434,0.0,25.3,76415.0,326743.0,0.0
1,권선구,다사49aa19ba,166,6,3031434,0.0,25.3,76415.0,326743.0,0.0
2,권선구,다사49aa20aa,166,6,3031434,0.0,25.3,76415.0,326743.0,0.0


In [71]:
sgg_grid_merge['acc_num'] = sgg_grid_merge['acc_num'] * sgg_grid_merge['grid_sgg_ratio']
sgg_grid_merge['tot_vic_num'] = sgg_grid_merge['tot_vic_num'] * sgg_grid_merge['grid_sgg_ratio']
sgg_grid_merge['total_dmg'] = sgg_grid_merge['total_dmg'] * sgg_grid_merge['grid_sgg_ratio']
sgg_grid_merge.head()

Unnamed: 0,sgg_nm,gid,acc_num,tot_vic_num,total_dmg,total_cnt,인구10만명당자살율,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio
0,권선구,다사49aa19ab,0.0,0.0,0.0,0.0,25.3,76415.0,326743.0,0.0
1,권선구,다사49aa19ba,0.0,0.0,0.0,0.0,25.3,76415.0,326743.0,0.0
2,권선구,다사49aa20aa,0.0,0.0,0.0,0.0,25.3,76415.0,326743.0,0.0
3,권선구,다사49ab18ab,0.0,0.0,0.0,0.0,25.3,76415.0,326743.0,0.0
4,권선구,다사49ab18ba,0.0,0.0,0.0,0.0,25.3,76415.0,326743.0,0.0


In [72]:
pd.options.display.float_format = '{:.5f}'.format

In [73]:
fire_sui_grid.columns

Index(['sgg_nm', 'acc_num', 'tot_vic_num', 'total_dmg', 'sido', '인구10만명당자살율',
       'gid', 'total_area', 'geometry'],
      dtype='object')

In [74]:
# 격자별 112 신고수 비율에 따른 화재발생율, 인구 10만명당자살율
fire_sui_grid_ = sgg_grid_merge[['gid','acc_num','tot_vic_num', 'total_dmg', '인구10만명당자살율']]
fire_sui_grid_.head(3)

Unnamed: 0,gid,acc_num,tot_vic_num,total_dmg,인구10만명당자살율
0,다사49aa19ab,0.0,0.0,0.0,25.3
1,다사49aa19ba,0.0,0.0,0.0,25.3
2,다사49aa20aa,0.0,0.0,0.0,25.3


## 6. 관할서, 관할서영역

In [75]:
police_con = pd.read_csv('./data/39.4개시도_경찰관서현황.csv')
police_con = geo_transform(police_con)
police_border = gpd.read_file('./data/11.4개시도_경찰서_관할경계.geojson')

In [76]:
grid_police_con = gpd.overlay(police_con, suwon_grid, how='intersection')
grid_police_con.head()

Unnamed: 0,pname,psname,name,lon,lat,addr,gid,total_area,geometry
0,경기남부청,수원남부경찰서,곡선지구대,127.03004,37.2518,권선구 권선동 1268-5,다사58ab17aa,0.06255,POINT (127.03004 37.25180)
1,경기남부청,수원남부경찰서,광교파출소,127.04771,37.28956,경기도 수원시 영통구 이의동 1347-2,다사59bb21ab,0.06255,POINT (127.04771 37.28956)
2,경기남부청,수원남부경찰서,권선파출소,127.02629,37.26075,권선구 권선동1012,다사57bb18aa,0.06255,POINT (127.02629 37.26075)
3,경기남부청,수원남부경찰서,매탄지구대,127.04,37.26516,팔달구 인계동 1117,다사59aa18ba,0.06255,POINT (127.04000 37.26516)
4,경기남부청,수원남부경찰서,산남지구대,127.05303,37.26866,영통구 매탄동 1213-2,다사60ab18bb,0.06255,POINT (127.05303 37.26866)


### 격자 기준 파출소 영역 밖 면적 구하기
2020년 보도된 한 뉴스 기사에 따르면(<a href="https://www.pressian.com/pages/articles/2020100811011343168#0DKU">기사참조링크</a>) 지방청별 평균 출동시간은 
3~ 6분 내외인것으로 드러남. 따라서 6분 이내에 출동 할 수 있는 약 1km 반경을 파출소 영역으로 취급하고, 파출소 영역을 벗어난 area의
면적을 연산하여 구해본다.

In [77]:
buf_poly = gpd.GeoDataFrame({'geometry': grid_police_con.buffer(0.015)}, crs='epsg:4326')
origin_ = suwon_grid.groupby(['gid']).apply(lambda x : x.area.sum())
print(origin_)
dif_area = gpd.overlay(suwon_grid, buf_poly, how='difference')
dif_area = dif_area.dissolve(by='gid')
print(dif_area)
dif_area.reset_index(inplace=True)


# ax = dif_area.plot(column='NAME', figsize=(8,8), alpha=0.8)
# ax.set_title("total - Police Buffer", fontsize=20)
# ax.set_axis_off()
# plt.show()

dif_area = dif_area.dissolve(by='gid')
print("전체 대비 미커버지역 비율")
print(round(dif_area.area / origin_ * 100))

gid
다사49aa19ab   0.00001
다사49aa19ba   0.00001
다사49aa20aa   0.00001
다사49ab18ab   0.00001
다사49ab18ba   0.00001
               ...  
다사63ba21aa   0.00001
다사63ba21ab   0.00001
다사63ba21ba   0.00001
다사63ba21bb   0.00001
다사63ba22aa   0.00001
Length: 2090, dtype: float64
                                                     geometry  total_area
gid                                                                      
다사49aa19ab  POLYGON ((126.92477 37.27077, 126.92475 37.273...     0.06255
다사49aa19ba  POLYGON ((126.92475 37.27302, 126.92473 37.275...     0.06255
다사49aa20aa  POLYGON ((126.92472 37.27753, 126.92470 37.279...     0.06255
다사49ab18ab  POLYGON ((126.92766 37.26177, 126.92764 37.264...     0.06255
다사49ab18ba  POLYGON ((126.92764 37.26402, 126.92762 37.266...     0.06255
...                                                       ...         ...
다사63ba21aa  POLYGON ((127.08822 37.28722, 127.08821 37.289...     0.06255
다사63ba21ab  POLYGON ((127.08821 37.28947, 127.08820 37.291...     0.06

In [78]:
grid_police_area_ = suwon_grid.copy()
grid_police_area_ = pd.merge(grid_police_area_, pd.DataFrame(round(dif_area.area / origin_ * 100)).reset_index())
grid_police_area_ = grid_police_area_.rename(columns = {0 : 'remain_area_ratio'})
grid_police_area_['remain_area_ratio'].sort_values(ascending=False)

2089   100.00000
892    100.00000
983    100.00000
982    100.00000
981    100.00000
          ...   
2062         nan
2063         nan
2064         nan
2065         nan
2074         nan
Name: remain_area_ratio, Length: 2090, dtype: float64

In [79]:
grid_police_area_.fillna(0, inplace=True)
grid_police_area_['remain_area_ratio'].describe()

count   2090.00000
mean      29.23493
std       44.05911
min        0.00000
25%        0.00000
50%        0.00000
75%      100.00000
max      100.00000
Name: remain_area_ratio, dtype: float64

In [80]:
grid_police_con.head(1)

Unnamed: 0,pname,psname,name,lon,lat,addr,gid,total_area,geometry
0,경기남부청,수원남부경찰서,곡선지구대,127.03004,37.2518,권선구 권선동 1268-5,다사58ab17aa,0.06255,POINT (127.03004 37.25180)


In [81]:
psname_cnt = grid_police_con.drop_duplicates(subset='psname').groupby('gid').count().reset_index()[['gid','name']].rename(columns={'name' : 'police_cnt'})
name_cnt = grid_police_con.groupby('gid').count().reset_index()[['gid','name']].rename(columns={'name' : 'security_cnt'})
police_con_df = pd.merge(psname_cnt, name_cnt, on='gid')

In [82]:
# 관할서, 치안안전센터, 영역밖의 면적 비율
police_con_df =  pd.merge(police_con_df, pd.DataFrame(round(dif_area.area / origin_ * 100)).reset_index().rename(columns={ 0 : 'remain_area_ratio'}) , how='outer')
police_con_df.fillna(0, inplace=True)
police_con_df.head(3)

Unnamed: 0,gid,police_cnt,security_cnt,remain_area_ratio
0,다사55bb19ab,1.0,1.0,0.0
1,다사58ab17aa,1.0,1.0,0.0
2,다사58ab20ab,1.0,1.0,0.0


## 7. 소득수준, 지가

In [83]:
tax_grid_ = pd.read_csv('./data/지방세부담액_공시지가.csv')
del tax_grid_['Unnamed: 0']
tax_grid_ = tax_grid_[tax_grid_['SGG_NM'].isin(['수원시권선구', '수원시영통구', '수원시장안구', '수원시팔달구'])]
tax_grid_

Unnamed: 0,SGG_NM,주민1인당 부담금액,PNILP
28,수원시권선구,862490,647850.17065
29,수원시영통구,2545850,1296400.0
30,수원시장안구,627870,695564.78873
31,수원시팔달구,2054130,1200172.18935


In [84]:
suwon_ = pd.merge(tax_grid_, suwon_area[['SGG_NM','geometry']])
suwon_ = gpd.GeoDataFrame(suwon_, crs='epsg:4326')
tax_pnilp_grid = gpd.overlay(suwon_, suwon_grid, how='intersection')
tax_pnilp_grid = tax_pnilp_grid[['gid','주민1인당 부담금액', 'PNILP']]
tax_pnilp_grid.head()

Unnamed: 0,gid,주민1인당 부담금액,PNILP
0,다사49aa19ab,862490,647850.17065
1,다사49aa19ba,862490,647850.17065
2,다사49aa20aa,862490,647850.17065
3,다사49ab18ab,862490,647850.17065
4,다사49ab18ba,862490,647850.17065


## 8. 종합

In [85]:
grid_temp = pd.merge(report_pop, area_con_grid, on='gid')
grid_temp = grid_temp.to_crs({'init' : 'epsg:4326'})
grid_temp.columns

Index(['gid', 'm_20g_pop', 'w_20g_pop', 'm_30g_pop', 'w_30g_pop', 'm_40g_pop',
       'w_40g_pop', 'm_50g_pop', 'w_50g_pop', 'm_60g_pop', 'w_60g_pop',
       'm_70g_pop', 'w_70g_pop', 'm_80g_pop', 'w_80g_pop', 'm_90g_pop',
       'w_90g_pop', 'm_100_pop', 'w_100g_pop', 'geometry', 'total_pop',
       'old_pop', 'total_cnt', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율',
       'total_area', '녹지지역_use_ratio', '주거지역_use_ratio', '상업지역_use_ratio',
       '공업지역_use_ratio'],
      dtype='object')

In [86]:
grid_merge_ = gpd.overlay(grid_temp, suwon_area, how='intersection')

- 격자별 각종 인구 특성 비율 맞추기

In [87]:
# 시군구별 격자 1당 인구수 비율
sgg_total = grid_merge_.groupby('sgg_nm').sum()[['total_cnt', 'total_pop']].reset_index()
sgg_total.columns = ['sgg_nm' , 'sgg_total_cnt', 'sgg_total_pop']
sgg_grid_merge = pd.merge(grid_merge_[['sgg_nm','gid', 'total_pop','total_cnt']], sgg_total)

### 구 단위로만 존재하는 데이터들은, 총인구의 비율에 따라서 다시 계산한다.

In [88]:
# grid_sgg_ratio : 총인구 비율(구 전체 대비 격자1의 총인구 비중)
sgg_grid_merge['grid_sgg_ratio'] =  sgg_grid_merge['total_pop'] / sgg_grid_merge['sgg_total_pop']
sgg_grid_merge.head()

Unnamed: 0,sgg_nm,gid,total_pop,total_cnt,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio
0,권선구,다사49aa19ab,0.0,0.0,76415.0,326743.0,0.0
1,권선구,다사49aa19ba,0.0,0.0,76415.0,326743.0,0.0
2,권선구,다사49aa20aa,0.0,0.0,76415.0,326743.0,0.0
3,권선구,다사49ab18ab,0.0,0.0,76415.0,326743.0,0.0
4,권선구,다사49ab18ba,0.0,0.0,76415.0,326743.0,0.0


In [89]:
sgg_grid_merge['grid_sgg_ratio'].sort_values(ascending=False)

1196   0.01979
1692   0.01535
1693   0.01516
1171   0.01419
1454   0.01253
         ...  
1202   0.00000
1203   0.00000
1204   0.00000
1205   0.00000
0      0.00000
Name: grid_sgg_ratio, Length: 2247, dtype: float64

In [90]:
pop_total = pd.read_csv('./data/4개시군구_인구및주택현황.csv')
del pop_total['Unnamed: 0']
pop_total.columns

Index(['sgg_nm', '총인구 (명)', '남자 (명)', '여자 (명)', '내국인-계 (명)', '내국인-남자 (명)',
       '내국인-여자 (명)', '외국인-계 (명)', '외국인-남자 (명)', '외국인-여자 (명)', '가구-계 (가구)',
       '일반가구 (가구)', '집단가구 (가구)', '외국인가구 (가구)', '주택-계 (호)', '단독주택 (호)',
       '아파트 (호)', '연립주택 (호)', '다세대주택 (호)', '비거주용 건물내 주택 (호)', '주택이외의 거처 (호)',
       '세대당 인구수', '성비', '외국인 비율', 'sido', 'poor_num', 'single_20대',
       'single_65세 이상', 'single_합계'],
      dtype='object')

In [91]:
# 종합특성도출
pop_merge_grid = pd.merge(pop_total, sgg_grid_merge)
pop_merge_grid['내국인-계 (명)'] = pop_merge_grid['내국인-계 (명)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['외국인-계 (명)'] = pop_merge_grid['외국인-계 (명)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['내국인-여자 (명)'] = pop_merge_grid['내국인-여자 (명)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['내국인-남자 (명)'] = pop_merge_grid['내국인-남자 (명)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['외국인-남자 (명)'] = pop_merge_grid['외국인-남자 (명)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['외국인-여자 (명)'] = pop_merge_grid['외국인-여자 (명)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['가구-계 (가구)'] = pop_merge_grid['가구-계 (가구)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['일반가구 (가구)'] = pop_merge_grid['일반가구 (가구)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['집단가구 (가구)'] = pop_merge_grid['집단가구 (가구)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['외국인가구 (가구)'] = pop_merge_grid['외국인가구 (가구)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['주택-계 (호)'] = pop_merge_grid['주택-계 (호)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['아파트 (호)'] = pop_merge_grid['아파트 (호)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['연립주택 (호)'] = pop_merge_grid['연립주택 (호)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['다세대주택 (호)'] = pop_merge_grid['다세대주택 (호)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['비거주용 건물내 주택 (호)'] = pop_merge_grid['비거주용 건물내 주택 (호)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['주택이외의 거처 (호)'] = pop_merge_grid['주택이외의 거처 (호)'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['poor_num'] = pop_merge_grid['poor_num'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['single_65세 이상'] = pop_merge_grid['single_65세 이상'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['single_20대'] = pop_merge_grid['single_20대'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid['single_합계'] = pop_merge_grid['single_합계'] * pop_merge_grid['grid_sgg_ratio']
pop_merge_grid.head(3)

Unnamed: 0,sgg_nm,총인구 (명),남자 (명),여자 (명),내국인-계 (명),내국인-남자 (명),내국인-여자 (명),외국인-계 (명),외국인-남자 (명),외국인-여자 (명),...,poor_num,single_20대,single_65세 이상,single_합계,gid,total_pop,total_cnt,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio
0,장안구,278507,140771,137736,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,다사52ab22ab,0.0,12.0,50671.0,248732.0,0.0
1,장안구,278507,140771,137736,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,다사52ab22ba,0.0,21.0,50671.0,248732.0,0.0
2,장안구,278507,140771,137736,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,다사52ba21bb,0.0,12.0,50671.0,248732.0,0.0


In [92]:
pop_merge_grid.drop_duplicates(subset='gid', inplace=True)
pop_merge_grid = pop_merge_grid[['gid','sgg_nm', '내국인-계 (명)', '내국인-남자 (명)',
       '내국인-여자 (명)', '외국인-계 (명)', '외국인-남자 (명)', '외국인-여자 (명)', '가구-계 (가구)',
       '일반가구 (가구)', '집단가구 (가구)', '외국인가구 (가구)', '주택-계 (호)', '단독주택 (호)',
       '아파트 (호)', '연립주택 (호)', '다세대주택 (호)', '비거주용 건물내 주택 (호)', '주택이외의 거처 (호)',
       'sido', 'poor_num', 'single_20대',
       'single_65세 이상', 'single_합계',  'total_pop', 'total_cnt',
       'sgg_total_cnt', 'sgg_total_pop', 'grid_sgg_ratio']]
pop_merge_grid.head(3) # 인구 최종

Unnamed: 0,gid,sgg_nm,내국인-계 (명),내국인-남자 (명),내국인-여자 (명),외국인-계 (명),외국인-남자 (명),외국인-여자 (명),가구-계 (가구),일반가구 (가구),...,sido,poor_num,single_20대,single_65세 이상,single_합계,total_pop,total_cnt,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio
0,다사52ab22ab,장안구,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,경기도,0.0,0.0,0.0,0.0,0.0,12.0,50671.0,248732.0,0.0
1,다사52ab22ba,장안구,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,경기도,0.0,0.0,0.0,0.0,0.0,21.0,50671.0,248732.0,0.0
2,다사52ba21bb,장안구,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,경기도,0.0,0.0,0.0,0.0,0.0,12.0,50671.0,248732.0,0.0


In [93]:
grid_pop_report_total = pd.merge(grid_temp[['gid', 'total_cnt', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율',
       'total_area', '녹지지역_use_ratio', '주거지역_use_ratio', '상업지역_use_ratio',
       '공업지역_use_ratio'
                   ]], pop_merge_grid)
grid_pop_report_total.head(3)

Unnamed: 0,gid,total_cnt,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt,교통신고비율,기타및서비스비율,절도폭력및경범비율,...,주택이외의 거처 (호),sido,poor_num,single_20대,single_65세 이상,single_합계,total_pop,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio
0,다사49aa19ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,경기도,0.0,0.0,0.0,0.0,0.0,76415.0,326743.0,0.0
1,다사49aa19ba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,경기도,0.0,0.0,0.0,0.0,0.0,76415.0,326743.0,0.0
2,다사49aa20aa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,경기도,0.0,0.0,0.0,0.0,0.0,76415.0,326743.0,0.0


In [94]:
report_pop.columns

Index(['gid', 'm_20g_pop', 'w_20g_pop', 'm_30g_pop', 'w_30g_pop', 'm_40g_pop',
       'w_40g_pop', 'm_50g_pop', 'w_50g_pop', 'm_60g_pop', 'w_60g_pop',
       'm_70g_pop', 'w_70g_pop', 'm_80g_pop', 'w_80g_pop', 'm_90g_pop',
       'w_90g_pop', 'm_100_pop', 'w_100g_pop', 'geometry', 'total_pop',
       'old_pop', 'total_cnt', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율'],
      dtype='object')

#### 총인구, 총 여성/남성 수는 격자데이터 상의 수치를 참조한다

In [95]:
# 총인구, 총 여성, 총 남성
report_pop['w_total'] = report_pop['w_20g_pop'] +  report_pop['w_30g_pop'] +  report_pop['w_40g_pop'] +  report_pop['w_50g_pop'] +\
           report_pop['w_60g_pop'] +  report_pop['w_70g_pop'] +  report_pop['w_80g_pop'] +  report_pop['w_90g_pop'] +  report_pop['w_100g_pop']

report_pop['m_total'] = report_pop['m_20g_pop'] +  report_pop['m_30g_pop'] +  report_pop['m_40g_pop'] +  report_pop['m_50g_pop'] +\
           report_pop['m_60g_pop'] +  report_pop['m_70g_pop'] +  report_pop['m_80g_pop'] +  report_pop['m_90g_pop'] +  report_pop['m_100_pop']
report_pop = report_pop.rename(columns = {'total_pop' : '총인구 (명)', 'm_total' : '남자 (명)', 'w_total' : '여자 (명)'})
pop_total_gid = report_pop[['gid','총인구 (명)', '남자 (명)',  '여자 (명)']]

In [96]:
grid_pop_report_total.columns

Index(['gid', 'total_cnt', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율',
       'total_area', '녹지지역_use_ratio', '주거지역_use_ratio', '상업지역_use_ratio',
       '공업지역_use_ratio', 'sgg_nm', '내국인-계 (명)', '내국인-남자 (명)', '내국인-여자 (명)',
       '외국인-계 (명)', '외국인-남자 (명)', '외국인-여자 (명)', '가구-계 (가구)', '일반가구 (가구)',
       '집단가구 (가구)', '외국인가구 (가구)', '주택-계 (호)', '단독주택 (호)', '아파트 (호)',
       '연립주택 (호)', '다세대주택 (호)', '비거주용 건물내 주택 (호)', '주택이외의 거처 (호)', 'sido',
       'poor_num', 'single_20대', 'single_65세 이상', 'single_합계', 'total_pop',
       'sgg_total_cnt', 'sgg_total_pop', 'grid_sgg_ratio'],
      dtype='object')

In [97]:
# grid_pop_report_total.drop(['총인구 (명)', '남자 (명)', '여자 (명)'], axis=1, inplace=True)

In [98]:
grid_ = pd.merge(grid_pop_report_total, pop_total_gid)
grid_.head(3)

Unnamed: 0,gid,total_cnt,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt,교통신고비율,기타및서비스비율,절도폭력및경범비율,...,single_20대,single_65세 이상,single_합계,total_pop,sgg_total_cnt,sgg_total_pop,grid_sgg_ratio,총인구 (명),남자 (명),여자 (명)
0,다사49aa19ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,76415.0,326743.0,0.0,0.0,0.0,0.0
1,다사49aa19ba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,76415.0,326743.0,0.0,0.0,0.0,0.0
2,다사49aa20aa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,76415.0,326743.0,0.0,0.0,0.0,0.0


In [99]:
grid_['gid'].nunique()

2086

In [100]:
grid_.columns

Index(['gid', 'total_cnt', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율',
       'total_area', '녹지지역_use_ratio', '주거지역_use_ratio', '상업지역_use_ratio',
       '공업지역_use_ratio', 'sgg_nm', '내국인-계 (명)', '내국인-남자 (명)', '내국인-여자 (명)',
       '외국인-계 (명)', '외국인-남자 (명)', '외국인-여자 (명)', '가구-계 (가구)', '일반가구 (가구)',
       '집단가구 (가구)', '외국인가구 (가구)', '주택-계 (호)', '단독주택 (호)', '아파트 (호)',
       '연립주택 (호)', '다세대주택 (호)', '비거주용 건물내 주택 (호)', '주택이외의 거처 (호)', 'sido',
       'poor_num', 'single_20대', 'single_65세 이상', 'single_합계', 'total_pop',
       'sgg_total_cnt', 'sgg_total_pop', 'grid_sgg_ratio', '총인구 (명)', '남자 (명)',
       '여자 (명)'],
      dtype='object')

In [101]:
# feature 추가해주기(외국인비율,성비)
grid_['외국인 비율']=  grid_['외국인-계 (명)'] / grid_['총인구 (명)'] * 100
# 성비 : 남 / 여 * 100
grid_['성비']=  grid_['남자 (명)'] / grid_['여자 (명)'] * 100

In [102]:
grid_ = grid_.rename(columns = {'total_cnt' : 'case_type'})

temp_ = pd.merge(plan_use_grid, suwon_grid, how='outer')
temp_2 = pd.merge(temp_, house_total, how='outer')
grid_2 = pd.merge(temp_2, facility_grid_)
grid_2.fillna(0, inplace=True)
grid_2.drop_duplicates('gid', inplace=True)
grid_2.head()

Unnamed: 0,gid,plan_total_area,도로용지_plan_use_ratio,학교용지_plan_use_ratio,공원용지_plan_use_ratio,녹지_plan_use_ratio,주거용지_plan_use_ratio,공공용지_plan_use_ratio,업무용지_plan_use_ratio,시설용지_plan_use_ratio,...,cctv_cnt,play_cnt,park_cnt,conv_cnt,pub_cnt,cctv밀도,보안등밀도,유흥업소밀도,편의점밀도,공원및놀이시설밀도
0,다사49bb19ba,0.00122,79.89091,20.10909,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,2.0,0.0,0.0,0.0,79.94096,0.0,0.0,0.0,31.97639
1,다사49bb19bb,0.02485,20.41719,58.52691,13.96984,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,79.94096,0.0,0.0,0.0,0.0
2,다사50aa18bb,0.00202,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,다사50aa19aa,0.00752,67.41629,0.0,18.79339,8.90836,4.88196,0.0,0.0,0.0,...,5.0,3.0,0.0,2.0,0.0,79.94091,0.0,0.0,31.97637,47.96455
4,다사50aa19ab,0.02149,29.78879,0.0,0.0,11.76005,58.45116,0.0,0.0,0.0,...,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.97637


In [103]:
temp_ = pd.merge(fire_sui_grid_, police_con_df, how='outer')
grid_3 = pd.merge(temp_, tax_pnilp_grid, how='outer')
grid_3.fillna(0, inplace=True)
grid_3.drop_duplicates('gid', inplace=True)
grid_3.head(3)

Unnamed: 0,gid,acc_num,tot_vic_num,total_dmg,인구10만명당자살율,police_cnt,security_cnt,remain_area_ratio,주민1인당 부담금액,PNILP
0,다사49aa19ab,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065
1,다사49aa19ba,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065
2,다사49aa20aa,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065


In [104]:
grid_temp_ = pd.merge(grid_, grid_2, how='outer')
grid_temp_.drop(['total_pop', 'sgg_total_cnt', 'sgg_total_pop', 'geometry'], axis=1, inplace=True)
grid_total = pd.merge(grid_temp_, grid_3, how='outer')
grid_total.head(3)

Unnamed: 0,gid,case_type,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt,교통신고비율,기타및서비스비율,절도폭력및경범비율,...,공원및놀이시설밀도,acc_num,tot_vic_num,total_dmg,인구10만명당자살율,police_cnt,security_cnt,remain_area_ratio,주민1인당 부담금액,PNILP
0,다사49aa19ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065
1,다사49aa19ba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065
2,다사49aa20aa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065


In [105]:
grid_total.drop_duplicates(subset='gid', inplace=True)
grid_total.head(3)

Unnamed: 0,gid,case_type,기타_case_cnt,교통_case_cnt,서비스_case_cnt,시비/폭력_case_cnt,절도/경범_case_cnt,교통신고비율,기타및서비스비율,절도폭력및경범비율,...,공원및놀이시설밀도,acc_num,tot_vic_num,total_dmg,인구10만명당자살율,police_cnt,security_cnt,remain_area_ratio,주민1인당 부담금액,PNILP
0,다사49aa19ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065
1,다사49aa19ba,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065
2,다사49aa20aa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,25.3,0.0,0.0,100.0,862490.0,647850.17065


In [106]:
grid_total.columns

Index(['gid', 'case_type', '기타_case_cnt', '교통_case_cnt', '서비스_case_cnt',
       '시비/폭력_case_cnt', '절도/경범_case_cnt', '교통신고비율', '기타및서비스비율', '절도폭력및경범비율',
       'total_area', '녹지지역_use_ratio', '주거지역_use_ratio', '상업지역_use_ratio',
       '공업지역_use_ratio', 'sgg_nm', '내국인-계 (명)', '내국인-남자 (명)', '내국인-여자 (명)',
       '외국인-계 (명)', '외국인-남자 (명)', '외국인-여자 (명)', '가구-계 (가구)', '일반가구 (가구)',
       '집단가구 (가구)', '외국인가구 (가구)', '주택-계 (호)', '단독주택 (호)', '아파트 (호)',
       '연립주택 (호)', '다세대주택 (호)', '비거주용 건물내 주택 (호)', '주택이외의 거처 (호)', 'sido',
       'poor_num', 'single_20대', 'single_65세 이상', 'single_합계',
       'grid_sgg_ratio', '총인구 (명)', '남자 (명)', '여자 (명)', '외국인 비율', '성비',
       'plan_total_area', '도로용지_plan_use_ratio', '학교용지_plan_use_ratio',
       '공원용지_plan_use_ratio', '녹지_plan_use_ratio', '주거용지_plan_use_ratio',
       '공공용지_plan_use_ratio', '업무용지_plan_use_ratio', '시설용지_plan_use_ratio',
       'OLD_YEAR', 'area_old_level_3', 'area_old_level_4', 'area_old_level_1',
       'area_old_level_2', 'securitylight_cn

#### 추가한 feature들을 넣어줍니다

In [107]:
# 주택(호) 비율
grid_total['house_ratio'] = round(grid_total['주택-계 (호)'] / grid_total['가구-계 (가구)'] * 100, 2)
# 1인가구 비율
grid_total['single_ratio'] = round(grid_total['single_합계'] / grid_total['총인구 (명)'] * 100, 2)
# 기초수급자 비율
grid_total['single_ratio'] = round(grid_total['poor_num'] / grid_total['총인구 (명)'] * 100, 2)

# 인구대비 신고 수 전체 통계
grid_total['report_ratio'] = round(grid_total['case_type'] / grid_total['총인구 (명)'], 2) * 100
# 인구 대비 교통 관련 신고 수
grid_total['report_traffic_ratio'] = round(grid_total['교통_case_cnt'] / grid_total['총인구 (명)'], 2) * 100
# 인구 대비 시비 폭력 관련 신고 수
grid_total['report_vio_ratio'] = round(grid_total['시비/폭력_case_cnt'] / grid_total['총인구 (명)'], 2) * 100
# 인구 대비 절도 경범 관련 신고 수
grid_total['report_crime_ratio'] = round(grid_total['절도/경범_case_cnt'] / grid_total['총인구 (명)'], 2) * 100

# 파출소 1개별 접수건수
grid_total['case_for_sec'] = grid_total['case_type']  / grid_total['security_cnt']

# 면적당 신고수
grid_total['report_for_area']= grid_total['case_type'] / grid_total['total_area']

In [108]:
grid_total['세대당 인구수'] = grid_total['총인구 (명)'] / grid_total['가구-계 (가구)']
grid_total['기타_use_ratio']= 0
grid_total['농경지역_use_ratio']=0
grid_total.fillna(0, inplace=True)

In [109]:
grid_total['녹지지역_use_ratio'].sort_values(ascending=False) # 녹지가 100% 면 제외
grid_total['녹지지역_use_ratio']

0      0.12064
1      0.01745
2      0.01209
3      0.04735
4      0.27205
         ...  
2085   0.24245
2086   0.00000
2087   0.00000
2088   0.00000
2089   0.00000
Name: 녹지지역_use_ratio, Length: 2090, dtype: float64

#### 녹지가5할이상 차지하는 격자의 경우 분석대상에서 제외하는것을 고려(인구,112신고 등의 수치가 부족하여 유의미한 결과 도출이 어려움)

In [110]:
grid_total[(grid_total['녹지지역_use_ratio'] < 1) & (grid_total['녹지지역_use_ratio'] >= 0.9)][['녹지지역_use_ratio', '주거지역_use_ratio']]

Unnamed: 0,녹지지역_use_ratio,주거지역_use_ratio
7,0.92850,0.00000
10,0.96837,0.00000
16,0.99678,0.00000
21,0.91837,0.08163
24,1.00000,0.00000
...,...,...
2020,0.92697,0.07303
2050,0.98058,0.00000
2067,0.91244,0.08756
2075,0.94978,0.00000


In [111]:
grid_total.to_csv('./data/수원격자종합.csv')

In [112]:
# kk_area = gpd.GeoDataFrame.from_file('data/LARD_ADM_SECT_SGG_41.shp', encoding='cp949')
# kk_area = kk_area.to_crs({'init':'epsg:4326'}) # 좌표계 epsg : 4326
# suwon_area = kk_area[kk_area['SGG_NM'].str.contains('수원')]
# suwon_area