# 라이브러리 설치

In [None]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.12.2-py3-none-any.whl (1.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 12.5 MB/s eta 0:00:00
Collecting pyproj>=2.6.1.post1
  Downloading pyproj-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.8/7.8 MB 37.9 MB/s eta 0:00:00
Collecting fiona>=1.8
  Downloading Fiona-1.8.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 16.6/16.6 MB 36.9 MB/s eta 0:00:00
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-

In [None]:
!pip install folium matplotlib mapclassify

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mapclassify
  Downloading mapclassify-2.4.3-py3-none-any.whl (38 kB)
Installing collected packages: mapclassify
Successfully installed mapclassify-2.4.3


# 라이브러리 및 경로 설정

In [None]:
import geopandas as gpd
import pandas as pd

from shapely import wkt

In [None]:
# Base directory (on the mounted Google Drive) for every CSV this notebook
# reads or writes. Paths are built by plain string concatenation, so the
# trailing slash is required.
src_path = '/content/drive/MyDrive/sk쉴더스/프로젝트1/데이터/'

# 데이터 로드 및 정제

## 격자 데이터

In [None]:
# Load the grid table and preview its first row.
grid_csv_path = src_path + 'unique_grid.csv'
grids = pd.read_csv(grid_csv_path)
grids.head(1)

FileNotFoundError: ignored

In [None]:
# Parse the WKT text in `geometry` into shapely objects, then wrap the table
# as a GeoDataFrame declared to be in EPSG:5179.
parsed_geometry = grids['geometry'].map(wkt.loads)
grids['geometry'] = parsed_geometry
geo_grid = gpd.GeoDataFrame(grids, crs='epsg:5179')

## 학교 데이터

In [None]:
# Load the nationwide school location table (CP949-encoded CSV) and show its
# first row together with its shape.
school_csv_path = src_path + '주변시설물/전국초중등학교위치표준데이터.csv'
schools = pd.read_csv(school_csv_path, encoding='cp949')
display(schools.head(1), schools.shape)

In [None]:
# Seoul only.
# Match '서울' as an address prefix rather than anywhere in the string: a
# substring test also keeps addresses elsewhere that merely contain '서울'
# (e.g. inside a road or building name).
# NOTE(review): assumes the address starts with the province/city name - confirm against the data.
# `na=False` treats a missing address as "not Seoul"; without it a NaN in the
# mask makes the boolean indexing fail.
is_seoul = schools['소재지도로명주소'].str.strip().str.startswith('서울', na=False)
seoul_school = schools[is_seoul]
display(seoul_school.tail(1), seoul_school.shape)

In [None]:
# Keep only the columns used downstream: school id, school level, and the
# latitude / longitude pair.
kept_school_columns = ['학교ID', '학교급구분', '위도', '경도']
seoul_school = seoul_school[kept_school_columns]

In [None]:
# Build points from longitude (x) / latitude (y) tagged as EPSG:4326, then
# reproject the frame in place to EPSG:5179.
school_points = gpd.points_from_xy(
    x=seoul_school['경도'], y=seoul_school['위도'], crs='EPSG:4326'
)
geo_seoul_school = gpd.GeoDataFrame(seoul_school, geometry=school_points)
geo_seoul_school.to_crs(epsg=5179, inplace=True)
geo_seoul_school.head(2)

In [None]:
# Show the Seoul school points on an interactive map as a visual sanity check.
geo_seoul_school.explore()

## 어린이보호구역 데이터

In [None]:
# Load the nationwide child protection zone table (CP949-encoded CSV) and
# show its first row together with its shape.
safety_zone_csv_path = src_path + '주변시설물/전국어린이보호구역표준데이터.csv'
children_safety_zone = pd.read_csv(safety_zone_csv_path, encoding='cp949')
display(children_safety_zone.head(1), children_safety_zone.shape)

In [None]:
# The child protection zone data shows that each zone belongs to a designated
# kind of target facility.
# Comparing accidents between daycare centers / kindergartens / elementary
# schools that have a protection zone and those that do not might tell us something.
children_safety_zone['시설종류'].unique()

In [None]:
# The plan was to pick Seoul rows by road-name address, as for the school
# data, but that column contains nulls.
# Luckily no row has both the road-name and the lot-number address missing,
# so whenever one is null the other can be used instead.
# Null counts per column among the rows whose lot-number address is missing:
lot_address_missing = children_safety_zone['소재지지번주소'].isna()
children_safety_zone[lot_address_missing].isna().sum()

In [None]:
# Null counts per column among the rows whose road-name address is missing.
road_address_missing = children_safety_zone['소재지도로명주소'].isna()
children_safety_zone[road_address_missing].isna().sum()

In [None]:
# Where the road-name address is null, put the lot-number address there as a
# temporary stand-in.
children_safety_zone['소재지도로명주소'] = children_safety_zone['소재지도로명주소'].fillna(
    children_safety_zone['소재지지번주소']
)

In [None]:
# List the rows whose road-name address is null (expected to be empty once the
# lot-number fallback has been applied).
children_safety_zone[children_safety_zone['소재지도로명주소'].isna()]

In [None]:
# Keep only the zones located in Seoul.
# Match '서울' as an address prefix rather than anywhere in the string: a
# substring test also keeps addresses elsewhere that merely contain '서울'
# (e.g. inside a road or building name).
# NOTE(review): assumes the address starts with the province/city name - confirm against the data.
# `na=False` treats a missing address as "not Seoul"; without it a NaN in the
# mask makes the boolean indexing fail.
zone_is_seoul = (
    children_safety_zone['소재지도로명주소'].str.strip().str.startswith('서울', na=False)
)
seoul_child_safe_zone = children_safety_zone[zone_is_seoul]
display(seoul_child_safe_zone.tail(1), seoul_child_safe_zone.shape)

In [None]:
# Keep only the facility kind, facility name, and the latitude / longitude pair.
kept_zone_columns = ['시설종류', '대상시설명', '위도', '경도']
seoul_child_safe_zone = seoul_child_safe_zone[kept_zone_columns]

In [None]:
# Drop rows that lack either coordinate: a point needs both latitude and
# longitude, so checking the longitude alone is not enough.
seoul_child_safe_zone = seoul_child_safe_zone.dropna(subset=['위도', '경도'])

In [None]:
# Build points from longitude (x) / latitude (y) tagged as EPSG:4326, then
# reproject the frame in place to EPSG:5179.
zone_points = gpd.points_from_xy(
    x=seoul_child_safe_zone['경도'], y=seoul_child_safe_zone['위도'], crs='EPSG:4326'
)
geo_seoul_child_safe_zone = gpd.GeoDataFrame(seoul_child_safe_zone, geometry=zone_points)
geo_seoul_child_safe_zone.to_crs(epsg=5179, inplace=True)
geo_seoul_child_safe_zone.head(2)

In [None]:
# Show the Seoul child protection zone points on an interactive map as a
# visual sanity check.
geo_seoul_child_safe_zone.explore()

## 데이터 정제 결과

- geo_grid : 격자 데이터
- geo_seoul_school : 서울시 초, 중, 고등학교 데이터
- geo_seoul_child_safe_zone : 서울시 어린이보호구역 데이터

- 좌표계는 epsg:5179로 통일

# 격자별 특성 개수

In [None]:
def point_to_grid(grid_df, point_df, col_name):
  '''
  Count how many points of `point_df` fall into each cell of `grid_df`.

  ## Args
  `grid_df` grid GeoDataFrame with a `gid` column (expected CRS: EPSG:5179)
  `point_df` point GeoDataFrame to match against the grid (same CRS)
  `col_name` name given to the count column of the result

  ## Returns
  `count_by_grid` DataFrame with one row per row of `grid_df` (same index),
  holding `gid` and `col_name`; cells without a matched point get 0
  '''
  # An inner spatial join keeps only the points that land in some grid cell.
  matched = gpd.sjoin(point_df, grid_df, how='inner')

  # Count joined rows per cell. Counting rows, rather than the non-null values
  # of one attribute column, keeps points whose attributes are missing.
  points_per_gid = matched.groupby('gid').size()

  # Start from every grid cell so that empty cells are reported as 0. A single
  # lookup by gid replaces scanning the whole frame once per matched cell.
  count_by_grid = pd.DataFrame({'gid': grid_df['gid']})
  count_by_grid[col_name] = (
      count_by_grid['gid'].map(points_per_gid).fillna(0).astype(int)
  )

  return count_by_grid

In [None]:
# Per-grid count of Seoul schools of every level, stored in column '초중고등학교수'.
sc = point_to_grid(geo_grid, geo_seoul_school, '초중고등학교수')

In [None]:
# Per-grid count of Seoul child protection zones, stored in column '어린이보호구역수'.
c_s = point_to_grid(geo_grid, geo_seoul_child_safe_zone, '어린이보호구역수')

In [None]:
# Put the school and protection-zone counts side by side, one row per gid.
df = sc.merge(c_s, how='inner', on='gid')

# 학교와 어린이보호구역은 교통사고와 관련이 있을까

In [None]:
# Load the traffic accident table (CP949-encoded CSV) and keep only the
# accident number and the x / y coordinate columns.
accident_csv_path = src_path + '교통사고 데이터/교통사고데이터.csv'
accident_columns = ['acdnt_no', 'x_crdnt', 'y_crdnt']
accidents = pd.read_csv(accident_csv_path, encoding='cp949')
accidents = accidents[accident_columns]

In [None]:
# Build accident points from the x / y columns and declare them as EPSG:5179;
# the coordinates are used as-is, with no reprojection.
accident_points = gpd.points_from_xy(
    x=accidents['x_crdnt'], y=accidents['y_crdnt'], crs='EPSG:5179'
)
geo_accidents = gpd.GeoDataFrame(accidents, geometry=accident_points)
geo_accidents.head(2)

In [None]:
# Per-grid count of traffic accidents, stored in column '교통사고수'.
acc = point_to_grid(geo_grid, geo_accidents, '교통사고수')

In [None]:
# Save the per-grid accident counts as accident.csv under src_path, without
# writing the DataFrame index.
acc.to_csv(src_path + 'accident.csv', index=False)

In [None]:
# Attach the accident counts to the facility counts (matched on gid) and
# preview the first three rows.
df = df.merge(acc, how='inner', on='gid')
df.head(3)

In [None]:
# Not meaningful. As a next step, split the schools into elementary, middle
# and high schools.
# `gid` only identifies the grid cell; it is not a feature, so it is left out
# of the correlation matrix.
df.drop(columns='gid').corr()

In [None]:
# Split the Seoul schools into three layers by school level.
school_level = geo_seoul_school['학교급구분']
elemetary_school = geo_seoul_school[school_level == '초등학교']
middle_school = geo_seoul_school[school_level == '중학교']
high_school = geo_seoul_school[school_level == '고등학교']

In [None]:
# Per-grid counts for each school level; each layer is paired with the name of
# its count column.
level_layers = [
    (elemetary_school, '초등학교수'),
    (middle_school, '중학교수'),
    (high_school, '고등학교수'),
]
ele, mid, high = [
    point_to_grid(geo_grid, layer, count_column) for layer, count_column in level_layers
]

In [None]:
# Combine the three school-level counts and the protection-zone counts into
# one table, matched on gid.
df = (
    ele.merge(mid, how='inner', on='gid')
    .merge(high, how='inner', on='gid')
    .merge(c_s, how='inner', on='gid')
)

In [None]:
# Save the per-grid facility counts as other_facil.csv under src_path, without
# writing the DataFrame index.
df.to_csv(src_path + 'other_facil.csv', index=False)

In [None]:

# Attach the accident counts to the facility counts, matched on gid.
df = df.merge(acc, how='inner', on='gid')

In [None]:
# Correlation between the per-grid counts. `gid` only identifies the grid
# cell; it is not a feature, so it is left out of the matrix.
df.drop(columns='gid').corr()

In [None]:
# Use gid as the row index so that only the count columns remain as data.
df = df.set_index('gid')

In [None]:
# Summary statistics of the numeric columns of df.
df.describe()