## 서울시 교통사고 시각화

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium 

In [2]:
# Load the Seoul traffic-accident records (one row per year, month and
# district) and preview the first rows.
traffic_seoul = pd.read_csv("./Data/newSeoul_2005_2019.csv")
traffic_seoul.head()

Unnamed: 0,년도,월,자치구명,발생건수,사망자수,부상자수
0,2005,1,종로구,93,2,138
1,2005,2,종로구,84,3,125
2,2005,3,종로구,117,0,142
3,2005,4,종로구,138,2,212
4,2005,5,종로구,145,2,207


In [3]:
# Preview the last rows to see where the data ends.
traffic_seoul.tail()

Unnamed: 0,년도,월,자치구명,발생건수,사망자수,부상자수
4495,2019,8,강동구,127,2,175
4496,2019,9,강동구,98,2,137
4497,2019,10,강동구,108,1,144
4498,2019,11,강동구,146,0,199
4499,2019,12,강동구,108,1,131


In [4]:
# Summarise the column dtypes and non-null counts.
traffic_seoul.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4500 entries, 0 to 4499
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   년도      4500 non-null   int64 
 1   월       4500 non-null   int64 
 2   자치구명    4500 non-null   object
 3   발생건수    4500 non-null   int64 
 4   사망자수    4500 non-null   int64 
 5   부상자수    4500 non-null   int64 
dtypes: int64(5), object(1)
memory usage: 211.1+ KB


### 컬럼별 상관계수 구하기

In [6]:
# Correlation matrix between accident count (발생건수) and injured count (부상자수).
np.corrcoef(traffic_seoul['발생건수'], traffic_seoul['부상자수'])

array([[1.       , 0.9780286],
       [0.9780286, 1.       ]])

In [7]:
# Correlation matrix between accident count (발생건수) and death count (사망자수).
np.corrcoef(traffic_seoul['발생건수'], traffic_seoul['사망자수'])

array([[1.        , 0.24622428],
       [0.24622428, 1.        ]])

In [8]:
# Correlation matrix between injured count (부상자수) and death count (사망자수).
np.corrcoef(traffic_seoul['부상자수'], traffic_seoul['사망자수'])

array([[1.        , 0.24440625],
       [0.24440625, 1.        ]])

In [11]:
# Keep only the rows recorded for the year 2019.
traffic_seoul_2019 = traffic_seoul.loc[traffic_seoul['년도'].eq(2019)]

traffic_seoul_2019

Unnamed: 0,년도,월,자치구명,발생건수,사망자수,부상자수
4200,2019,1,종로구,87,1,125
4201,2019,2,종로구,66,1,84
4202,2019,3,종로구,87,2,122
4203,2019,4,종로구,85,0,131
4204,2019,5,종로구,112,1,158
...,...,...,...,...,...,...
4495,2019,8,강동구,127,2,175
4496,2019,9,강동구,98,2,137
4497,2019,10,강동구,108,1,144
4498,2019,11,강동구,146,0,199


In [12]:
# Tidy the index: discard the row labels carried over from the full table
# and renumber the rows from 0.
traffic_seoul_2019 = traffic_seoul_2019.reset_index(drop=True)
traffic_seoul_2019

Unnamed: 0,년도,월,자치구명,발생건수,사망자수,부상자수
0,2019,1,종로구,87,1,125
1,2019,2,종로구,66,1,84
2,2019,3,종로구,87,2,122
3,2019,4,종로구,85,0,131
4,2019,5,종로구,112,1,158
...,...,...,...,...,...,...
295,2019,8,강동구,127,2,175
296,2019,9,강동구,98,2,137
297,2019,10,강동구,108,1,144
298,2019,11,강동구,146,0,199


In [14]:
# Total the accident, death and injury counts per district (자치구명).
# No `values` are given, so every other column is summed too; the 년도 and
# 월 totals that appear in the result carry no meaning.
# The aggregation is named by the string 'sum' rather than the builtin
# `sum`: recent pandas releases emit a FutureWarning when the builtin
# callable is passed, while the string form yields the same totals.
trffic_anal = pd.pivot_table(
    traffic_seoul_2019,
    index='자치구명',
    aggfunc='sum',
)

trffic_anal

  pd.pivot_table(


Unnamed: 0_level_0,년도,발생건수,부상자수,사망자수,월
자치구명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
강남구,24228,3722,5182,14,78
강동구,24228,1414,1910,11,78
강북구,24228,1277,1706,7,78
강서구,24228,1829,2491,20,78
관악구,24228,1363,1755,10,78
광진구,24228,973,1316,3,78
구로구,24228,1529,2096,13,78
금천구,24228,920,1353,9,78
노원구,24228,1703,2302,8,78
도봉구,24228,852,1163,4,78


In [16]:
# Drop the year and month totals; keep only the three count columns.
trffic_anal = trffic_anal.loc[:, ['발생건수', '부상자수', '사망자수']]

trffic_anal.head()

Unnamed: 0_level_0,발생건수,부상자수,사망자수
자치구명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
강남구,3722,5182,14
강동구,1414,1910,11
강북구,1277,1706,7
강서구,1829,2491,20
관악구,1363,1755,10


In [19]:
# Data that includes latitude and longitude.
# The file is read as EUC-KR and its columns are relabelled so that the
# district-name column is called '자치구명'.
seoul_limit = pd.read_csv(
    "./Data/seoul.csv",
    encoding='euc-kr',
).set_axis(['자치구명', 'lon', 'lat'], axis=1)
seoul_limit.head()

Unnamed: 0,자치구명,lon,lat
0,강남구,127.0475,37.51731
1,강동구,127.1238,37.53013
2,강북구,127.0255,37.63975
3,관악구,126.9515,37.47834
4,구로구,126.8875,37.49547


In [20]:
# Merge trffic_anal with seoul_limit on the district name.
# '자치구명' is the index of trffic_anal and a regular column of seoul_limit.
data_result = trffic_anal.merge(seoul_limit, on='자치구명')
data_result.head()

Unnamed: 0,자치구명,발생건수,부상자수,사망자수,lon,lat
0,강남구,3722,5182,14,127.0475,37.51731
1,강동구,1414,1910,11,127.1238,37.53013
2,강북구,1277,1706,7,127.0255,37.63975
3,강서구,1829,2491,20,126.8496,37.55094
4,관악구,1363,1755,10,126.9515,37.47834


In [23]:
# Check the row count and column dtypes of the merged table.
data_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   자치구명    25 non-null     object 
 1   발생건수    25 non-null     int64  
 2   부상자수    25 non-null     int64  
 3   사망자수    25 non-null     int64  
 4   lon     25 non-null     float64
 5   lat     25 non-null     float64
dtypes: float64(2), int64(3), object(1)
memory usage: 1.3+ KB


In [24]:
# The int columns have to be converted to float in order to use folium.

data_result = data_result.astype(
    {'발생건수': float, '부상자수': float, '사망자수': float}
)

data_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   자치구명    25 non-null     object 
 1   발생건수    25 non-null     float64
 2   부상자수    25 non-null     float64
 3   사망자수    25 non-null     float64
 4   lon     25 non-null     float64
 5   lat     25 non-null     float64
dtypes: float64(5), object(1)
memory usage: 1.3+ KB


In [26]:
# Mark the district offices: one marker per row of data_result, with the
# district name shown in the popup.
# NOTE: the name `map` shadows the Python builtin of the same name.
map = folium.Map(location=[37.5502, 126.982], zoom_start=11)

for name, lat, lon in zip(
    data_result['자치구명'], data_result['lat'], data_result['lon']
):
    folium.Marker(
        [lat, lon],
        popup=folium.Popup(name, max_width=200),
    ).add_to(map)

map

### 자치구별 교통사고 발생건수 표시하기

In [28]:
# Preview the merged table before plotting the accident counts.
data_result.head()

Unnamed: 0,자치구명,발생건수,부상자수,사망자수,lon,lat
0,강남구,3722.0,5182.0,14.0,127.0475,37.51731
1,강동구,1414.0,1910.0,11.0,127.1238,37.53013
2,강북구,1277.0,1706.0,7.0,127.0255,37.63975
3,강서구,1829.0,2491.0,20.0,126.8496,37.55094
4,관악구,1363.0,1755.0,10.0,126.9515,37.47834


In [31]:
# Mark each district office and overlay a circle whose size reflects the
# district's accident count (발생건수).
# NOTE: the name `map` shadows the Python builtin of the same name.
map = folium.Map(
    location=[37.5502, 126.982],
    zoom_start=11,
)

for n in data_result.index:
    location = [data_result['lat'][n], data_result['lon'][n]]

    folium.Marker(
        location,
        popup=folium.Popup(data_result['자치구명'][n], max_width=200),
    ).add_to(map)

    # The radius is the accident count divided by 100.
    # `fill_opacity` is the folium keyword for fill transparency; the
    # matplotlib-style `alpha` keyword used previously is not a
    # folium/Leaflet path option, so the 0.5 transparency was never applied.
    folium.CircleMarker(
        location,
        radius=data_result['발생건수'][n] / 100,
        color='blue',
        fill_color='skyblue',
        fill=True,
        fill_opacity=0.5,
    ).add_to(map)

map

In [None]:
# 자치구별 교통사고 사망자수를 지도로 표시하기
