# 데이터 시각화 1 - 지리데이터

## 목차
- 1. 단순 점 찍기
- 2. 클러스터
- 3. 구역 경계 나누기
- 4. 경로 표시

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import json

  import pandas.util.testing as tm


In [7]:
# Folder (relative to the notebook) that contains all input data files.
data_path = 'data/'

# Crime statistics per Seoul police station; CSV column 0 becomes the index.
df1 = pd.read_csv(
    data_path + 'seoul_crime_by_office.csv',
    index_col=0,
)
df1.head()

Unnamed: 0,관서명,살인 발생,살인 검거,강도 발생,강도 검거,강간 발생,강간 검거,절도 발생,절도 검거,폭력 발생,폭력 검거,구별,lat,lng,검거
0,중부서,2,2,3,2,105,65,1395,477,1355,1170,중구,37.563646,126.98958,1.275416
1,종로서,3,3,6,5,115,98,1070,413,1278,1070,종로구,37.575558,126.984867,1.523847
2,남대문서,1,0,6,4,65,46,1153,382,869,794,중구,37.554758,126.973498,0.907372
3,서대문서,2,2,5,4,154,124,1812,738,2056,1711,서대문구,37.564785,126.966776,1.978299
4,혜화서,3,2,5,4,96,63,1114,424,1015,861,종로구,37.571853,126.998914,1.198382


## 1. 단순 점 찍기

In [3]:
# Base map centred on Seoul.
m = folium.Map(location=[37.5502, 126.982], zoom_start=11)

# One marker per police station. Iterating the columns directly (instead of
# df1[col][i] with i in range(len(df1))) does not depend on the index labels
# being exactly 0..n-1.
station_rows = zip(df1['lat'], df1['lng'], df1['관서명'], df1['살인 발생'])
for lat, lng, station, murders in station_rows:
    # Popup text: station name immediately followed by its homicide count.
    folium.Marker(
        [lat, lng],
        popup=str(station) + str(murders),
    ).add_to(m)

# Highlight circle with a 'Sogang University' popup. The original colour
# strings ('#ffffgg', '#fffggg') contained the non-hex digit 'g' and were
# therefore not valid CSS colours; a valid yellow is used instead.
folium.CircleMarker(
    [37.552018, 126.939577],
    radius=50,
    color='#ffff00',
    fill_color='#ffff00',
    popup='Sogang University',
).add_to(m)
m

## 2. 클러스터

In [4]:
# Marker cluster: same station markers as before, but grouped by a
# MarkerCluster layer instead of being added to the map directly.

from folium.plugins import MarkerCluster

m = folium.Map(location=[37.5502, 126.982], zoom_start=11)

marker_cluster = MarkerCluster().add_to(m)
for lat, lng, station, murders in zip(
    df1['lat'], df1['lng'], df1['관서명'], df1['살인 발생']
):
    # Popup text: station name followed by its homicide count.
    station_marker = folium.Marker(
        [lat, lng],
        popup=str(station) + str(murders),
    )
    station_marker.add_to(marker_cluster)

m

## 3. 구역 경계 나누기 

In [9]:
# Read the municipality boundary GeoJSON into a plain dict (kept as df2).
with open(
    data_path + '02. skorea_municipalities_geo_simple.json',
    mode='r',
    encoding='utf-8',
) as geo_fp:
    df2 = json.load(geo_fp)

df2

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'id': '강동구',
   'properties': {'code': '11250',
    'name': '강동구',
    'name_eng': 'Gangdong-gu',
    'base_year': '2013'},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[127.11519584981606, 37.557533180704915],
      [127.16683184366129, 37.57672487388627],
      [127.18408792330152, 37.55814280369575],
      [127.16530984307447, 37.54221851258693],
      [127.14672806823502, 37.51415680680291],
      [127.12123165719615, 37.52528270089],
      [127.1116764203608, 37.540669955324965],
      [127.11519584981606, 37.557533180704915]]]}},
  {'type': 'Feature',
   'id': '송파구',
   'properties': {'code': '11240',
    'name': '송파구',
    'name_eng': 'Songpa-gu',
    'base_year': '2013'},
   'geometry': {'type': 'Polygon',
    'coordinates': [[[127.0690698130372, 37.522279423505026],
      [127.10087519791962, 37.524841220167055],
      [127.1116764203608, 37.540669955324965],
      [127.12123165719615, 37.52528270089

In [10]:
# Scaled per-district crime table; only the district name ('구별') and the
# homicide column ('살인') are loaded, with the first of them as the index.
df3 = pd.read_csv(
    data_path + 'seoul_crime_by_gu_scaled.csv',
    usecols=['구별', '살인'],
    index_col=0,
)
df3

Unnamed: 0_level_0,살인
구별,Unnamed: 1_level_1
강남구,0.916667
강동구,0.166667
강북구,0.416667
관악구,0.583333
광진구,0.166667
구로구,0.5
금천구,0.083333
노원구,0.666667
도봉구,0.083333
동대문구,0.25


In [11]:
# Black-and-white base map so the choropleth colours stand out.
# NOTE(review): newer folium releases reportedly no longer bundle the Stamen
# tile sets - confirm 'Stamen Toner' still resolves on the installed version.
m = folium.Map(
    location=[37.5502, 126.982],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Map.choropleth() is deprecated (and removed in later folium releases);
# the Choropleth class is the supported replacement. `data` is a Series
# indexed by district name, so no `columns` argument is needed - the index
# values are matched against each GeoJSON feature's 'id'.
folium.Choropleth(
    geo_data=df2,            # district boundary GeoJSON
    data=df3['살인'],
    key_on='feature.id',
    fill_color='Reds',       # colour scale name
).add_to(m)
m



## 4. 경로 표시

In [13]:
from ipyleaflet import Map, AntPath,Marker , AwesomeIcon
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import clear_output

In [14]:
# Minimal ipyleaflet example: an interactive map centred at (52, 10), zoom 8.
m = Map(center=(52, 10), zoom=8)
m

Map(center=[52, 10], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…

In [17]:
# Load the patient route records (one row per visited location) from
# PatientRoute.csv and show them.
route_df = pd.read_csv(data_path + 'PatientRoute.csv')
route_df

Unnamed: 0,patient_id,global_num,date,province,city,type,latitude,longitude
0,1000000002,5.0,2020-01-26,Seoul,Gwangjin-gu,store,37.563992,127.029534
1,1000000002,5.0,2020-01-27,Seoul,Gangbuk-gu,store,37.592057,127.018898
2,1000000002,5.0,2020-01-28,Seoul,Gangbuk-gu,store,37.591669,127.018420
3,1000000002,5.0,2020-01-29,Seoul,Seongbuk-gu,hospital,37.606498,127.092761
4,1000000002,5.0,2020-01-30,Seoul,Seongbuk-gu,hospital,37.612772,127.098167
...,...,...,...,...,...,...,...,...
2063,1000000432,9599.0,2020-03-29,Seoul,Seongbuk-gu,hospital,37.612772,127.098167
2064,1000000433,9627.0,2020-03-27,Incheon,Jung-gu,airport,37.460191,126.440696
2065,1000000433,9627.0,2020-03-28,Seoul,Mapo-gu,hospital,37.578588,126.936251
2066,1000000433,9627.0,2020-03-29,Seoul,Seodaemun-gu,hospital,37.604279,126.905087


In [19]:
# Re-number patient_id as small consecutive integers (1..N) so patients are
# easier to refer to. Sorting makes the numbering deterministic.
id_list = sorted(route_df['patient_id'].unique().tolist())

# The range must run to N inclusive: zip() stops at the shorter input, so
# range(1, N) would silently leave the last patient without a new id.
new_id_dict = dict(zip(id_list, range(1, len(id_list) + 1)))

# Every patient_id is a key of new_id_dict, so map() is an exact lookup.
route_df['new_id'] = route_df['patient_id'].map(new_id_dict)

In [22]:
# Load the per-patient attribute table from PatientInfo.csv and show it.
info_df = pd.read_csv(data_path+'PatientInfo.csv')
# Disabled: adding new_id to info_df as well.
#info_df['new_id'] = info_df['patient_id'].replace(new_id_dict)
info_df

Unnamed: 0,patient_id,sex,age,country,province,city,infection_case,infected_by,contact_number,symptom_onset_date,confirmed_date,released_date,deceased_date,state
0,1000000001,male,50s,Korea,Seoul,Gangseo-gu,overseas inflow,,75,2020-01-22,2020-01-23,2020-02-05,,released
1,1000000002,male,30s,Korea,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
2,1000000003,male,50s,Korea,Seoul,Jongno-gu,contact with patient,2002000001,17,,2020-01-30,2020-02-19,,released
3,1000000004,male,20s,Korea,Seoul,Mapo-gu,overseas inflow,,9,2020-01-26,2020-01-30,2020-02-15,,released
4,1000000005,female,20s,Korea,Seoul,Seongbuk-gu,contact with patient,1000000002,2,,2020-01-31,2020-02-24,,released
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5160,7000000015,female,30s,Korea,Jeju-do,Jeju-do,overseas inflow,,25,,2020-05-30,2020-06-13,,released
5161,7000000016,,,Korea,Jeju-do,Jeju-do,overseas inflow,,,,2020-06-16,2020-06-24,,released
5162,7000000017,,,Bangladesh,Jeju-do,Jeju-do,overseas inflow,,72,,2020-06-18,,,isolated
5163,7000000018,,,Bangladesh,Jeju-do,Jeju-do,overseas inflow,,,,2020-06-18,,,isolated


In [23]:
# Attach patient attributes to every route row; only patients present in
# both tables are kept (inner join on patient_id).
df4 = route_df.merge(info_df, how='inner', on='patient_id')
df4

Unnamed: 0,patient_id,global_num,date,province_x,city_x,type,latitude,longitude,new_id,sex,...,province_y,city_y,infection_case,infected_by,contact_number,symptom_onset_date,confirmed_date,released_date,deceased_date,state
0,1000000002,5.0,2020-01-26,Seoul,Gwangjin-gu,store,37.563992,127.029534,1,male,...,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
1,1000000002,5.0,2020-01-27,Seoul,Gangbuk-gu,store,37.592057,127.018898,1,male,...,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
2,1000000002,5.0,2020-01-28,Seoul,Gangbuk-gu,store,37.591669,127.018420,1,male,...,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
3,1000000002,5.0,2020-01-29,Seoul,Seongbuk-gu,hospital,37.606498,127.092761,1,male,...,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
4,1000000002,5.0,2020-01-30,Seoul,Seongbuk-gu,hospital,37.612772,127.098167,1,male,...,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2063,1000000432,9599.0,2020-03-29,Seoul,Seongbuk-gu,hospital,37.612772,127.098167,382,female,...,Seoul,Geumcheon-gu,etc,,,,2020-03-29,,,released
2064,1000000433,9627.0,2020-03-27,Incheon,Jung-gu,airport,37.460191,126.440696,383,female,...,Seoul,Dongjak-gu,etc,,,,2020-03-29,,,released
2065,1000000433,9627.0,2020-03-28,Seoul,Mapo-gu,hospital,37.578588,126.936251,383,female,...,Seoul,Dongjak-gu,etc,,,,2020-03-29,,,released
2066,1000000433,9627.0,2020-03-29,Seoul,Seodaemun-gu,hospital,37.604279,126.905087,383,female,...,Seoul,Dongjak-gu,etc,,,,2020-03-29,,,released


In [24]:
# Replace missing sex, age band and confirmation date with 'Unknown';
# all other columns keep their missing values.
df4 = df4.fillna({
    'sex': 'Unknown',
    'age': 'Unknown',
    'confirmed_date': 'Unknown',
})

In [25]:
# Pre-build the marker icons for the start (icon1, green) and the end
# (icon2, red) of a route; they differ only in marker colour.
icon1, icon2 = (
    AwesomeIcon(
        name='street-view',
        marker_color=marker_colour,
        icon_color='black',
        spin=False,
    )
    for marker_colour in ('green', 'red')
)

In [28]:
# Patient (by new_id) whose route is drawn in this cell.
i = 78

# Keep only that patient's rows and collect the [lat, lng] pairs in row order.
tmp_df = df4[df4['new_id'] == i]
route_loc = tmp_df[['latitude', 'longitude']].values.tolist()

# The patient attributes are repeated on every row, so read the first one.
print('성별: ', tmp_df['sex'].iloc[0])
print('연령대: ', tmp_df['age'].iloc[0])
print('확진 일자: ', tmp_df['confirmed_date'].iloc[0])

m = Map(center=tuple(route_loc[0]), zoom=8)
# Start-of-route marker.
m.add_layer(Marker(icon=icon1, location=tuple(route_loc[0]), title='a'))
# Animated ant path along the visited locations.
m += AntPath(
    locations=route_loc,
    use='polyline',
    dash_array=[2, 20],
)
# End-of-route marker.
m.add_layer(Marker(icon=icon2, location=tuple(route_loc[-1])))
m

성별:  female
연령대:  30s
확진 일자:  2020-03-01


Map(center=[37.5115918, 127.028073], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title…

In [29]:
# Controls: a dropdown offering patient numbers 1..len(id_list) and two buttons.
patient_dropdown = widgets.Dropdown(
    options=range(1, len(id_list) + 1),
    description='환자 ID',
    disabled=False,
)
start = widgets.Button(description='경로 시각화')
to_home_button = widgets.Button(description='뒤로 가기')

In [32]:
def viz(null):
    """Redraw the controls and plot the selected patient's route.

    Click handler for the ``start`` button. Clears the cell output, shows the
    dropdown and start button again, prints the selected patient's ID, sex,
    age band and confirmation date, then displays a map with a start marker,
    an animated path through the visited locations and an end marker.

    Args:
        null: Argument supplied by ``on_click``; unused.
    """
    clear_output()
    display(patient_dropdown)
    display(start)

    selected_id = patient_dropdown.value
    print('환자 ID: ', selected_id)

    # Rows of the selected patient only.
    tmp_df = df4[df4['new_id'] == selected_id]
    if tmp_df.empty:
        # Indexing [0] below would raise on an ID that has no route rows.
        print('경로 데이터가 없습니다.')
        return

    route_loc = tmp_df[['latitude', 'longitude']].values.tolist()
    print('성별: ', tmp_df['sex'].values[0])
    print('연령대: ', tmp_df['age'].values[0])
    # Use the selected patient's rows; the original looked the date up with
    # the unrelated global `i`, which printed another patient's date.
    print('확진 일자: ', tmp_df['confirmed_date'].values[0])

    m = Map(center=tuple(route_loc[0]), zoom=9)
    # Start-of-route marker.
    m.add_layer(Marker(icon=icon1, location=tuple(route_loc[0]), title='a'))
    # Animated ant path along the visited locations.
    m += AntPath(locations=route_loc, use='polyline', dash_array=[2, 20])
    # End-of-route marker.
    m.add_layer(Marker(icon=icon2, location=tuple(route_loc[-1])))
    display(m)

def to_home(null):
    """Reset the output area and print the selected patient's summary.

    Click handler for ``to_home_button``. Clears the cell output, shows the
    dropdown and the start button again, then prints the ID, sex, age band
    and confirmation date of the patient chosen in ``patient_dropdown``.
    No map is drawn.

    Args:
        null: Argument supplied by ``on_click``; unused.
    """
    clear_output()
    display(patient_dropdown)
    display(start)

    selected_id = patient_dropdown.value
    print('환자 ID: ', selected_id)

    # Rows of the selected patient only.
    tmp_df = df4[df4['new_id'] == selected_id]
    if tmp_df.empty:
        # Indexing [0] below would raise on an ID that has no route rows.
        print('경로 데이터가 없습니다.')
        return

    # The original referenced the undefined names `tmp_df4` and `d4f`
    # (NameError on every click) and the unrelated global `i`.
    print('성별: ', tmp_df['sex'].values[0])
    print('연령대: ', tmp_df['age'].values[0])
    print('확진 일자: ', tmp_df['confirmed_date'].values[0])
    
# Connect the handlers to the buttons: clicking a button calls its function.
start.on_click(viz)
to_home_button.on_click(to_home)

In [33]:
# Render the dropdown and both buttons, in this order, below the cell.
display(patient_dropdown, start, to_home_button)

Dropdown(description='환자 ID', index=9, options=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…

Button(description='경로 시각화', style=ButtonStyle())

환자 ID:  10
성별:  male
연령대:  80s
확진 일자:  2020-03-01


Map(center=[37.5870127, 127.0268836], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_titl…