## 병합 데이터에서 row, col 수정
- 수정 데이터셋
  - `data\extracted\merged_route_final.csv`
- 최종 데이터 저장 경로
  - `data\extracted\merged_route_final.csv`

In [1]:
import sys
import os

from os import listdir
from os.path import isfile, join
from pathlib import Path

# Resolve the directory two levels above the current working directory and
# append it to the import search path.
root = Path.cwd().parent.parent
sys.path.append(str(root))

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Path of the merged route CSV under <root>/data/extracted; the same path is
# reused when the result is written back at the end of the notebook.
route_path = os.path.join(root, 'data', 'extracted', 'merged_route_final.csv')
route_df = pd.read_csv(filepath_or_buffer=route_path)
route_df.head()

Unnamed: 0,patient_id,city,infection_case,date,type,latitude,longitude,row,col
0,2000000001,gangnam,overseas inflow,2020-01-22,hospital,37.524355,127.027948,196.0,169.0
1,1400000003,yongsan,etc,2020-01-23,etc,37.536606,126.97714,182.0,131.0
2,2000000001,gangnam,overseas inflow,2020-01-23,store,37.527752,127.01948,192.0,163.0
3,1000000001,jung,overseas inflow,2020-01-24,hospital,37.567241,127.005659,147.0,152.0
4,1400000003,jongno,etc,2020-01-24,etc,37.579617,126.977041,133.0,131.0


In [5]:
# Discard the existing row/col values: every record gets the placeholder -1
# in both grid columns.
for grid_column in ('row', 'col'):
    route_df[grid_column] = -1
route_df.head()

Unnamed: 0,patient_id,city,infection_case,date,type,latitude,longitude,row,col
0,2000000001,gangnam,overseas inflow,2020-01-22,hospital,37.524355,127.027948,-1,-1
1,1400000003,yongsan,etc,2020-01-23,etc,37.536606,126.97714,-1,-1
2,2000000001,gangnam,overseas inflow,2020-01-23,store,37.527752,127.01948,-1,-1
3,1000000001,jung,overseas inflow,2020-01-24,hospital,37.567241,127.005659,-1,-1
4,1400000003,jongno,etc,2020-01-24,etc,37.579617,126.977041,-1,-1


In [15]:
def to_grid(lat, lon, grid_size=256):
    """Convert a latitude/longitude pair to a (row, col) grid cell.

    The grid covers the fixed bounding box defined by the ``seoul_*``
    constants below. Row 0 lies at ``seoul_start_lat`` and the row index
    grows as latitude decreases toward ``seoul_end_lat``; column 0 lies at
    ``seoul_start_lon`` and the column index grows toward ``seoul_end_lon``.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        grid_size: Number of cells along each axis. Indices are scaled to
            ``0 .. grid_size - 1``. The default of 256 reproduces the
            original hard-coded scaling factor of 255.

    Returns:
        A ``(row, col)`` tuple of ints, or ``(-1, -1)`` when the point is
        outside the bounding box or either coordinate is NaN.
    """
    import math

    seoul_start_lat = 37.698098
    seoul_start_lon = 126.799791
    seoul_end_lat = 37.428
    seoul_end_lon = 127.142928

    # NaN compares False against everything, so it would pass the range
    # checks below and then make int() raise; treat it as "outside".
    if math.isnan(lat) or math.isnan(lon):
        return -1, -1

    # Latitude runs from the larger start value down to the smaller end value.
    if seoul_end_lat > lat or lat > seoul_start_lat:
        return -1, -1
    if seoul_start_lon > lon or lon > seoul_end_lon:
        return -1, -1

    lat_span = seoul_end_lat - seoul_start_lat  # negative: end < start
    lon_span = seoul_end_lon - seoul_start_lon
    last_index = grid_size - 1

    # Both latitude terms are <= 0, so the quotient is non-negative.
    row = ((lat - seoul_start_lat) * last_index) / lat_span
    col = ((lon - seoul_start_lon) * last_index) / lon_span

    # int() truncates toward zero, i.e. floors these non-negative values.
    return int(row), int(col)

# Compute the grid cell for every route point and store it in 'row'/'col'.
# Whole columns are assigned at once, aligned on the frame's own index, so the
# result does not rely on index labels being equal to row positions (which
# per-row `iloc[label, ...]` writes would require).
grid_cells = [
    to_grid(latitude, longitude)
    for latitude, longitude in zip(route_df['latitude'], route_df['longitude'])
]
route_df['row'] = pd.Series(
    [cell[0] for cell in grid_cells], index=route_df.index, dtype='int64'
)
route_df['col'] = pd.Series(
    [cell[1] for cell in grid_cells], index=route_df.index, dtype='int64'
)

route_df.head()

Unnamed: 0,patient_id,city,infection_case,date,type,latitude,longitude,row,col
0,2000000001,gangnam,overseas inflow,2020-01-22,hospital,37.524355,127.027948,164,169
1,1400000003,yongsan,etc,2020-01-23,etc,37.536606,126.97714,152,131
2,2000000001,gangnam,overseas inflow,2020-01-23,store,37.527752,127.01948,160,163
3,1000000001,jung,overseas inflow,2020-01-24,hospital,37.567241,127.005659,123,152
4,1400000003,jongno,etc,2020-01-24,etc,37.579617,126.977041,111,131


In [18]:
# Remove rows that have no valid grid cell (row or col equal to -1).
print('len: %d' % len(route_df.index))

# Boolean mask of the rows to discard; the matching index labels are kept in
# `indices_to_del` so the list is still available after this cell runs.
outside_grid = (route_df['row'] == -1) | (route_df['col'] == -1)
indices_to_del = route_df.index[outside_grid].tolist()

# A single drop call removes all labels at once instead of copying the frame
# once per removed row; the index is then renumbered from 0.
route_df = route_df.drop(index=indices_to_del).reset_index(drop=True)

print('len: %d' % len(route_df.index))

len: 6924
len: 6924


In [19]:
# Save the result back to the CSV path it was loaded from, without writing
# the DataFrame index; 'utf-8-sig' prefixes the file with a UTF-8 BOM.
route_df.to_csv(path_or_buf=route_path, index=False, encoding='utf-8-sig')