In [1]:
import sys
import os

from os import listdir
from os.path import isfile, join
from pathlib import Path

# The project root is two directory levels above the current working
# directory; it is appended to sys.path so modules under it can be imported.
root = Path.cwd().parent.parent
sys.path.append(os.fspath(root))

In [2]:
import pandas as pd

from datetime import datetime, timedelta

### 데이터 원본

In [3]:
# Read the Korea_Covid_Patient.csv extract into `status` and preview it.
status_path = os.path.join(root, 'data', 'extracted', 'Korea_Covid_Patient.csv')
status = pd.read_csv(filepath_or_buffer=status_path)
status.head(5)

Unnamed: 0,District,State,data_date,new_pat,pop,long,lat,no_pat
0,Seoul,Seoul,20200122,0,9689159,126.978,37.5665,0
1,Seoul,Seoul,20200123,0,9689159,126.978,37.5665,0
2,Seoul,Seoul,20200124,1,9689159,126.978,37.5665,1
3,Seoul,Seoul,20200125,0,9689159,126.978,37.5665,1
4,Seoul,Seoul,20200126,0,9689159,126.978,37.5665,1


In [4]:
# Read the merged route extract and convert its `date` column to datetimes so
# it can be compared against datetime values in later cells.
route_path = os.path.join(root, 'data', 'extracted', 'merged_route_final.csv')
route = pd.read_csv(filepath_or_buffer=route_path)
route = route.assign(date=pd.to_datetime(route['date']))
route.head(5)

Unnamed: 0,patient_id,city,infection_case,date,type,latitude,longitude,row,col
0,2000000001,gangnam,overseas inflow,2020-01-22,hospital,37.524355,127.027948,196.0,169.0
1,1400000003,yongsan,etc,2020-01-23,etc,37.536606,126.97714,182.0,131.0
2,2000000001,gangnam,overseas inflow,2020-01-23,store,37.527752,127.01948,192.0,163.0
3,1000000001,jung,overseas inflow,2020-01-24,hospital,37.567241,127.005659,147.0,152.0
4,1400000003,jongno,etc,2020-01-24,etc,37.579617,126.977041,133.0,131.0


In [5]:
# Replace the raw `type` and `infection_case` labels in `route` with the
# coarser labels listed in the checklist lookup tables.

type_shrink_path = join(root, 'data', 'checklist', 'type_column.csv')
type_shrink_df = pd.read_csv(type_shrink_path)

reason_shrink_path = join(root, 'data', 'checklist', 'infection_case_column.csv')
reason_shrink_df = pd.read_csv(reason_shrink_path)


def _shrink_labels(labels, lookup_df, raw_col, shrunk_col):
    """Translate `labels` through the `raw_col` -> `shrunk_col` lookup table.

    Parameters
    ----------
    labels : pd.Series
        Raw labels to translate.
    lookup_df : pd.DataFrame
        Table holding the raw labels in `raw_col` and their replacements in
        `shrunk_col`.
    raw_col, shrunk_col : str
        Column names in `lookup_df`.

    Returns
    -------
    pd.Series
        Replacement labels, aligned with `labels`.

    Raises
    ------
    KeyError
        If any value in `labels` has no entry in `lookup_df[raw_col]`.
    """
    # When a raw label is listed more than once, the first entry wins.
    lookup = (
        lookup_df
        .drop_duplicates(subset=raw_col, keep='first')
        .set_index(raw_col)[shrunk_col]
    )
    unknown = labels[~labels.isin(lookup.index)].unique()
    if len(unknown) > 0:
        # Fail loudly instead of silently writing NaN into the column. This
        # also triggers if the cell is re-run on already-shrunk labels that
        # are not themselves listed as raw labels.
        raise KeyError(
            f"No '{shrunk_col}' mapping for '{raw_col}' values: {list(unknown)}"
        )
    return labels.map(lookup)


# One vectorised lookup per column instead of a per-row scan and scalar write.
route['type'] = _shrink_labels(route['type'], type_shrink_df, 'type', 'type_1')
route['infection_case'] = _shrink_labels(
    route['infection_case'], reason_shrink_df, 'infection_case', 'infection_case_1'
)

route.head()

Unnamed: 0,patient_id,city,infection_case,date,type,latitude,longitude,row,col
0,2000000001,gangnam,overseas,2020-01-22,medical_institution,37.524355,127.027948,196.0,169.0
1,1400000003,yongsan,other,2020-01-23,other,37.536606,126.97714,182.0,131.0
2,2000000001,gangnam,overseas,2020-01-23,store,37.527752,127.01948,192.0,163.0
3,1000000001,jung,overseas,2020-01-24,medical_institution,37.567241,127.005659,147.0,152.0
4,1400000003,jongno,other,2020-01-24,other,37.579617,126.977041,133.0,131.0


### 방문지 유형 상관 관계

In [7]:
# Daily visit counts per place type: one row per date listed in `status`,
# one column per `route['type']` value, each cell = number of `route` rows
# with that date and type.

# Column order follows first appearance in `route`.
type_names = route['type'].unique().tolist()
types = ['date'] + type_names  # same contents as before, kept for later use

# `data_date` is stored as YYYYMMDD; each distinct day gets exactly one row,
# so a repeated date can neither add an empty row nor count visits twice.
status_dates = pd.DatetimeIndex(
    pd.to_datetime(status['data_date'].astype(str), format='%Y%m%d').drop_duplicates()
)

# crosstab counts every (date, type) pair in one pass; reindex adds all-zero
# rows/columns for days or types without visits and drops route dates that
# are absent from `status`. This replaces the row-by-row DataFrame.append
# loop (append no longer exists in pandas >= 2.0).
type_df = (
    pd.crosstab(route['date'], route['type'])
    .reindex(index=status_dates, columns=type_names, fill_value=0)
    .rename_axis(index=None, columns=None)
)
type_df.head()

Unnamed: 0,date,medical_institution,other,store,fnb,public_transportation,religious_facility,entertainment_facility,service_facility,sports_facility,...,club,public_institution,finantial_institution,gethering,outdoor,sauna,work,door_sales,exhibition,hall
0,2020-01-22,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,0,2,0,1,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,0,1,2,1,2,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
# For each place type of interest, list the place types whose daily counts
# correlate with it above the threshold, strongest first (the type itself
# comes first because its self-correlation is 1).
TYPE_CORR_THRESHOLD = 0.3
type_list = ['educational_institution', 'hall', 'store', 'finantial_institution']

# The correlation matrix does not depend on the loop variable, so compute it
# once. Restricting to numeric columns is explicit rather than relying on
# corr() to discard non-numeric columns. `type_df` itself is left unmodified.
type_corr = type_df.select_dtypes(include='number').corr()

for type_elem in type_list:
    ranked = type_corr[type_elem].sort_values(ascending=False)
    # NaN correlations compare False against the threshold and are dropped.
    new_list = ranked[ranked > TYPE_CORR_THRESHOLD].index.to_list()
    print(type_elem)
    print(new_list)
    print()

educational_institution
['educational_institution', 'finantial_institution', 'store', 'fnb']

hall
['hall', 'sports_facility', 'service_facility']

store
['store', 'fnb', 'work', 'medical_institution', 'educational_institution', 'public_transportation', 'gethering']

finantial_institution
['finantial_institution', 'educational_institution', 'service_facility', 'sports_facility', 'sauna']



### 감염 원인 상관 관계

In [35]:
# Daily visit counts per infection cause: one row per date listed in
# `status`, a `date` column followed by one column per
# `route['infection_case']` value, each cell = number of `route` rows with
# that date and cause.

# Column order follows first appearance in `route`.
reason_names = route['infection_case'].unique().tolist()
reasons = ['date'] + reason_names  # same contents as before, kept for later use

# `data_date` is stored as YYYYMMDD; each distinct day gets exactly one row,
# so a repeated date can neither add an empty row nor count visits twice.
status_dates = pd.DatetimeIndex(
    pd.to_datetime(status['data_date'].astype(str), format='%Y%m%d').drop_duplicates()
)

# crosstab counts every (date, cause) pair in one pass; reindex adds all-zero
# rows/columns for days or causes without visits and drops route dates that
# are absent from `status`. This replaces the row-by-row DataFrame.append
# loop (append no longer exists in pandas >= 2.0).
reason_df = (
    pd.crosstab(route['date'], route['infection_case'])
    .reindex(index=status_dates, columns=reason_names, fill_value=0)
    .rename_axis(index='date', columns=None)
    .reset_index()  # `date` stays a regular first column with a default index
)

reason_df.head()

Unnamed: 0,date,overseas,other,contact,hospital,building,church,call_center,health_facility,educational_facility,...,network_marketing,nursing_facility,company,gethering,public_transportation,office,bank,protest,theater,cafe
0,2020-01-22,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2020-01-23,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2020-01-24,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2020-01-25,4,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2020-01-26,4,1,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
# For each infection cause of interest, list the causes whose daily counts
# correlate with it above the threshold, strongest first (the cause itself
# comes first because its self-correlation is 1).
REASON_CORR_THRESHOLD = 0.3
reason_list = ['other', 'church', 'gethering', 'protest', 'overseas', 'office', 'educational_facility', 'nursing_facility']

# The correlation matrix does not depend on the loop variable, so compute it
# once. `reason_df` carries a datetime `date` column; selecting numeric
# columns explicitly keeps it out of the matrix instead of relying on corr()
# to discard it. `reason_df` itself is left unmodified.
reason_corr = reason_df.select_dtypes(include='number').corr()

for reason_elem in reason_list:
    ranked = reason_corr[reason_elem].sort_values(ascending=False)
    # NaN correlations compare False against the threshold and are dropped.
    new_list = ranked[ranked > REASON_CORR_THRESHOLD].index.to_list()
    print(reason_elem)
    print(new_list)
    print()

other
['other', 'church', 'contact', 'protest', 'overseas', 'theater']

church
['church', 'other', 'protest', 'contact', 'gethering']

gethering
['gethering', 'office', 'bank', 'cafe', 'church']

protest
['protest', 'church', 'other', 'theater', 'educational_facility', 'public_institution']

overseas
['overseas', 'other', 'contact']

office
['office', 'gethering', 'bank']

educational_facility
['educational_facility', 'theater', 'public_institution', 'insurance', 'protest']

nursing_facility
['nursing_facility']

