# 필요 라이브러리 설치 및 불러오기

In [None]:
!pip install pandas
!pip install haversine
!pip install IPython

In [1]:
import pandas as pd
from haversine import haversine
from IPython.display import display_html

In [2]:
df1 = pd.read_csv('중구,성동구,용산구 노드.csv')
# df1 = pd.read_csv('01_seoul_성동구_remove_intersections.csv')
# df2 = pd.read_csv('02_seoul_용산구_remove_intersections.csv')
# df3 = pd.read_csv('03_seoul_중구_remove_intersections.csv')
df4 = pd.read_csv('04_서울시_가로수_위치_종로제외.csv')
df5 = pd.read_csv('05_서울시 주요 공원현황.csv')
df6 = pd.read_csv('06_서울특별시_성동구_도시공원정보_20210303_1614747038345_11665.csv', encoding='cp949')
df7 = pd.read_csv('07_서울특별시_용산구_도시공원정보_20200317.csv', encoding='cp949')
df8 = pd.read_csv('08_서울특별시_중구_도시공원정보_20200604_1607700631264_6683.csv',encoding='cp949')
df9 = pd.read_csv('09_서울시 녹지대 위치정보 (좌표계_ WGS1984).csv',encoding='cp949')

# A) 전처리

### 1) 보행자도로 노드 위경도 좌표 추출 및 데이터 결합

In [3]:
df1 = df1.rename(columns={'x':'lat', 'y':'lon'})

node_df = df1
node_df

Unnamed: 0,lat,lon
0,37.526794,127.001205
1,37.527184,127.000468
2,37.525970,126.997346
3,37.525748,126.998131
4,37.527696,127.002375
...,...,...
5534,37.563132,127.056399
5535,37.561535,127.042495
5536,37.561900,127.043529
5537,37.568020,127.029326


### 2) 가로수 데이터 필터링

In [4]:
tree_df = df4[(df4['구']=='중구') | (df4['구']=='성동구') | (df4['구']=='용산구')]
print(tree_df['구'].value_counts())

tree_df = tree_df[['lat', 'lon']]
tree_df

중구     7730
용산구    7523
성동구    6758
Name: 구, dtype: int64


Unnamed: 0,lat,lon
0,37.562722,126.998366
1,37.554485,127.011864
2,37.567167,126.970089
3,37.568109,127.015636
4,37.567539,126.985849
...,...,...
22006,37.542335,127.056249
22007,37.553943,127.041835
22008,37.567458,127.046119
22009,37.555728,127.025528


### 3) 공원 데이터 필터링 및 결합

In [5]:
# 서울 주요 공원
park_1 = df5[(df5['지역']=='중구') | (df5['지역']=='성동구') | (df5['지역']=='용산구')]
park_1 = park_1[['lat','lng']]
park_1 = park_1.rename(columns={'lng':'lon'})
print(len(park_1))


# 성동구
park_2 = df6[df6['공원구분']!='소공원']
park_2 = park_2[['위도','경도']]
park_2 = park_2.dropna(axis=0)
park_2 = park_2.rename(columns={'위도':'lat','경도':'lon'})
print(len(park_2))

# 용산구
park_3 = df7[df7['공원구분']!='소공원']
park_3 = park_3[['위도','경도']]
park_3 = park_3.dropna(axis=0)
park_3 = park_3.rename(columns={'위도':'lat','경도':'lon'})
print(len(park_3))

# 중구
park_4 = df8[df8['공원구분']!='소공원']
park_4 = park_4[['위도','경도']]
park_4 = park_4.dropna(axis=0)
park_4 = park_4.rename(columns={'위도':'lat','경도':'lon'})
print(len(park_4))

park_df = pd.concat([park_1, park_2, park_3, park_4])
park_df

11
35
27
18


Unnamed: 0,lat,lon
0,37.550140,126.990377
3,37.543072,127.041798
8,37.557228,127.021765
10,37.544814,127.052402
14,37.567524,127.003819
...,...,...
30,37.555480,126.964816
31,37.560568,126.968867
32,37.568073,126.985152
33,37.559824,126.971601


### 4) 녹지대 정보 필터링

In [6]:
df9 = df9[(df9['구명']=='중구') | (df9['구명']=='성동구') | (df9['구명']=='용산구')]
df9 = df9[['위도','경도']]
df9 = df9.rename(columns={'위도':'lat','경도':'lon'})

lat_lst = []
lon_lst = []


for idx, row in df9.iterrows():
    lat = row['lat']
    lon = row['lon']
    lat_lst.append(float(lat))
    lon_lst.append(float(lon))
    

green_df = pd.DataFrame()

green_df['lat'] = lat_lst
green_df['lon'] = lon_lst
green_df

Unnamed: 0,lat,lon
0,37.559825,126.971240
1,37.561471,126.980980
2,37.566887,126.998411
3,37.563283,127.002248
4,37.563417,127.002890
...,...,...
285,37.529546,126.991530
286,37.551687,127.035226
287,37.569349,127.024651
288,37.548178,127.021169


# B) 노드 별 점수 산정

In [7]:
df_os_intersec = node_df.sort_values(by=['lat'], ascending=True)
df_os_intersec.head()

Unnamed: 0,lat,lon
2158,37.514975,126.982874
2159,37.51534,126.983221
4694,37.515573,126.983079
13,37.515965,126.986348
288,37.516241,126.979701


### 1) 데이터 형태 변환

In [8]:
# 불러온 데이터 2차원 리스트로 전처리
def make_2d_list(df):
    x1 = []
    x2 = []
    for i in range(len(df)):
        x1.append(df.iloc[i, 0])
        x2.append(df.iloc[i, 1])
    list2 = list(zip(x1, x2))
    return list2

In [9]:
os_node_list = make_2d_list(df_os_intersec) # 노드
tree_node_list = make_2d_list(tree_df) # 나무
park_node_list = make_2d_list(park_df) # 공원
green_node_list = make_2d_list(green_df) # 녹지대

### 2) 점수 산정

In [10]:
# 노드와 데이터의 거리에 따라 점수 산정
def make_short_distance_dataframe(lis1, lis2, dist):
    y = []
    for i in range(len(lis1)):
        # m 설정
        min_x = dist
        for j in range(len(lis2)):
            x = haversine(lis1[i], lis2[j], unit = 'km')
            # 지정 거리 이내에 있다면 x값 저장
            if  x <= min_x:
                min_x = x
        y.append(min_x)
    mergedf = pd.DataFrame(data = y, index = os_node_list, columns = ['Min_distance'])
    return mergedf

In [11]:
df_os_to_tree = make_short_distance_dataframe(os_node_list, tree_node_list, 0.5) # 500m 이내 (나무)
df_os_to_tree['location'] = df_os_to_tree.index

df_os_to_park = make_short_distance_dataframe(os_node_list, park_node_list, 1.5) # 1.5km 이내 (공원)
df_os_to_park['location'] = df_os_to_park.index

df_os_to_green = make_short_distance_dataframe(os_node_list, green_node_list, 1.5) # 1.5km 이내 (녹지대)
df_os_to_green['location'] = df_os_to_green.index

In [12]:
print(len(df_os_to_tree))
print(len(df_os_to_park))
print(len(df_os_to_green))

5539
5539
5539


### 3) 점수 확인

In [13]:
#여러 데이터프레임 표시
def display_side_by_side(*args):
    html_str=''
    for df in args:
        html_str += df.to_html()
    display_html(html_str.replace('table','table style="display:inline"'), raw=True)

In [14]:
#인덱스를 단순 숫자로 바꿈
df_os_to_tree.reset_index(drop = True, inplace = True)
df_os_to_park.reset_index(drop = True, inplace = True)
df_os_to_green.reset_index(drop = True, inplace=True)
display_side_by_side(df_os_to_tree,df_os_to_park,df_os_to_green)

Unnamed: 0,Min_distance,location
0,0.136978,"(37.5149747, 126.98287420000001)"
1,0.098809,"(37.5153401, 126.9832213)"
2,0.070945,"(37.51557329999999, 126.9830786)"
3,0.071107,"(37.5159654, 126.98634799999999)"
4,0.192736,"(37.51624149999999, 126.9797013)"
5,0.022388,"(37.51643399999999, 126.9824226)"
6,0.052897,"(37.51647870000001, 126.98443170000002)"
7,0.201369,"(37.516797600000004, 126.97731950000001)"
8,0.007905,"(37.5168212, 126.98789450000001)"
9,0.032065,"(37.516852861635215, 126.98559110326009)"

Unnamed: 0,Min_distance,location
0,0.286525,"(37.5149747, 126.98287420000001)"
1,0.238344,"(37.5153401, 126.9832213)"
2,0.217887,"(37.51557329999999, 126.9830786)"
3,0.262248,"(37.5159654, 126.98634799999999)"
4,0.398986,"(37.51624149999999, 126.9797013)"
5,0.174426,"(37.51643399999999, 126.9824226)"
6,0.109398,"(37.51647870000001, 126.98443170000002)"
7,0.591581,"(37.516797600000004, 126.97731950000001)"
8,0.350738,"(37.5168212, 126.98789450000001)"
9,0.154054,"(37.516852861635215, 126.98559110326009)"

Unnamed: 0,Min_distance,location
0,0.188169,"(37.5149747, 126.98287420000001)"
1,0.151132,"(37.5153401, 126.9832213)"
2,0.123304,"(37.51557329999999, 126.9830786)"
3,0.234898,"(37.5159654, 126.98634799999999)"
4,0.281392,"(37.51624149999999, 126.9797013)"
5,0.045467,"(37.51643399999999, 126.9824226)"
6,0.056569,"(37.51647870000001, 126.98443170000002)"
7,0.487674,"(37.516797600000004, 126.97731950000001)"
8,0.338684,"(37.5168212, 126.98789450000001)"
9,0.158953,"(37.516852861635215, 126.98559110326009)"


### 4) 가중치 부여

In [15]:
df_os_to_tree['rank'] = df_os_to_tree['Min_distance'].rank(method = 'max', ascending = False)
df_os_to_tree
df_os_to_park['rank'] = df_os_to_park['Min_distance'].rank(method = 'max', ascending = False)
df_os_to_park
df_os_to_green['rank'] = df_os_to_green['Min_distance'].rank(method = 'max', ascending = False)
df_os_to_green
df_node_score = df_os_to_tree + df_os_to_park+ df_os_to_green
df_node_score['score'] = df_os_to_tree['rank'] + 1.5*df_os_to_park['rank'] + 1.5*df_os_to_green['rank'] # 공원과 녹지대는 1.5배의 가중치 부여
df_node_score

Unnamed: 0,Min_distance,location,rank,score
0,0.611672,"(37.5149747, 126.98287420000001, 37.5149747, 1...",8507.0,11957.5
1,0.488285,"(37.5153401, 126.9832213, 37.5153401, 126.9832...",10127.0,14090.5
2,0.412137,"(37.51557329999999, 126.9830786, 37.5155732999...",11277.0,15521.0
3,0.568253,"(37.5159654, 126.98634799999999, 37.5159654, 1...",9337.0,12614.5
4,0.873115,"(37.51624149999999, 126.9797013, 37.5162414999...",5663.0,8010.0
...,...,...,...,...
5534,0.512171,"(37.57264189999998, 127.04146440000001, 37.572...",12227.0,15622.0
5535,0.326594,"(37.57264365000204, 127.03826175000069, 37.572...",12404.0,16895.0
5536,0.416555,"(37.572736350013706, 127.0407823000003, 37.572...",12549.0,16541.5
5537,0.317196,"(37.5727749, 127.03845659999999, 37.5727749, 1...",12837.0,17337.0


### 5) 위경도와 score만 남기고 결과물 정리

In [16]:
lat_1 = []
lon_1 = []

# 결합된 location 컬럼 정리
tmp = df_node_score[['location']]

for idx, row in tmp.iterrows():
    lat = row[0][0]
    lon = row[0][1]
    lat_1.append(lat)
    lon_1.append(lon)
    
df_node_score['lat'] = lat_1
df_node_score['lon'] = lon_1

df_node_score = df_node_score[['score','lat','lon']]
df_node_score

Unnamed: 0,score,lat,lon
0,11957.5,37.514975,126.982874
1,14090.5,37.515340,126.983221
2,15521.0,37.515573,126.983079
3,12614.5,37.515965,126.986348
4,8010.0,37.516241,126.979701
...,...,...,...
5534,15622.0,37.572642,127.041464
5535,16895.0,37.572644,127.038262
5536,16541.5,37.572736,127.040782
5537,17337.0,37.572775,127.038457


In [17]:
df_node_score.to_csv('자연_score_final.csv', encoding='utf8', index=False)