### 데이터 불러오기

In [1]:
import pandas as pd
import numpy as np
import os

def load_hotel_reserve():
  customer_tb = pd.read_csv('./data/customer.csv')
  hotel_tb = pd.read_csv('./data/hotel.csv')
  reserve_tb = pd.read_csv('./data/reserve.csv')
  return customer_tb, hotel_tb, reserve_tb


def load_holiday_mst():
  holiday_tb = pd.read_csv('./data/holiday_mst.csv',
                           index_col=False)
  return holiday_tb


def load_production():
  production_tb = pd.read_csv('./data/production.csv')
  return production_tb


def load_production_missing_num():
  production_tb = pd.read_csv('./data/production_missing_num.csv')
  return production_tb


def load_production_missing_category():
  production_tb = pd.read_csv('./data/production_missing_category.csv')
  return production_tb


def load_monthly_index():
  monthly_index_tb = pd.read_csv('./data/monthly_index.csv')
  return monthly_index_tb


def load_meros_txt():
  with open('./data/txt/meros.txt', 'r') as f:
    meros = f.read()
    f.close()
  return meros


In [2]:
customer_tb, hotel_tb, reserve_tb = load_hotel_reserve()

### 4. 위치정보 데이터 전처리 기법

#### 지역 기준 좌표계 변환

In [3]:
# GIS 관련 패키지 설치하기
%pip install pyproj

Defaulting to user installation because normal site-packages is not writeable
Collecting pyproj
  Downloading pyproj-3.6.1-cp39-cp39-macosx_11_0_arm64.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 5.9 MB/s eta 0:00:01
[?25hCollecting certifi
  Downloading certifi-2026.1.4-py3-none-any.whl (152 kB)
[K     |████████████████████████████████| 152 kB 10.3 MB/s eta 0:00:01
[?25hInstalling collected packages: certifi, pyproj
Successfully installed certifi-2026.1.4 pyproj-3.6.1
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
customer_tb.head()

Unnamed: 0,customer_id,age,sex,home_latitude,home_longitude
0,c_1,41,man,35.092193,136.512347
1,c_2,38,man,35.325076,139.410551
2,c_3,49,woman,35.120543,136.511179
3,c_4,43,man,43.034868,141.240314
4,c_5,31,man,35.102661,136.523797


35.092193 = 35도 09분 21.93초
위도/경도는 도+분+초로 이루어져 있고            
위 데이터프레임에는 이를 그대로 이어붙인 압축 형태로 저장되어 있다

In [5]:
import pyproj

#  압축형(그대로 이어 붙인 상태) -> 연속형(실제 값) 으로 변환하는 함수
def convert_to_continuous(x):
    x_min = ( x * 100 - int(x*100) ) * 100
    x_sec = ( x - int(x) - x_min / 10000 ) * 100
    return int(x) + x_sec / 60 + x_min/60/60

# 함수 지정해서 변환
customer_tb['home_latitude'] = customer_tb['home_latitude'].apply(lambda x: convert_to_continuous(x))
customer_tb['home_longitude'] = customer_tb['home_longitude'].apply(lambda x: convert_to_continuous(x))

customer_tb.head()

Unnamed: 0,customer_id,age,sex,home_latitude,home_longitude
0,c_1,41,man,35.156092,136.856519
1,c_2,38,man,35.547433,139.684864
2,c_3,49,woman,35.201508,136.853275
3,c_4,43,man,43.063522,141.400872
4,c_5,31,man,35.174058,136.877214


In [10]:
# 축지계 설정
epsg_world = pyproj.Proj('+init=EPSG:4326')
epsg_korea = pyproj.Proj('+init=EPSG:4301')

# 한국 좌표계 -> 세계 표준 좌표계로 변환
home_position = customer_tb[['home_longitude', 'home_latitude']].apply(lambda x:
                                                                       pyproj.transform(epsg_korea, epsg_world, x[0], x[1]), axis=1)

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)
  pyproj.transform(epsg_korea, epsg_world, x[0], x[1]), axis=1)
  pyproj.transform(epsg_korea, epsg_world, x[0], x[1]), axis=1)


In [11]:
# 갱신
customer_tb_orig = customer_tb.copy() # 원본 복사
customer_tb['home_longitude'] = [x[0] for x in home_position]
customer_tb['home_latitude'] = [x[1] for x in home_position]
customer_tb.head(3)

Unnamed: 0,customer_id,age,sex,home_latitude,home_longitude
0,c_1,41,man,35.159315,136.853555
1,c_2,38,man,35.550685,139.681642
2,c_3,49,woman,35.204727,136.85031


#### 두 지점 간 거리 및 방향 계산 

In [12]:
# install geopy
%pip install geopy

Defaulting to user installation because normal site-packages is not writeable
Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 1.9 MB/s eta 0:00:01
[?25hCollecting geographiclib<3,>=1.52
  Downloading geographiclib-2.1-py3-none-any.whl (40 kB)
[K     |████████████████████████████████| 40 kB 5.4 MB/s eta 0:00:01
[?25hInstalling collected packages: geographiclib, geopy
Successfully installed geographiclib-2.1 geopy-2.4.1
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


위치 계산을 위한 파이썬 패키지
1. pyproj : 지리적 좌표 변환, 계산을 위한 패키지
2. geopy : 거리 계산을 위한 패키지

In [13]:
import math
import pyproj

from geopy.distance import great_circle

# 예약 테이블에 고객+호텔 정보 결합
reserve_tb = pd.merge(reserve_tb, customer_tb, on='customer_id', how='inner')
reserve_tb = pd.merge(reserve_tb, hotel_tb, on='hotel_id', how='inner')

reserve_tb.head(3)

Unnamed: 0,reserve_id,hotel_id,customer_id,reserve_datetime,checkin_date,checkin_time,checkout_date,people_num,total_price,age,sex,home_latitude,home_longitude,base_price,big_area_name,small_area_name,hotel_latitude,hotel_longitude,is_business
0,r1,h_75,c_1,2016-03-06 13:09:42,2016-03-26,10:00:00,2016-03-29,4,97200,41,man,35.159315,136.853555,8100,B,B-2,35.54586,139.701217,False
1,r2,h_219,c_1,2016-07-16 23:39:55,2016-07-20,11:30:00,2016-07-21,2,20600,41,man,35.159315,136.853555,10300,B,B-3,35.644729,139.693389,True
2,r3,h_179,c_1,2016-09-24 10:03:17,2016-10-19,09:00:00,2016-10-22,2,33600,41,man,35.159315,136.853555,5600,G,G-4,33.599962,130.632019,False


In [14]:
# 집, 호텔 좌표 df 만들기
home_and_hotel_points = reserve_tb.loc[ : , ['home_longitude', 'home_latitude', 'hotel_longitude', 'hotel_latitude']]
home_and_hotel_points.head(3)

Unnamed: 0,home_longitude,home_latitude,hotel_longitude,hotel_latitude
0,136.853555,35.159315,139.701217,35.54586
1,136.853555,35.159315,139.693389,35.644729
2,136.853555,35.159315,130.632019,33.599962


In [15]:
# 적도 반경 설정
g = pyproj.Geod(ellps='WGS84')

In [None]:
# Vincenty 거리 계산
# geopy의 vincenty 거리 계산 메서드는 Deprecated 되어 pyproj 이용
# g.inv : vincenty 거리 계산
home_to_hotel = home_and_hotel_points.apply(lambda x: g.inv(x[0], x[1], x[2], x[3]), axis=1)
home_to_hotel.head(3)

  home_to_hotel = home_and_hotel_points.apply(lambda x: g.inv(x[0], x[1], x[2], x[3]), axis=1)


0    (79.77027493225353, -98.58181358548404, 262356...
1    (77.38752490108185, -100.96709173915401, 26352...
2    (-105.04604262268255, 71.43812931232215, 59765...
dtype: object

In [None]:
# great circle 거리 계산
home_to_hotel_haver = home_and_hotel_points.apply(lambda x: great_circle((x[1], x[0]), (x[3], x[2])).meters, axis=1)
home_to_hotel_haver.head(3)

  home_to_hotel_haver = home_and_hotel_points.apply(lambda x: great_circle((x[1], x[0]), (x[3], x[2])).meters, axis=1)


0    261800.944906
1    262978.942042
2    596577.716142
dtype: float64

In [23]:
# Hubeny 거리 계산 함수
def hubeny(lon1, lat1, lon2, lat2, a=6378137, b=6356752.314245):
    e2 = (a ** 2 - b ** 2) / a ** 2
    (lon1, lat1, lon2, lat2) = [ x * (2 * math.pi) / 360 for x in (lon1, lat1, lon2, lat2)]
    w = 1 - e2 * math.sin((lat1+lat2) / 2) ** 2
    c2 = math.cos((lat1 + lat2) / 2) ** 2
    return math.sqrt((b**2 / w**3) * (lat1 - lat2) ** 2 + (a**2/w)*c2*(lon1-lon2)**2)

home_and_hotel_points.apply(lambda x: hubeny(x[0], x[1], x[2], x[3]), axis=1)

  home_and_hotel_points.apply(lambda x: hubeny(x[0], x[1], x[2], x[3]), axis=1)


0       262390.186582
1       263567.677155
2       597948.485992
3       498654.036378
4       291511.077110
            ...      
4025    534644.527104
4026      8293.928977
4027    533501.714676
4028    313055.146941
4029     21691.391897
Length: 4030, dtype: float64