## 民泊サービスの宿泊料金予測
- 民泊と駅の距離を測定
- train_kari.csvとtest_kari.csvの作成

### Google Driveのマウント

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# IPython magic (not plain Python): move the working directory into "My Drive".
%cd /content/drive/'My Drive'

Mounted at /content/drive
/content/drive/My Drive


### 作業ディレクトリの設定とライブラリのインポート

In [None]:
# Make the project folder on Google Drive the current working directory,
# then echo the resulting directory as a sanity check.
import os

_project_dir = '/content/drive/My Drive/Probdata/airbnb/'
os.chdir(_project_dir)
print(os.getcwd())

/content/drive/My Drive/Probdata/airbnb


In [None]:
class Config:
    """Notebook-wide settings: directory layout and run flags."""

    # Run flags (not referenced in the visible part of the notebook).
    debug = False
    seed = 42

    # Directory layout; every path is relative to the current working directory.
    root_path = './'
    intermediate_path = os.path.join(root_path, 'intermediate')
    output_path = os.path.join(root_path, 'output')
    input_path = os.path.join(root_path, 'input')

In [None]:
import pandas as pd
import numpy as np
import warnings
import datetime

import scipy.stats as stats
from geopy.distance import geodesic

### データの読み込み

In [None]:
# Load the training data, the test data and the station list from the input directory.
train_df, test_df, station_list_df = (
    pd.read_csv(f'{Config.input_path}/{csv_name}')
    for csv_name in ('train_data.csv', 'test_data.csv', 'station_list.csv')
)

In [None]:
# Replace every missing value in both frames with 0.
train_df, test_df = train_df.fillna(0), test_df.fillna(0)

## 民泊と駅の距離の測定

In [None]:
def distance_station(data):
    """Append nearest-station names and station-count features to ``data``.

    For every row of ``data`` the geodesic distance in metres to every station
    in the module-level ``station_list_df`` is computed. Stations that share a
    ``station_name`` collapse into a single entry (the last one listed wins).

    Six columns are appended:

    * ``sta_nm_1``, ``sta_nm_2``, ``sta_nm_3``: names of the nearest, second
      and third nearest stations among those at most 1000 m away. A slot with
      no such station holds a single full-width space.
    * ``dis_200``: number of stations at most 200 m away.
    * ``dis_500``: number of stations more than 200 m and at most 500 m away.
    * ``dis_1000``: number of stations more than 500 m and at most 1000 m away.

    Args:
        data: DataFrame with ``latitude`` and ``longitude`` columns.

    Returns:
        A new DataFrame: ``data`` with the six feature columns concatenated on
        the right, row-aligned with ``data`` whatever its index is.
    """
    no_station = '　'  # placeholder when fewer than three stations are in range
    feature_columns = [
        'sta_nm_1', 'sta_nm_2', 'sta_nm_3', 'dis_200', 'dis_500', 'dis_1000',
    ]

    # Read the station table into plain lists once, instead of going through
    # ``iloc`` for every listing/station pair.
    station_names = station_list_df['station_name'].tolist()
    station_points = list(zip(station_list_df['latitude'].tolist(),
                              station_list_df['longitude'].tolist()))

    feature_rows = []
    for airbnb in zip(data['latitude'].tolist(), data['longitude'].tolist()):
        # Keyed by station name, so duplicate names keep only the last distance.
        dis_sta = {}
        for name, point in zip(station_names, station_points):
            dis_sta[name] = geodesic(airbnb, point).m

        # Only stations within 1000 m contribute to any feature; sort them
        # nearest first (the sort is stable, so ties keep table order).
        in_range = sorted(
            (pair for pair in dis_sta.items() if pair[1] <= 1000.0),
            key=lambda pair: pair[1],
        )

        names = [name for name, _ in in_range[:3]]
        names += [no_station] * (3 - len(names))

        # The three bands are disjoint, not cumulative.
        dis_200 = sum(1 for _, dis in in_range if dis <= 200.0)
        dis_500 = sum(1 for _, dis in in_range if 200.0 < dis <= 500.0)
        dis_1000 = len(in_range) - dis_200 - dis_500

        feature_rows.append((*names, dis_200, dis_500, dis_1000))

    # Build the features on data's own index: concat(axis=1) aligns on index
    # labels, so a fresh RangeIndex would misalign rows (and inject NaN) for
    # any input whose index is not 0..n-1.
    tmp_data = pd.DataFrame(feature_rows, columns=feature_columns, index=data.index)
    return pd.concat([data, tmp_data], axis=1)

In [None]:
# Attach the station-distance features to both the training and the test frame.
train_df, test_df = (distance_station(frame) for frame in (train_df, test_df))

In [None]:
# Write the enriched train/test frames to the output directory (on Google Drive).
# Create the directory first: to_csv does not create missing parent directories.
os.makedirs(Config.output_path, exist_ok=True)
train_df.to_csv(os.path.join(Config.output_path, "train_kari.csv"), index=False)
test_df.to_csv(os.path.join(Config.output_path, "test_kari.csv"), index=False)