# Loading more information about the hotels

## Extract data from the metadata table in the sqlite database

In [1]:
# Dependencies
import pandas as pd
import numpy as np
import sqlite3
from IPython.core.display import clear_output

In [2]:
# Create a connection to the database
conn = sqlite3.connect("Data/Hotels.db")

# Load the database table into a pandas dataframe
metadata = pd.read_sql_query("select * from metadata;", conn)
conn.close()

# Preview the dataframe
metadata.head()

Unnamed: 0,index,name,categories,primaryCategories,address,city,province,latitude,longitude,websites
0,0,Rancho Valencia Resort Spa,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,5921 Valencia Cir,Rancho Santa Fe,CA,32.990959,-117.186136,http://www.ranchovalencia.com
1,3,Aloft Arundel Mills,"Hotels,Hotels and motels,Travel agencies and b...",Accommodation & Food Services,7520 Teague Rd,Hanover,MD,39.155929,-76.716341,http://www.starwoodhotels.com/alofthotels/prop...
2,9,Hampton Inn Suites PortlandVancouver,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,315 SE Olympia Dr,Vancouver,WA,45.619212,-122.525196,http://hamptoninn3.hilton.com/en/hotels/washin...
3,15,Hotel Phillips,"Hotels,Caterers,Hotels and motels,Hotel,Restau...",Accommodation & Food Services,106 W 12th St,Kansas City,MO,39.100119,-94.584701,http://curiocollection3.hilton.com/en/hotels/m...
4,20,The Inn at Solvang,"Bed Breakfasts,Bed Breakfast,Hotels Motels,Hotel",Accommodation & Food Services,10611 Standing Stone Rd,Huntingdon,PA,40.527478,-77.969763,http://www.solvang.com


## Add hotel characteristics

### Distance from airport

In [3]:
# Get latitudes and longitudes of airports mapped in the USA
# Source: https://opendata.socrata.com/dataset/Airport-Codes-mapped-to-Latitude-Longitude-in-the-/rxrh-4cxm
path = "Data/Airport_Codes_Coords_USA.csv"

airports = pd.read_csv(path)
airports.head()

Unnamed: 0,locationID,Latitude,Longitude
0,ADK,51.8781,176.6461
1,AKK,56.9386,154.1825
2,Z13,60.9047,161.4225
3,AKI,60.9028,161.2306
4,AUK,62.68,164.66


In [4]:
print(f"Number of airports mapped in USA: {airports.shape[0]}")

Number of airports mapped in USA: 13429


In [5]:
# Haversine formula to calculate distance
# Source1: https://stackoverflow.com/a/41337005
# Source2: https://stackoverflow.com/a/21623206
from math import cos, asin, sqrt

def distance(lat1, lon1, lat2, lon2):
    """distance is expressed in km"""
    p = 0.017453292519943295 # pi/180; factor to convert degrees to radians
    a = 0.5 - cos((lat2-lat1)*p)/2 + cos(lat1*p)*cos(lat2*p) * (1-cos((lon2-lon1)*p)) / 2
    return 12742 * asin(sqrt(a)) # Earth diameter: 12742 = 2 * R; R = 6371km (mean radius of the earth)

# Find the minimum distance between the hotel and the closest airport
def min_distance(lat, lon):
    distances = []
    for i in range(0, len(airports)):
        away = distance(abs(lat), abs(lon), abs(airports["Latitude"][i]), abs(airports["Longitude"][i]))
        distances.append(away)
    return min(distances)

In [6]:
# For each hotel coordinate, calculate the distance to the nearest airport
airport_distance = []
for i in range(0, len(metadata)):
    dist = min_distance(metadata["latitude"][i], metadata["longitude"][i])
    airport_distance.append(dist)
    
    print(f"Now processing {i}th airport.\n-----")
    clear_output(wait = True) # to replace output with new one (instead of printing many outputs)

Now processing 1852th airport.
-----


In [7]:
# Add airport distance in metadata
metadata["airportDistance_km"] = airport_distance

# Preview the dataframe
metadata.head()

Unnamed: 0,index,name,categories,primaryCategories,address,city,province,latitude,longitude,websites,airportDistance_km
0,0,Rancho Valencia Resort Spa,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,5921 Valencia Cir,Rancho Santa Fe,CA,32.990959,-117.186136,http://www.ranchovalencia.com,14.308848
1,3,Aloft Arundel Mills,"Hotels,Hotels and motels,Travel agencies and b...",Accommodation & Food Services,7520 Teague Rd,Hanover,MD,39.155929,-76.716341,http://www.starwoodhotels.com/alofthotels/prop...,4.668332
2,9,Hampton Inn Suites PortlandVancouver,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,315 SE Olympia Dr,Vancouver,WA,45.619212,-122.525196,http://hamptoninn3.hilton.com/en/hotels/washin...,6.5919
3,15,Hotel Phillips,"Hotels,Caterers,Hotels and motels,Hotel,Restau...",Accommodation & Food Services,106 W 12th St,Kansas City,MO,39.100119,-94.584701,http://curiocollection3.hilton.com/en/hotels/m...,2.670645
4,20,The Inn at Solvang,"Bed Breakfasts,Bed Breakfast,Hotels Motels,Hotel",Accommodation & Food Services,10611 Standing Stone Rd,Huntingdon,PA,40.527478,-77.969763,http://www.solvang.com,3.781817


### Hotel features

In [8]:
# Create new columns containing categories for the hotels
new_col = ["Motel", "Cottage", "Cabin", "Hotel", "Caterer", "Resort", "Restaurant", "Bed", "Spa", "Banquet", 
           "Concierge Service", "Golf Course", "Cable Internet", "Pool", "Water Parks", "Family-Friendly", "Casino", 
           "Beach", "Luxury Hotels", "Business Hotels", "Conference Room", "Event Space", "Convention", 
           "Boutique Hotels", "Clinics", "Inn", "Concert Hall", "E-Commerce", "Extended Stay", "Fairgrounds", 
           "Harbor", "Marina", "Lounge", "Medical", "Movie", "Pet Friendly", "Ski", "Timeshare", "Yacht clubs", 
           "Apartment"]

# Sort the items in the list alphabetically
new_col.sort()

for col in new_col:
    metadata[col] = np.nan

In [9]:
# Create a function that fills in 1s and 0s for selected categories
def Cat_encoding(category):
    for i in range(0,len(metadata)):
        if category not in metadata["primaryCategories"][i]:
            metadata[str(category)][i] = 0
        else:
            metadata[str(category)][i] = 1 
        
        print(f"Now processing {i}th hotel for {category}.\n-----")
        clear_output(wait = True) # to replace output with new one (instead of printing many outputs)  

In [10]:
# Loop the primaryCat_encoding function to fill the empty categories
for x in new_col:
    Cat_encoding(x) 

Now processing 1852th hotel for Yacht clubs.
-----


In [11]:
metadata.head()

Unnamed: 0,index,name,categories,primaryCategories,address,city,province,latitude,longitude,websites,...,Movie,Pet Friendly,Pool,Resort,Restaurant,Ski,Spa,Timeshare,Water Parks,Yacht clubs
0,0,Rancho Valencia Resort Spa,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,5921 Valencia Cir,Rancho Santa Fe,CA,32.990959,-117.186136,http://www.ranchovalencia.com,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,Aloft Arundel Mills,"Hotels,Hotels and motels,Travel agencies and b...",Accommodation & Food Services,7520 Teague Rd,Hanover,MD,39.155929,-76.716341,http://www.starwoodhotels.com/alofthotels/prop...,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,9,Hampton Inn Suites PortlandVancouver,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,315 SE Olympia Dr,Vancouver,WA,45.619212,-122.525196,http://hamptoninn3.hilton.com/en/hotels/washin...,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,15,Hotel Phillips,"Hotels,Caterers,Hotels and motels,Hotel,Restau...",Accommodation & Food Services,106 W 12th St,Kansas City,MO,39.100119,-94.584701,http://curiocollection3.hilton.com/en/hotels/m...,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,20,The Inn at Solvang,"Bed Breakfasts,Bed Breakfast,Hotels Motels,Hotel",Accommodation & Food Services,10611 Standing Stone Rd,Huntingdon,PA,40.527478,-77.969763,http://www.solvang.com,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Load the metadata with new information into a new database table 

In [18]:
# Create a connection to the database
conn = sqlite3.connect("Data/Hotels.db")

# Create a database table each for ratings and for metadata
metadata.to_sql("metadata2", conn, if_exists = "replace", index = False)

  dtype=dtype)


In [19]:
# Preview metadata
pd.read_sql_query("select * from metadata2 limit 3;", conn)

Unnamed: 0,index,name,categories,primaryCategories,address,city,province,latitude,longitude,websites,...,Movie,Pet Friendly,Pool,Resort,Restaurant,Ski,Spa,Timeshare,Water Parks,Yacht clubs
0,0,Rancho Valencia Resort Spa,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,5921 Valencia Cir,Rancho Santa Fe,CA,32.990959,-117.186136,http://www.ranchovalencia.com,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3,Aloft Arundel Mills,"Hotels,Hotels and motels,Travel agencies and b...",Accommodation & Food Services,7520 Teague Rd,Hanover,MD,39.155929,-76.716341,http://www.starwoodhotels.com/alofthotels/prop...,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,9,Hampton Inn Suites PortlandVancouver,"Hotels,Hotels and motels,Hotel and motel reser...",Accommodation & Food Services,315 SE Olympia Dr,Vancouver,WA,45.619212,-122.525196,http://hamptoninn3.hilton.com/en/hotels/washin...,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Close the connection
conn.close()