In [None]:
# IMPORT LIBRARIES --------------------------------------------------------
import pandas as pd
pd.options.mode.chained_assignment = None
import folium
from folium import plugins
from folium.plugins import HeatMap
from sklearn.cluster import DBSCAN

In [None]:
# LOAD DATASET AND PREPROCESSING --------------------------------------------------------
# Most of the data cleaning happens upstream in KNIME; this cell only loads
# the exported CSV and removes incomplete rows.

# Read the CSV into a dataframe (low_memory=False: parse the file in one pass
# rather than in chunks) and discard every row that has a NaN in any column.
df = pd.read_csv('us_hotels.csv', low_memory=False).dropna()

# Plain-text preview of the first seven rows.
print(df.head(7))
# len(df) is the number of remaining rows; it equals the number of unique
# hotels only if the file holds one row per hotel -- presumably ensured by the
# KNIME cleaning step, TODO confirm.
print(f"\nTotal number of unique hotels: {len(df)}")

In [None]:
# SIMPLE FOLIUM VISUALIZATION --------------------------------------------------------
# Draws every row of `df` as a small circle marker on a US-wide map.

# initialize a map in the middle of the United States
df_map = folium.Map(location=[39.8097343, -98.5556199], zoom_start=3)

# Walk the two coordinate columns directly. df.iterrows() would construct a
# full Series for every row only to read two values out of it, which is
# needlessly slow on a large frame.
for lat, lon in zip(df['Latitude'], df['Longitude']):
    folium.CircleMarker(
        location=[lat, lon],
        radius=1,
        # NOTE(review): scale_radius is kept from the original call, but it is
        # not a documented CircleMarker option -- confirm it has any effect.
        scale_radius=True,
    ).add_to(df_map)

display(df_map)

In [None]:
# RATINGS VISUALIZATION --------------------------------------------------------
# Same marker map as the simple visualization, but each marker is coloured by
# the value in the 'HotelRating' column.


def _rating_color(rating):
    """Return the marker colour for a hotel rating.

    The bands are contiguous: each branch only needs an upper bound because
    the lower bound is implied by the branches above it. The previous version
    used lower bounds of 1.1 / 2.1 / 3.1, so ratings in (1, 1.1], (2, 2.1] and
    (3, 3.1] matched no band and fell through to 'green'.

    rating <= 1       -> 'red'
    1 < rating <= 2   -> 'orange'
    2 < rating <= 3   -> 'yellow'
    3 < rating <= 4   -> 'limegreen'
    anything else     -> 'green'
    """
    if rating <= 1:
        return 'red'
    if rating <= 2:
        return 'orange'
    if rating <= 3:
        return 'yellow'
    if rating <= 4:
        return 'limegreen'
    return 'green'


# initialize a map in the middle of the United States
# (this rebinds df_map, replacing the map built by the simple visualization)
df_map = folium.Map(location=[39.8097343, -98.5556199], zoom_start=3)

# Iterate the three needed columns directly instead of df.iterrows(), which
# would build a full Series per row.
for lat, lon, rating in zip(df['Latitude'], df['Longitude'], df['HotelRating']):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        # NOTE(review): scale_radius is kept from the original call, but it is
        # not a documented CircleMarker option -- confirm it has any effect.
        scale_radius=True,
        color=_rating_color(rating),
    ).add_to(df_map)

display(df_map)

In [None]:
# HEATMAP FOLIUM VISUALIZATION --------------------------------------------------------
# Renders the density of hotel locations as a heat layer on a US-wide map.

# initialize a map in the middle of the United States
heatmap = folium.Map(location=[39.8097343, -98.5556199], zoom_start=3)

# Build the [latitude, longitude] pairs with one vectorised column selection
# rather than a Python-level df.iterrows() loop, which creates a Series for
# every row.
heat_data = df[['Latitude', 'Longitude']].values.tolist()

# add the heat layer; radius, blur and min_opacity are the styling values
# chosen for this map
HeatMap(
    data=heat_data,
    radius=11,
    blur=5,
    min_opacity=0.15,
).add_to(heatmap)

display(heatmap)

In [None]:
# CLUSTERING --------------------------------------------------------
# Runs DBSCAN on a sample of the hotel coordinates and plots each clustered
# point, coloured by its cluster label.

# initialize cluster map
clustermap = folium.Map(location=[39.8097343, -98.5556199], zoom_start=3)

# 30% sample of the data (too much data causes crashes due to exceeding RAM limit).
# random_state is fixed so the same rows are drawn on every run.
df_clustering = df.sample(frac=0.3, random_state=2)

# Cluster the SAMPLE. The previous version built `coordinates` from the full
# `df`, which left df_clustering unused and defeated the memory safeguard above.
coordinates = df_clustering[['Latitude', 'Longitude']].values

# need a low eps/min_samples to filter out noise, but need it high enough to provide significant cluster data
# (eps is expressed in the same units as the coordinate columns -- presumably degrees, TODO confirm)
dbscan = DBSCAN(eps=0.1, min_samples=20)
clusters = dbscan.fit_predict(coordinates)

# TOGGLE: set to True to draw noise points (label -1) in gray instead of skipping them.
show_noise = False
noise_color = 'gray'

# Palette is defined once, outside the loop; cluster ids wrap around it, so
# two different clusters can share a colour when there are more than seven.
colors = ['blueviolet', 'orangered', 'goldenrod', 'seagreen', 'dodgerblue', 'darkslategrey', 'mediumvioletred']

for (lat, lon), cluster_id in zip(coordinates, clusters):
    if cluster_id == -1:
        # handle noise
        if not show_noise:
            continue
        color = noise_color
    else:
        # assign a color to cluster
        color = colors[cluster_id % len(colors)]

    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color=color,
        fill=True,
        fill_color=color,
    ).add_to(clustermap)

clustermap