# Explore Geospatial Aspects of the Dataset

In [5]:

import pandas as pd
import folium
from folium.plugins import FastMarkerCluster, HeatMap
import utils

# File name of this notebook's configuration; handed to utils.load_config below.
# NOTE(review): the name has no directory component, so it is presumably resolved
# by utils.load_config or relative to the working directory — confirm.
CONFIG_FILE = "01-03-geospatial-aspects_config.yml"


In [2]:
# Load Notebook Config
# Reads the settings named by CONFIG_FILE through the project helper and echoes
# the resulting dict so the active settings are recorded alongside the results.
# NOTE(review): in the echoed output, `newark_bounding_box` has min_lat
# (40.78813864) greater than max_lat (40.67325015) — the two values look swapped
# in the config file; confirm before using that box for filtering.
config = utils.load_config(CONFIG_FILE)
config

{'general': {'load_from_scratch': False,
  'save_raw_dataframe': False,
  'save_transformed_dataframe': False,
  'remove_bad_values': True},
 'columns': {'categorical': ['neighbourhood_group',
   'neighbourhood',
   'room_type'],
  'continuous': ['minimum_nights',
   'number_of_reviews',
   'reviews_per_month',
   'calculated_host_listings_count',
   'latitude',
   'longitude'],
  'date': ['last_review'],
  'text': ['name', 'host_name'],
  'excluded': ['price', 'id']},
 'bounding_box': {'max_long': -73.70018092,
  'max_lat': 40.91617849,
  'min_long': -74.25909008,
  'min_lat': 40.47739894},
 'newark_bounding_box': {'max_long': -74.11278706,
  'max_lat': 40.67325015,
  'min_long': -74.25132408,
  'min_lat': 40.78813864},
 'geo_columns': ['latitude', 'longitude'],
 'file_names': {'input_csv': '../data/AB_NYC_2019.csv',
  'pickle_input_dataframe': '../data/AB_NYC_2019_input_13_sep_2023.pkl',
  'pickle_output_dataframe': '../data/AB_NYC_2019_output_13_sep_2023.pkl'}}

In [3]:
# Read data
# The listings frame is stored as a pickle whose path comes from the notebook config.
# NOTE: unpickling can execute arbitrary code, so only load pickles produced by
# this project's own pipeline.
output_pickle_path = config["file_names"]["pickle_output_dataframe"]
clean_df = pd.read_pickle(output_pickle_path)

## Visualize the listings in the dataset by location cluster.

In [4]:

# Centre of the map: the mean listing coordinate as a plain [lat, lon] list.
# folium indexes `location` positionally (location[0], location[1]); doing that on
# a label-indexed pandas Series is deprecated in recent pandas, so convert up front.
mean_location = clean_df[["latitude", "longitude"]].mean().tolist()

f = folium.Figure(width=1000, height=500)
m = folium.Map(
    location=mean_location,
    # "Stamen Toner" is no longer a built-in tile set in current folium releases;
    # "CartoDB positron" is a built-in, similarly low-contrast basemap.
    tiles="CartoDB positron",
    zoom_start=10,
    # folium's keyword is `min_zoom`; the previous `zoom_min` was not a recognised
    # option, so the zoom-out limit was never applied.
    min_zoom=8,
    max_bounds=True,
).add_to(f)

# One clustered marker per listing.
# NOTE(review): the default FastMarkerCluster callback appears to read only the
# first two columns (lat, lon); `name` would need a custom `callback` to be shown
# in a popup — confirm whether that was the intent.
FastMarkerCluster(data=clean_df[["latitude", "longitude", "name"]]).add_to(m)

f

## Heat map by location

In [6]:
# Density heat map: every listing contributes one unweighted point.
f = folium.Figure(width=1000, height=500)
m = folium.Map(
    # list(...) yields plain [lat, lon] values whether `mean_location` is a pandas
    # Series or already a list, avoiding positional indexing on a labelled Series.
    location=list(mean_location),
    # "Stamen Toner" is no longer a built-in tile set in current folium releases;
    # "CartoDB positron" is a built-in, similarly low-contrast basemap.
    tiles="CartoDB positron",
    zoom_start=10,
    # folium's keyword is `min_zoom`; the previous `zoom_min` was not a recognised
    # option, so the zoom-out limit was never applied.
    min_zoom=8,
    max_bounds=True,
).add_to(f)
HeatMap(data=clean_df[["latitude", "longitude"]]).add_to(m)

f

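## Heat map by price

Each point is weighted by the total price of the listings that share that exact latitude/longitude pair.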

In [15]:
# Total price per distinct (latitude, longitude) pair, as a flat three-column
# frame; .info() confirms the row count and dtypes before it is fed to the heat map.
price_by_location = clean_df.groupby(
    ["latitude", "longitude"], as_index=False
)["price"].sum()
price_by_location.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48871 entries, 0 to 48870
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   latitude   48871 non-null  float64
 1   longitude  48871 non-null  float64
 2   price      48871 non-null  int64  
dtypes: float64(2), int64(1)
memory usage: 1.1 MB


In [19]:
# Heat-map input: one [lat, lon, weight] row per distinct coordinate pair, where
# the weight is the summed price of all listings at that exact location.
price_points = (
    clean_df.groupby(["latitude", "longitude"])[["price"]]
    .sum()
    .reset_index()
    .to_numpy()
    .tolist()
)

f = folium.Figure(width=1000, height=500)
m = folium.Map(
    # list(...) yields plain [lat, lon] values whether `mean_location` is a pandas
    # Series or already a list, avoiding positional indexing on a labelled Series.
    location=list(mean_location),
    # "Stamen Toner" is no longer a built-in tile set in current folium releases;
    # "CartoDB positron" is a built-in, similarly low-contrast basemap.
    tiles="CartoDB positron",
    zoom_start=10,
    # folium's keyword is `min_zoom`; the previous `zoom_min` was not a recognised
    # option, so the zoom-out limit was never applied.
    min_zoom=8,
    max_bounds=True,
).add_to(f)
HeatMap(data=price_points, radius=8).add_to(m)

f