In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import helpers

In [196]:
# Listings table, read on its own.
listings = pd.read_csv('datasets/deduped_listings_updated.csv')

# The four Foursquare POI tables: all venues, then the food, nightlife
# and travel files, read in that order.
pois, food_pois, nightlife_pois, travel_pois = (
    pd.read_csv(csv_path)
    for csv_path in (
        'datasets/foursquare_pois.csv',
        'datasets/foursquare_pois_food.csv',
        'datasets/foursquare_pois_nightlife.csv',
        'datasets/foursquare_pois_travel.csv',
    )
)

In [197]:
# Listings keep their coordinates in 'longitude' / 'latitude'.
# NOTE(review): clean_latlon / to_gdf live in helpers; presumably they clean the
# coordinate columns and build a GeoDataFrame -- confirm against helpers.
geo_listings = helpers.to_gdf(helpers.clean_latlon(listings, 'longitude', 'latitude'))

# Every Foursquare table uses the same 'venue.location.lng' / 'venue.location.lat'
# columns, so they all go through the same two helper calls, in the original order.
geo_pois, geo_pois_food, geo_pois_nightlife, geo_pois_travel = (
    helpers.to_gdf(helpers.clean_latlon(poi_frame, 'venue.location.lng', 'venue.location.lat'))
    for poi_frame in (pois, food_pois, nightlife_pois, travel_pois)
)

In [211]:
# Point of interest features
# One idpop_* column is added per (POI category, scope) pair by applying
# helpers.idw_popularity to every listing row with metric='sum'.
# `citywide` is passed explicitly on every call (0 for the "local" columns,
# 1 for the "citywide" ones) so that no column depends on the default value
# of helpers.idw_popularity's `citywide` parameter.
poi_categories = [
    ('', geo_pois),                      # all venues -> idpop_<scope>_sum
    ('food_', geo_pois_food),            # -> idpop_<scope>_food_sum
    ('nightlife_', geo_pois_nightlife),  # -> idpop_<scope>_nightlife_sum
    ('travel_', geo_pois_travel),        # -> idpop_<scope>_travel_sum
]
poi_scopes = [('local', 0), ('citywide', 1)]

# Category is the outer loop and scope the inner one, which keeps the column
# creation order local/citywide per category.
for category_tag, category_pois in poi_categories:
    for scope_name, citywide_flag in poi_scopes:
        geo_listings[f'idpop_{scope_name}_{category_tag}sum'] = geo_listings.apply(
            helpers.idw_popularity,
            poi_gdf=category_pois,
            citywide=citywide_flag,
            metric='sum',
            axis=1,
        )

In [218]:
# Write the listings frame out as CSV. This save sits before the cell that adds
# the *_count_500m columns, so those columns are not part of this file.
poifeats_csv_path = 'datasets/listings_with_poifeats.csv'
geo_listings.to_csv(poifeats_csv_path)

In [214]:
import importlib
# Re-execute the helpers module in the running kernel so the calls below use
# its current source (presumably helpers.py was edited mid-session -- confirm).
importlib.reload(helpers)

def count_within_radius(listing, points, radius_meters):
    """Count the rows that helpers.filter_to_radius keeps for one listing.

    Parameters
    ----------
    listing : passed through unchanged as the first argument of
        helpers.filter_to_radius (a single row when used with
        DataFrame.apply(..., axis=1)).
    points : passed through unchanged as the second argument.
    radius_meters : passed through unchanged as the third argument.

    Returns
    -------
    int
        Length of the index of the object returned by helpers.filter_to_radius.
    """
    nearby = helpers.filter_to_radius(listing, points, radius_meters)
    return len(nearby.index)

In [219]:
# Number of venues within 500m of each listing, one column per POI table.
# Very slow: roughly 12 hours per column. So far only the general venue
# count has run to completion.
for count_column, venue_points in (
    ('venue_count_500m', geo_pois),
    ('food_count_500m', geo_pois_food),
):
    geo_listings[count_column] = geo_listings.apply(
        count_within_radius,
        axis=1,
        points=venue_points,
        radius_meters=500,
    )