# Google Maps API - Neighborhood Venues for Housing Price Prediction

In this notebook we are retrieving information via Google Maps API, in order to gain some information about the "trendiness" of the neighborhood. 

The data will be combined with the house data.

In [2]:
# libaries
import pandas as pd
import requests

from urllib.parse import urlencode
import googlemaps
import json
import pprint
import time
import pandas as pd
import numpy as np
import ast


In [3]:
# Read dataset for locations we are looking information from
houses = pd.read_csv('final_houses.csv')

In [59]:
post_code_loc = houses.groupby('post_code')[['loc']].min().reset_index()
len(post_code_loc.post_code.unique())

1666

In [62]:
# Get a list of the unique locations
locations = []

for each in post_code_loc['loc'].unique():
    loc = each.replace('[', '').replace(']', '')
    if loc not in ["-, -"]:
        locations.append(loc)

In [66]:
# Set up the Google API client
API_KEY = ""
gmaps = googlemaps.Client(key = API_KEY)

# Find plabes nearby

results = []

for l in locations:
    try:
        res_main=gmaps.places_nearby(location=l,type=["restaurant","cafe", "bar"],radius=1000)
        results.append(res_main)
    except:
        pass
    time.sleep(3)


In [67]:
# Save query to text file so we don't have to request again
with open('api_result_2.txt', 'w') as f:
     f.write(json.dumps(results))

In [4]:
# Open the file again to carry out analysis

with open('api_result_2.txt') as json_file:
    data = json.load(json_file)


In [5]:
# Create a new dataframe for our results
loc_df = pd.DataFrame(columns=['place_id', 'name', 'rating', 'rating_count', 'type', 'coord'])

In [None]:
# Pick up the results from our json file
for result in data:
    for r in result['results']:
        place_id = r['place_id']
        name = r['name']
        try:
            rating = r['rating']
        except: 
            rating = np.nan 
        try:
            rating_count = r['user_ratings_total']
        except:
            rating_count = np.nan
        location = r['geometry']
        coord = location['location']
        types = r['types']
                
        loc_df = loc_df.append({'place_id':place_id, 'name':name, 'rating':rating, 'rating_count':rating_count, 'type':types, 'coord':coord},ignore_index=True)
        


# Exploring and cleaning the venue dataset

In [128]:
# We will drop duplicate values as we will be using this dataframe to calculate distances to house locations
loc_unique = loc_unique.drop_duplicates(subset=['place_id'])

In [107]:
loc_unique.head()

Unnamed: 0,place_id,name,rating,rating_count,type,coord
0,ChIJVXealLU_xkcRja_At0z9AGY,Amsterdam,,,"[locality, political]","{'lat': 52.3666969, 'lng': 4.8945398}"
1,ChIJZyc7C7oJxkcRSPqsBgyVa0k,DoubleTree by Hilton Amsterdam Centraal Station,4.4,5081.0,"[lodging, point_of_interest, establishment]","{'lat': 52.3769633, 'lng': 4.904821799999999}"
2,ChIJQ_tKzrkJxkcRa2Yg-YSRmA4,Hotel NH Collection Amsterdam Barbizon Palace,4.4,1658.0,"[lodging, point_of_interest, establishment]","{'lat': 52.3764175, 'lng': 4.900220099999999}"
3,ChIJyYzEx7wJxkcRLiQturn0qeA,Hotel ibis Amsterdam Centre Stopera,4.1,1995.0,"[lodging, restaurant, food, point_of_interest,...","{'lat': 52.3700728, 'lng': 4.907650900000001}"
4,ChIJs01eRrgJxkcR5jnrl3llcqM,Hotel CC Amsterdam,3.9,742.0,"[lodging, point_of_interest, establishment]","{'lat': 52.375656, 'lng': 4.8984916}"


In [129]:
# Check for datatypes
loc_unique.dtypes

In [109]:
loc_unique['rating_count'] = loc_unique['rating_count'].astype(float)

(1670, 6)

In [130]:
# Check for null value
loc_unique.isnull().sum()

In [None]:
# Drop null values 
loc_unique.dropna(inplace=True)

In [131]:
loc_unique['coord'] = loc_unique.coord.apply(lambda x: str(x['lat'])+ ", " + str(x['lng']))

In [132]:
loc_unique.head()

In [134]:
# We keep only venues where rating count is 5 - to have more reasonable justification for trends
loc_unique = loc_unique[loc_unique.rating_count > 5]

In [135]:
# Final dataset shape
loc_unique.shape

(973, 6)

In [138]:
# Save file for further analysis
loc_unique.to_csv('venue_info_2.csv', index=False)