In [53]:
import math
import numpy as np
import urllib2
import json
from time import sleep
%matplotlib inline
import matplotlib.pylab as plt

In [54]:
# Get the set of search points in a bounding box.
# Description:
# This function takes the bounding box of the area
# we intend to crawl and the search radius (in km). It
# returns grid points that span the bounding box. This
# is useful because the Google Places API requires a point
# to search around.

def GetSearchPoints(boundingBox,radius):
    """Build a square grid of search points that spans a bounding box.

    Parameters
    ----------
    boundingBox : sequence of 4 numbers
        [lat1, lon1, lat2, lon2]: two opposite corners, in degrees.
        The corners may be given in any order.
    radius : float
        Search radius in kilometres (must be > 0). It is converted to
        degrees of arc and used as the target spacing between grid points.

    Returns
    -------
    (lat_points, lon_points) : tuple of two 1-D numpy arrays
        Flattened coordinates of an n-by-n grid running from corner 1 to
        corner 2, where n is the larger of the point counts needed along
        latitude and longitude (at least 1). Latitude varies fastest.

    Raises
    ------
    ValueError
        If radius is not strictly positive.
    """
    if radius<=0:
        raise ValueError('radius must be a positive number of kilometres')

    earthCircumference=2*math.pi*6371.0  # km; 6371 km is the Earth's mean radius
    # Degrees of arc covered by `radius` km along a great circle.
    # NOTE: the same angular step is used for longitude, where a degree is
    # shorter on the ground away from the equator, so the grid is somewhat
    # denser east-west than strictly needed (errs on the side of coverage).
    deltaCoordinate=360.0*(radius/earthCircumference)

    latSpan=abs(boundingBox[2]-boundingBox[0])
    lonSpan=abs(boundingBox[3]-boundingBox[1])
    latPoints=np.ceil(latSpan/deltaCoordinate)
    lonPoints=np.ceil(lonSpan/deltaCoordinate)

    # np.linspace requires an integer sample count (a float raises TypeError
    # on current numpy). Keep at least one point so a degenerate box (both
    # corners identical) still yields its single location.
    points=max(int(max(latPoints,lonPoints)),1)
    lat_axis=np.linspace(boundingBox[0],boundingBox[2],points)
    lon_axis=np.linspace(boundingBox[1],boundingBox[3],points)

    # Cartesian product of the two axes, flattened to parallel 1-D arrays.
    lat_mesh,lon_mesh=np.meshgrid(lat_axis,lon_axis)
    return lat_mesh.flatten(),lon_mesh.flatten()



In [55]:
# This function does a nearby search on Google places API
# It takes a coordinate and the radius desired for search
# It returns a list of places with their features.
# for reference check: https://developers.google.com/places/web-service/search#PlaceSearchRequests
#
def fetchFromGooglePOI(point,radius,googlePlacesKEY,pageDelay=1.0,maxTokenRetries=5):
    """Run a Google Places "nearby search" around one point and collect all pages.

    Parameters
    ----------
    point : sequence containing one (lat, lon) pair
        Only point[0][0] (latitude) and point[0][1] (longitude) are read.
    radius : float
        Search radius in kilometres (sent to the API in metres).
    googlePlacesKEY : str
        Google API key with the Places API enabled.
    pageDelay : float, optional
        Seconds to wait before requesting a follow-up page, and between
        retries of a page whose token is not valid yet.
    maxTokenRetries : int, optional
        How many times a follow-up page is retried while the API answers
        INVALID_REQUEST (the documented response for a next_page_token that
        has not become valid yet).

    Returns
    -------
    list of dict
        The concatenated 'results' entries of every page fetched. If the API
        reports an error status, a warning is printed and whatever was
        gathered so far is returned.
    """
    # Function-scope imports: this function runs on both Python 2 (urllib2)
    # and Python 3 (urllib.request) regardless of the notebook's import cell.
    from contextlib import closing
    try:
        from urllib.request import urlopen
        from urllib.parse import urlencode
    except ImportError:
        from urllib2 import urlopen
        from urllib import urlencode

    baseUrl='https://maps.googleapis.com/maps/api/place/nearbysearch/json?'
    radiusInMeters=radius*1000
    url_=baseUrl+urlencode([
        ('location','{},{}'.format(point[0][0],point[0][1])),
        ('radius',radiusInMeters),
        ('key',googlePlacesKEY),
    ])

    data=[]
    pagesFetched=0
    tokenRetries=0
    while True:
        # closing() guarantees the HTTP response is released even on error
        with closing(urlopen(url_)) as response:
            json_poi=json.loads(response.read().decode('utf-8'))

        status=json_poi.get('status')

        # A freshly issued next_page_token needs a short while to become
        # valid; until then the API answers INVALID_REQUEST with no results.
        # Retry the same page instead of mistaking it for the end of the data.
        if status=='INVALID_REQUEST' and pagesFetched>0 and tokenRetries<maxTokenRetries:
            tokenRetries+=1
            sleep(pageDelay)
            continue

        if status not in (None,'OK','ZERO_RESULTS'):
            # e.g. REQUEST_DENIED / OVER_QUERY_LIMIT: report instead of
            # silently returning an empty or truncated result set
            print('Google Places request at {} failed with status {}: {}'.format(
                point,status,json_poi.get('error_message','')))
            break

        tokenRetries=0
        pagesFetched+=1
        data.extend(json_poi.get('results',[]))

        #remind the user that the radius was too large to gather all points
        # (reaching a third page means the search may have hit the API's result cap)
        if pagesFetched>2:
            print('Radius was too large to gathered all places at {} where there are more than {} places'.format(point,len(data)))

        nextToken=json_poi.get('next_page_token')
        if not nextToken:
            break
        # more POIs to fetch: wait a little before sending the next request
        sleep(pageDelay)
        url_=baseUrl+urlencode([('pagetoken',nextToken),('key',googlePlacesKEY)])
    return data

# Step1: Generate a grid of points

In [56]:
#SET THE PARAMETERS FOR PLACE AND RADIUS
import os

# Two opposite corners of the area to crawl: [lat1, lon1, lat2, lon2] in degrees
boundingBox=[37.874363, -122.269306,37.868859, -122.253050];
radiusOfQuery=0.3; # in km
#################################################################
#for this code to work, a google API key is needed, please register 
# and enable the google places api for your account
# https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is read from the GOOGLE_PLACES_API_KEY environment variable so that
# it is never stored in (or published with) the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked in the Google Cloud console.
googlePlacesKEY=os.environ.get('GOOGLE_PLACES_API_KEY','')
if not googlePlacesKEY:
    raise ValueError('Set the GOOGLE_PLACES_API_KEY environment variable to your Google Places API key')
#################################################################


#this generates grid points spaced approximately radiusOfQuery (km) apart
lat,lon=GetSearchPoints(boundingBox,radiusOfQuery);
print("number of points in the grid are {}".format(len(lat)))

number of points in the grid are 49



# Step2: Query Google API for every point

In [58]:
#This starts crawling Google API
#This code could take a while
#to test the code you can set QueryLimit below to a positive number:
#at most that many grid points are queried, so the cell finishes faster
QueryLimit=-1; #zero or negative means query all points

# place_id -> place record; keying by place_id de-duplicates POIs that are
# returned by several overlapping searches (the first record seen is kept)
data={}

# Report progress about every 10% of the grid. Floor division keeps the step
# an integer on Python 3, and max(1, ...) avoids a modulo-by-zero when the
# grid has fewer than 10 points.
progressStep=max(1,len(lat)//10)

for i in range(0,len(lat)):
    # Enforce the limit BEFORE sending a request so exactly QueryLimit
    # points are queried (checking afterwards sent extra queries).
    if QueryLimit>0 and i>=QueryLimit:
        break

    pointCoordinate=[(lat[i],lon[i])]
    queryResults=fetchFromGooglePOI(pointCoordinate,radiusOfQuery,googlePlacesKEY)
    for place in queryResults:
        data.setdefault(place['place_id'],place)

    if i%progressStep==0:
        print ('crawled {} points of {}'.format(i,len(lat)))

inc=len(data)  # number of unique POIs gathered
print("---------")
print('done. Gathered {} POIs'.format(inc))

crawled 0 points of 49
crawled 4 points of 49
Radius was too large to gathered all places at [(37.874363000000002, -122.26659666666667)] where there are more than 40 places
crawled 8 points of 49
crawled 12 points of 49
Radius was too large to gathered all places at [(37.873445666666669, -122.26388733333333)] where there are more than 40 places
crawled 16 points of 49
Radius was too large to gathered all places at [(37.871611000000001, -122.26388733333333)] where there are more than 40 places
Radius was too large to gathered all places at [(37.869776333333334, -122.26388733333333)] where there are more than 40 places
crawled 20 points of 49
Radius was too large to gathered all places at [(37.874363000000002, -122.261178)] where there are more than 40 places
Radius was too large to gathered all places at [(37.872528333333335, -122.261178)] where there are more than 40 places
crawled 24 points of 49
Radius was too large to gathered all places at [(37.870693666666668, -122.261178)] where 

# Step3: Save the data to JSON file

In [None]:
# Write the crawled places dictionary to disk as JSON.
# Change the output file name below as you see fit.
with open('BerkeleyPlaces.json', 'w') as outFile:
    outFile.write(json.dumps(data))

# Viewing the crawled data: place names

In [59]:
# List the name of every place gathered from the Google API, one per line
for placeRecord in data.values():
    print(placeRecord['name'])

Stadium Garage (Impark)
Heyday
University of California, Berkeley Libraries
Caffè Strada
Hearst Mining Circle
Chapel of St Demetrios
Evans Diamond
Edwards Stadium
Koret Visitor Center
University Health Services: Plum Carole M
Menchie's Frozen Yogurt. Berkeley’s Only Self Serve Frozen Yogurt
Beta Lounge
The Magnes Collection of Jewish Art and Life
Blow Salon
California Memorial Stadium
Hertz Hall
roundCorner, Inc
Doe Memorial Library
Cultivating Capital
Institute of Salesian Studies at Don Bosco Hall
Yogurt Park
K Street Flats
Urbann Turbann
Hotel Shattuck Plaza
South Hall
Cinnaholic
REALM Middle School
Half Price Books
Berkeley Law Admissions Office
Charles Schwab
Downtown Berkeley Inn
Taste of the Himalayas
Hearst Tennis Courts
Cal1 Card Office
Copy Central Square
Pimentel Hall
Northside Cafe
CREAM Berkeley
Graduate School of Education
SOD Treatment Training
California Jazz Conservatory
Nash Hotel
Birge Hall
Haas School of Business
International House Cafe
Berkeley Espresso
Julia's Re