In [1]:
import requests
import json
import time
import csv
import pandas as pd

Step 1: Setup Functions to Retrieve Data via GET and Count Shops/Store Shop Info

In [2]:
#API KEY: ENTER API KEY HERE
# retrives data via GET request
class GooglePlaces(object):
    def __init__(self, apiKey):
        super(GooglePlaces, self).__init__()
        self.apiKey = apiKey

    def search_places_by_coordinate(self, location, radius, types):
        endpoint_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
        places = []
        params = {
            'location': location,
            'radius': radius,
            'types': types,
            'key': self.apiKey
        }
        res = requests.get(endpoint_url, params = params)
        results =  json.loads(res.content)
        places.extend(results['results'])
        time.sleep(2)
        while "next_page_token" in results:
            params['pagetoken'] = results['next_page_token'],
            res = requests.get(endpoint_url, params = params)
            results = json.loads(res.content)
            places.extend(results['results'])
            time.sleep(2)
        return places

    def get_place_details(self, place_id, fields):
        endpoint_url = "https://maps.googleapis.com/maps/api/place/details/json"
        params = {
            'placeid': place_id,
            'fields': ",".join(fields),
            'key': self.apiKey
        }
        res = requests.get(endpoint_url, params = params)
        place_details =  json.loads(res.content)
        return place_details

In [28]:
# TEST GROUND 1 - WORKED!!! :)
# This compiles the list of cafes within 4500m -> 4.5km around the latitude and longitude of the universities main address
    
def get_shops(uni_name, uni_coordinates):
    places = api.search_places_by_coordinate(uni_coordinates, "45000", "cafe")
    count = 0
    
    for place in places:
        count = count + 1
        details = api.get_place_details(place['place_id'], fields)
        
        try:
            name = details['result']['name']
        except KeyError:
            name = ""

        try:
            address = details['result']['formatted_address']
        except KeyError:
            address = ""

        try:
            reviews = details['result']['reviews']
        except KeyError:
            reviews = []
        
        #print(name)
        writer.writerow({'University': uni_name,'Name': name, 'Address': address})
        
    return count

Step 2: Reading in List of Universities 

In [4]:
df = pd.read_csv ('QSWorldRanking.csv')
df.head(5)

Unnamed: 0,QS World University Rankings by Subject,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,Mathematics,,,,,,,,,,,,,,
1,2021,2020.0,Institution,Location,Academic,Employer,Citations,H,Score,# coffee shops (5km),Sent Survey,email,latitude,longitude,# coffee shops (4.5km)
2,1,1.0,Massachusetts Institute of Technology (MIT),United States,100.0,98.0,85.9,98.7,96.5,8.0,Y,dept_head: goemans@math.mit.edu,42.360265,-71.094160,
3,2,3.0,Stanford University,United States,96.5,95.6,93.1,100.0,96.3,27.0,y,mazzeo@math.stanford.edu,37.427645,-122.169730,
4,3,2.0,Harvard University,United States,97.4,100.0,89.7,95.7,96.0,20.0,Y,mainoffice@math.harvard.edu / jpajovic@math.h...,42.374681,-71.109557,


In [5]:
df2 = df.drop(['Unnamed: 3','Unnamed: 4', 'Unnamed: 5','Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11'], axis = 1) # dropping columns irrelevant to location
df2 = df2.drop([0]) # dropping useless row
df2 = df2.rename(columns=df2.iloc[0]).drop(df2.index[0]) # using row as header titles

df2.head(5)

Unnamed: 0,2021,2020,Institution,latitude,longitude,# coffee shops (4.5km)
2,1,1,Massachusetts Institute of Technology (MIT),42.360265,-71.09416,
3,2,3,Stanford University,37.427645,-122.16973,
4,3,2,Harvard University,42.374681,-71.109557,
5,4,4,University of Cambridge,,,
6,5,5,University of Oxford,,,


In [6]:
df3 = df2[df2['latitude'].notna()] # taking only US universities with found lat/long
df3.head(5)

Unnamed: 0,2021,2020,Institution,latitude,longitude,# coffee shops (4.5km)
2,1,1,Massachusetts Institute of Technology (MIT),42.360265,-71.09416,
3,2,3,Stanford University,37.427645,-122.16973,
4,3,2,Harvard University,42.374681,-71.109557,
7,6,6,"University of California, Berkeley (UCB)",37.87184,-122.258647,
8,7,7,Princeton University,40.345254,-74.655396,


In [7]:
df3["uni_coord"] = df3.latitude.str.cat(df3.longitude, ', ')
df3.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3["uni_coord"] = df3.latitude.str.cat(df3.longitude, ', ')


Unnamed: 0,2021,2020,Institution,latitude,longitude,# coffee shops (4.5km),uni_coord
2,1,1,Massachusetts Institute of Technology (MIT),42.360265,-71.09416,,"42.360265, -71.094160"
3,2,3,Stanford University,37.427645,-122.16973,,"37.427645, -122.169730"
4,3,2,Harvard University,42.374681,-71.109557,,"42.374681, -71.109557"


3. Running Main Code, Iterating Through List of All Universities

In [31]:
api = GooglePlaces("ENTER API KEY HERE")
fields = ['name', 'formatted_address', 'international_phone_number', 'website', 'rating', 'review']
fieldnames = ['University', 'Name', 'Address']

# create file to save information
coffee_count = []
with open('CoffeInfo.csv', mode='w', newline='', encoding="utf-8") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    
    for i in range(0, 92):
        temp_count = get_shops(df3.iloc[i,2], df3.iloc[i,6])
        #print(temp_count)
        coffee_count.append(temp_count)

60
60
60
60
36
60
60
60
60
60
60
60
60
60
60
57
60
60
60
60
60
60
60
60
52
60
36
60
59
59
60
60
60
60
60
60
60
55
60
47
35
60
60
60
60
37
37
60
60
60
5
60
60
60
60
60
48
60
60
60
51
60
54
60
56
23
60
60
47
58
30
60
40
38
41
45
25
0
56
38
25
60
31
45
60
32
60
60
60
60
60
60
