## Using Foursquare API to extract parks data in Berlin ##

#### 1. Importing Necessary Libraries ####

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


#### 2.Loading CSV file berlin_locaclities which was stored in the last part of data_extraction file ####

In [3]:
df = pd.read_csv(r"C:\Users\GAURI TOSHNIWAL\Documents\zcoursera\Capstone\final_project\berlin_localities.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,Locality,Area in km²,Population as of 2008,Density inhabitants per km²,latitude,longitude
0,0,(0101) Mitte,10.7,79582,7445,52.51769,13.402376
1,1,(0102) Moabit,7.72,69425,8993,52.530102,13.342542
2,2,(0103) Hansaviertel,0.53,5889,11111,52.519123,13.341873
3,3,(0104) Tiergarten,5.17,12486,2415,52.5145,13.3501
4,4,(0105) Wedding,9.23,76363,8273,52.550123,13.34197


In [5]:
#Removing unnecessary Column
del df['Unnamed: 0']

In [6]:
df.head()

Unnamed: 0,Locality,Area in km²,Population as of 2008,Density inhabitants per km²,latitude,longitude
0,(0101) Mitte,10.7,79582,7445,52.51769,13.402376
1,(0102) Moabit,7.72,69425,8993,52.530102,13.342542
2,(0103) Hansaviertel,0.53,5889,11111,52.519123,13.341873
3,(0104) Tiergarten,5.17,12486,2415,52.5145,13.3501
4,(0105) Wedding,9.23,76363,8273,52.550123,13.34197


#### 3.Plotting all Localities on the Berlin Map ####

In [31]:
# Finding Coordinates of Berlin City
address = 'Berlin'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Berlin City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Berlin City are 52.5170365, 13.3888599.


In [32]:
# creating map of Berlin City using latitude and longitude values
map_berlin = folium.Map(location=[latitude, longitude], zoom_start=12)

# adding markers to map
for lat, lng, Locality in zip(df['latitude'], df['longitude'],df['Locality']):
    label = '{}'.format(Locality)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_berlin)  
map_berlin

#### 4. Using Foursquare API for finding parks in each Locality ####

In [10]:
CLIENT_ID = 'CVDUCJDQZN2GITGJSRMOA22KTGC4DV10JWSXB3BNVLVCY3FN' # your Foursquare ID
CLIENT_SECRET = 'SAOHDXTHFGL4OE0N3ATWAT0AEJUIVTNNCRUYM2BDBW5AOTPB' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: CVDUCJDQZN2GITGJSRMOA22KTGC4DV10JWSXB3BNVLVCY3FN
CLIENT_SECRET:SAOHDXTHFGL4OE0N3ATWAT0AEJUIVTNNCRUYM2BDBW5AOTPB


In [25]:
#Intializing CategoryId which will be used in url these are ids for parks, playgrounds, gardens
categoryId = '4bf58dd8d48988d163941735,4bf58dd8d48988d15a941735,4bf58dd8d48988d1e7941735,4bf58dd8d48988d166941735,52e81612bcbc57f1066b7a22'

In [26]:
#Function for forming url for each Neighbourhood/Locality seraching in an area of 3000m radius
#Function also fetches the necessary data from json and returns relevant information
radius = 3000
LIMIT = 300
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,categoryId)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
#Calling the above function for each Locality in the Dataframe df
berlin_venues = getNearbyVenues(names=df['Locality'],
                                   latitudes=df['latitude'],
                                   longitudes=df['longitude']
                                  )

(0101) Mitte
(0102) Moabit
(0103) Hansaviertel
(0104) Tiergarten
(0105) Wedding
(0106) Gesundbrunnen
(0201) Friedrichshain
(0202) Kreuzberg
(0301) Prenzlauer Berg
(0302) Weißensee
(0303) Blankenburg
(0304) Heinersdorf
(0305) Karow
(0306) Stadtrandsiedlung Malchow
(0307) Pankow
(0308) Blankenfelde
(0309) Buch
(0310) Französisch Buchholz
(0311) Niederschönhausen
(0312) Rosenthal
(0313) Wilhelmsruh
(0401) Charlottenburg
(0402) Wilmersdorf
(0403) Schmargendorf
(0404) Grunewald
(0405) Westend
(0406) Charlottenburg-Nord
(0407) Halensee
(0501) Spandau
(0502) Haselhorst
(0503) Siemensstadt
(0504) Staaken
(0505) Gatow
(0506) Kladow
(0507) Hakenfelde
(0508) Falkenhagener Feld
(0509) Wilhelmstadt
(0601) Steglitz
(0602) Lichterfelde
(0603) Lankwitz
(0604) Zehlendorf
(0605) Dahlem
(0606) Nikolassee
(0607) Wannsee
(0701) Schöneberg
(0702) Friedenau
(0703) Tempelhof
(0704) Mariendorf
(0705) Marienfelde
(0706) Lichtenrade
(0801) Neukölln
(0802) Britz
(0803) Buckow
(0804) Rudow
(0805) Gropiusstadt
(090

In [28]:
#data returned by the API 
berlin_venues.head(30)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,(0101) Mitte,52.51769,13.402376,Lustgarten,52.518469,13.399454,Garden
1,(0101) Mitte,52.51769,13.402376,Marx-Engels-Forum,52.518788,13.403254,Park
2,(0101) Mitte,52.51769,13.402376,Central Berlin,52.519535,13.404598,Garden
3,(0101) Mitte,52.51769,13.402376,Berliner Dom,52.519133,13.401046,Church
4,(0101) Mitte,52.51769,13.402376,Kastanienwäldchen,52.518168,13.395209,Park
5,(0102) Moabit,52.530102,13.342542,Unionspark,52.531583,13.338415,Park
6,(0102) Moabit,52.530102,13.342542,Kleiner Tiergarten,52.525647,13.343458,Park
7,(0102) Moabit,52.530102,13.342542,Spielplatz am Stephanplatz,52.5336,13.347076,Playground
8,(0102) Moabit,52.530102,13.342542,PARK ONE Tiefgarage Schultheiss Quartier,52.526599,13.343848,Parking
9,(0102) Moabit,52.530102,13.342542,Am Park,52.525905,13.343338,Mediterranean Restaurant


In [29]:
berlin_venues.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 7 columns):
Neighborhood              95 non-null object
Neighborhood Latitude     95 non-null float64
Neighborhood Longitude    95 non-null float64
Venue                     95 non-null object
Venue Latitude            95 non-null float64
Venue Longitude           95 non-null float64
Venue Category            95 non-null object
dtypes: float64(4), object(3)
memory usage: 5.3+ KB


In [30]:
#Finding out category of data returned
berlin_venues['Venue Category'].value_counts()

Park                        55
Playground                   6
Garden                       5
Hotel                        3
Café                         3
Parking                      2
Bus Stop                     2
Plaza                        2
Scenic Lookout               1
Palace                       1
Neighborhood                 1
Mediterranean Restaurant     1
Shopping Mall                1
Doctor's Office              1
Sculpture Garden             1
Church                       1
Corporate Amenity            1
History Museum               1
Light Rail Station           1
Multiplex                    1
Lake                         1
Water Park                   1
Gym / Fitness Center         1
Rock Climbing Spot           1
Beer Garden                  1
Name: Venue Category, dtype: int64

#### 5. Storing the resulting data as a CSV file named Berlin_venues_refined

In [None]:
berlin_venues.to_csv(r'C:\Users\GAURI TOSHNIWAL\Documents\zcoursera\Capstone\final_project\berlin_venues_refined.csv')