# <a href="http://www.galvanize.com/event/cognitive-builder-faire-san-francisco-4-21-4-23/"><img src="https://ibm.box.com/shared/static/yx6xj0mobw6lagt7g60wgplo7mae8ufk.png" width=1000></a>

<hr>

# <center> Learning the Foursquare API with Python
## <center><a href="https://www.linkedin.com/in/aklson/">Alex Aklson</a>, Ph.D. (Data Scientist, IBM)

<div class="alert alert-block alert-info" style="margin-top: 20px">
<h2> Table of Contents</h2>  
<font size = 3>
1. <a href="#item1">Foursquare API Search Function</a>    
2. <a href="#item2">Explore a Given Venue</a>   
3. <a href="#item3">Explore a User</a>  
4. <a href="#item4">Foursquare API Explore Function</a>  
5. <a href="#item5">Get Trending Venues</a>    
6. <a href="#item6">Explore real world data - San Francisco Crime Rate</a>  
7. <a href="#item7">Use Foursquare API to do cool analysis</a>  
</font>
<br>
<p></p>

Estimated Time Needed: <strong>60 min</strong>
</div>

### Import necessary Libraries

In [None]:
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
import requests # library to handle requests
import pandas as pd # library for data analysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
 
# library for transforming JSON into a pandas dataframe
from pandas.io.json import json_normalize

# plotting libraries
import matplotlib.pyplot as plt
%matplotlib inline

print "Libraries imported."

### Define Foursquare Credentials and Version

##### Make sure that you have created a Foursquare developer account and have your credentials handy

In [None]:
import os

# Credentials are read from the environment so that real secrets never live in the notebook;
# export FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET, or replace the placeholders locally.
CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "your Foursquare client id here")
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "your Foursquare client secret here")
VERSION = "20170511"  # sent as the `v` parameter of every request
LIMIT = 30  # sent as the `limit` parameter of the search / explore requests

#### Install plotly and make sure it is upgraded to the latest version

In [None]:
!pip install plotly

import plotly
import plotly.plotly as py
from plotly.graph_objs import *

#### Define an address to explore and convert it to its latitude and longitude equivalents

In [None]:
address = "315 Hudson Street New York, NY"

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print latitude, longitude

<a id="item1"></a>

## 1. Search for a specific venue category
> `https://api.foursquare.com/v2/venues/`**search**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&ll=`**latitude**`,`**longitude**`&v=`**VERSION**`&query=`**search_query**`&radius=`**radius**`&limit=`**LIMIT**

#### Define a search query for Italian restaurants

In [None]:
# term sent as the `query` parameter of the venue search below;
# presumably left empty on purpose so it can be filled in (e.g. "Italian") -- confirm
search_query = ""
print search_query + " ¯\_(ツ)_/¯ .... OK!"

#### Define the corresponding URL

In [None]:
radius = 1000 # sent as the `radius` parameter (presumably metres -- confirm against the Foursquare docs)
# build the venues/search URL from the credentials, the geocoded location and the search settings
url="https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

#### Send the GET Request and examine the results

In [None]:
# send the GET request and decode the JSON body of the response
results = requests.get(url).json()
results

#### Get relevant part of JSON and transform it into a *pandas* dataframe

In [None]:
# assign relevant part of JSON to venues
venues = results["response"]["venues"]

# transform venues into a dataframe (nested fields become dotted column names)
dataframe = json_normalize(venues)
dataframe.head()

#### Define information of interest and filter dataframe

In [None]:
# keep only columns that include venue name, url, and anything that is associated with location
filtered_columns = ['name', 'url', 'categories', 'verified'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
# reindex() replaces the deprecated `.ix` indexer: it returns a new dataframe, and any
# requested column that the API did not return is filled with NaN instead of raising
dataframe_filtered = dataframe.reindex(columns=filtered_columns)

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping (e.g. a dataframe row)
        Must provide the venue's category list under the key ``"categories"``
        or, failing that, under ``"venue.categories"``.

    Returns
    -------
    The ``name`` of the first category encoded to ASCII (characters that
    cannot be encoded are dropped), or ``None`` when the venue has no
    categories.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row["categories"]
    except KeyError:
        # flattened explore results prefix the venue fields with "venue."
        categories_list = row["venue.categories"]

    # covers both an empty list and a missing (None) value
    if not categories_list:
        return None
    return categories_list[0]["name"].encode('ascii', errors='ignore')

# replace each row's list of category objects by the name of its first category
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term (e.g. "location.lat" becomes "lat")
dataframe_filtered.columns = [column.split(".")[-1] for column in dataframe_filtered.columns]
dataframe_filtered.head(10)

#### Let's retrieve Foursquare's categories and corresponding IDs

In [None]:
# define URL for categories
url = "https://api.foursquare.com/v2/venues/categories?client_id={}&client_secret={}&v={}".format(CLIENT_ID, CLIENT_SECRET, VERSION)

# send call request and get categories
results = requests.get(url).json()
categories = results["response"]["categories"]

# loop through categories and print them
total = 0
for i in range(len(categories)):
    main_category = categories[i]["name"]
    print "Main Category: " + main_category, categories[i]["id"]
    count = 0
    for category in categories[i]["categories"]:
        print category["name"], category["id"]
        count += 1
        total += 1
    print count
print total

#### Redefine URL to include category ID for better matching

In [None]:
category_id = "" # ID for Italian restaurants (to be filled in; sent as the `categoryId` parameter)
# same search URL as before, with the category ID added
url="https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&categoryId={}&limit={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, category_id, LIMIT)
url

#### Send GET request and place results in a dataframe

In [None]:
# send GET request
results = requests.get(url).json()

# assign relevant part of JSON to venues
venues = results["response"]["venues"]

# transform venues into a dataframe
dataframe = json_normalize(venues)

# filter columns; reindex() replaces the deprecated `.ix` indexer and fills any
# column missing from this response with NaN instead of raising
dataframe_filtered = dataframe.reindex(columns=filtered_columns)

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split(".")[-1] for column in dataframe_filtered.columns]
dataframe_filtered.head(10)

#### Let's visualize the Italian restaurants that are nearby

##### Set your Plotly and Mapbox credentials

In [None]:
# replace both placeholders with your own values before running the plotting cells
plotly.tools.set_credentials_file(username='username here', api_key='api key here') # plotly credentials
mapbox_access_token = 'mapbox access token here' # Mapbox token

In [None]:
# create the data to plot: the searched address first (red), then every venue found (blue)
data = Data(
    [
        Scattermapbox(
            lat=[latitude] + list(dataframe_filtered.lat),
            lon=[longitude] + list(dataframe_filtered.lng),
            mode='markers',
            marker=Marker(
                size=10,
                color=["red"] + ["blue"]*len(dataframe_filtered)
            ),
            text=["Galvanize"] + list(dataframe_filtered.name),
        )
    ]
)

# define the layout and centre the map around Galvanize
layout = Layout(
    autosize=False,
    width=3000,
    height=800,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # use the exact coordinates: truncating them with int() moved the
        # centre by up to one degree away from the plotted markers
        center=dict(
            lat=latitude,
            lon=longitude
        ),
        pitch=0,
        zoom=6
    ),
)

# plot the data
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Italian Restaurants Around Galvanize New York')

<a id="item2"></a>

## 2. Explore a Given Venue
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**

### A. Let's explore the first verified restaurant -- _Galli Restaurant_

In [None]:
venue_id = "" # ID of Galli Restaurant (to be filled in)
# build the venue-details URL for that ID
url="https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}".format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

#### Send GET request for result

In [None]:
# send the GET request and list the fields available for the venue
result = requests.get(url).json()
result["response"]["venue"].keys()

### B. Get the venue's overall rating

In [None]:
# read the "rating" field of the venue details (raises KeyError if the response has none)
result["response"]["venue"]["rating"]

### C. Get the number of tips

In [None]:
# read the "count" field of the venue's "tips" entry
result["response"]["venue"]["tips"]["count"]

### D. Get the venue's tips
> `https://api.foursquare.com/v2/venues/`**VENUE_ID**`/tips?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**

#### Create URL and send GET request. Make sure to set limit to get all tips

In [None]:
## Galli Restaurant tips
# the limit is hard-coded to 150 in the URL so that more tips are requested than the notebook-wide LIMIT
url="https://api.foursquare.com/v2/venues/{}/tips?client_id={}&client_secret={}&v={}&limit=150".format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

# send the GET request and decode the JSON body of the response
results = requests.get(url).json()
results

#### Get tips and list of associated features

In [None]:
# list of tip objects returned for the venue
tips = results["response"]["tips"]["items"]

# look at the first tip to see which fields a tip carries
tip = tips[0]
tip.keys()

#### Format column width and display all tips

In [None]:
pd.set_option('display.max_colwidth', -1) # show the full text of each tip

tips_df = json_normalize(tips) # json normalize tips

# columns to keep
filtered_columns = ["text", "agreeCount", "disagreeCount", "id", "user.firstName", "user.lastName", "user.gender", "user.id"]
# reindex() replaces the deprecated `.ix` indexer; user fields absent from
# the response become NaN columns instead of raising
tips_filtered = tips_df.reindex(columns=filtered_columns)

# display tips
tips_filtered

<a id="item3"></a>

## 3. Search a Foursquare User
> `https://api.foursquare.com/v2/users/`**USER_ID**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&v=`**VERSION**

### Define URL, send GET request and display features associated with user

In [None]:
user_id="" # user ID with most agree counts and complete profile (to be filled in from the tips table)

url="https://api.foursquare.com/v2/users/{}?client_id={}&client_secret={}&v={}".format(user_id, CLIENT_ID, CLIENT_SECRET, VERSION) # define URL

# send GET request and keep the "user" part of the response
results = requests.get(url).json()
user_data = results["response"]["user"]

# display features associated with user
user_data.keys()

In [None]:
# print the basic profile fields of the user, one labelled line per field
for label, key in (("First Name: ", "firstName"), ("Last Name: ", "lastName"), ("Home City: ", "homeCity")):
    print(label + user_data[key])

#### How many tips has this user submitted?

In [None]:
# display the "tips" entry of the user profile (presumably includes the tip count -- confirm against the response)
user_data["tips"]

### Get User's tips

In [None]:
# define tips URL (up to 100 tips are requested)
url="https://api.foursquare.com/v2/users/{}/tips?client_id={}&client_secret={}&v={}&limit={}".format(user_id, CLIENT_ID, CLIENT_SECRET, VERSION, 100)

# send GET request and get user's tips
results = requests.get(url).json()
tips = results["response"]["tips"]["items"]

# format column width
pd.set_option('display.max_colwidth', -1)

tips_df = json_normalize(tips)

# filter columns; reindex() replaces the deprecated `.ix` indexer and fills
# any column missing from the response with NaN
filtered_columns = ["text", "agreeCount", "disagreeCount", "id"]
tips_filtered = tips_df.reindex(columns=filtered_columns)

# display user's tips
tips_filtered

#### Let's get the venue for the tip with a disagree count

In [None]:
tip_id = "" # tip id

# define URL
url = "http://api.foursquare.com/v2/tips/{}?client_id={}&client_secret={}&v={}".format(tip_id, CLIENT_ID, CLIENT_SECRET, VERSION)

# send GET Request and examine results
result = requests.get(url).json()
print result["response"]["tip"]["venue"]["name"]
print result["response"]["tip"]["venue"]["location"]

### Get User's friends

In [None]:
# flatten the items of the first friends group of the user profile into a dataframe
user_friends = json_normalize(user_data["friends"]["groups"][0]["items"])
user_friends

<a id="item4"></a>

## 4. Explore a location
> `https://api.foursquare.com/v2/venues/`**explore**`?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&ll=`**LATITUDE**`,`**LONGITUDE**`&v=`**VERSION**`&limit=`**LIMIT**

#### Get latitude and longitude values of Galli Restaurant

In [None]:
# look the venue up by its ID in the search results, which carry each venue's coordinates
# (requires venue_id to be one of the venues listed in dataframe_filtered)
venue_row = dataframe_filtered[dataframe_filtered["id"] == venue_id].iloc[0]
latitude = venue_row["lat"]
longitude = venue_row["lng"]

#### Define URL

In [None]:
radius = 500 # narrower radius than the one used for the venue search
# build the venues/explore URL around the current latitude / longitude
url="https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

#### Send GET request and examine results

In [None]:
# send the GET request and decode the JSON body of the response
results = requests.get(url).json()
results

#### Get relevant part of JSON

In [None]:
# items of the first group in the explore response; each item wraps a "venue" object
items = results["response"]["groups"][0]["items"]
# fields available on the first venue
items[0]["venue"].keys()

#### Process JSON and convert it to a clean dataframe

In [None]:
dataframe = json_normalize(items) # flatten JSON

# filter columns; reindex() replaces the deprecated `.ix` indexer and fills
# columns missing from the response (e.g. an absent rating) with NaN
filtered_columns = ['venue.name', 'venue.url', 'venue.categories'] + ["venue.rating"] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ["venue.id"]
dataframe_filtered = dataframe.reindex(columns=filtered_columns)

# filter the category for each row
dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean columns
dataframe_filtered.columns = [col.split(".")[-1] for col in dataframe_filtered.columns]

dataframe_filtered.head()

#### Let's visualize these items on the map around our location

In [None]:
# create the data to plot: the reference location first (red), then every explored venue (blue)
data = Data(
    [
        Scattermapbox(
            lat=[latitude] + list(dataframe_filtered.lat),
            lon=[longitude] + list(dataframe_filtered.lng),
            mode='markers',
            marker=Marker(
                size=10,
                color=["red"] + ["blue"]*len(dataframe_filtered)
            ),
            text=["Galli Restaurant"] + list(dataframe_filtered.categories),
        )
    ]
)

# define the layout and centre the map on the reference location (the red marker)
layout = Layout(
    autosize=False,
    width = 3000,
    height = 800,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # use the exact coordinates: truncating them with int() moved the
        # centre by up to one degree away from the plotted markers
        center=dict(
            lat=latitude,
            lon=longitude
        ),
        pitch=0,
        zoom=5
    ),
)

# plot the data
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Venues Around Galli Restaurant')

<a id="item5"></a>

## 5. Explore Trending Venues
> `https://api.foursquare.com/v2/venues/trending?client_id=`**CLIENT_ID**`&client_secret=`**CLIENT_SECRET**`&ll=`**LATITUDE**`,`**LONGITUDE**`&v=`**VERSION**`&limit=`**LIMIT**

### Let's get trending venues around us

In [None]:
# define URL
url="https://api.foursquare.com/v2/venues/trending?client_id={}&client_secret={}&ll={},{}&v={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION)

# send GET request and get trending venues
results = requests.get(url).json()
trending_venues = results["response"]["venues"]

trending_venues_df = json_normalize(trending_venues)

# filter columns; reindex() replaces the deprecated `.ix` indexer and fills
# any location field missing from the response with NaN
columns_filtered = ["name", "url", "categories"] + [col for col in trending_venues_df.columns if col.startswith("stats")] + ["location.distance", "location.address", "location.city", "location.postalCode", "location.state", "location.country", "location.lat", "location.lng"]
trending_venues_df = trending_venues_df.reindex(columns=columns_filtered)

# filter the category for each row
trending_venues_df['categories'] = trending_venues_df.apply(get_category_type, axis=1)

# display trending venues
trending_venues_df

In [None]:
# create the data to plot: the reference location first (red), then every trending venue (blue)
data = Data(
    [
        Scattermapbox(
            lat=[latitude] + list(trending_venues_df["location.lat"]),
            lon=[longitude] + list(trending_venues_df["location.lng"]),
            mode='markers',
            marker=Marker(
                size=10,
                color=["red"] + ["blue"]*len(trending_venues_df)
            ),
            text=["Galli Restaurant"] + list(trending_venues_df.categories),
        )
    ]
)

# define the layout and centre the map on the reference location (the red marker)
layout = Layout(
    autosize=False,
    width=3000,
    height=800,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # use the exact coordinates: truncating them with int() moved the
        # centre by up to one degree away from the plotted markers
        center=dict(
            lat=latitude,
            lon=longitude
        ),
        pitch=0,
        zoom=5
    ),
)

# plot the data
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Trending Venues Around Galli Restaurant')

<a id="item6"></a>

## 6. Crime in San Francisco Dataset

<a href="https://dev.socrata.com/foundry/data.sfgov.org/cuks-n6tp">Crime incidents in San Francisco dataset compiled by the police department</a>. The dataset contains data about incidents that occurred from **01/01/2003** to **12/31/2016**. You can learn more about the dataset by clicking the link above.

#### Download dataset from IBM server

In [None]:
!wget -O crime_data_SF.csv https://ibm.box.com/shared/static/z2rjkrelj9fc87d1lrw6gkm9sb91zu5h.csv

#### Read the data into a *pandas* dataframe

In [None]:
# load the CSV file downloaded in the previous cell
crime_data = pd.read_csv('crime_data_SF.csv')
print "CSV file read into a pandas dataframe"

### Quick Exploration of the data

#### 1. First five crimes that occurred in 2016

In [None]:
# first five rows of the dataframe
crime_data.head(5)

#### 2. Number of crimes and corresponding attributes

In [None]:
# (number of rows, number of columns)
crime_data.shape

#### 3. Let's zoom into the month of June 2016

In [None]:
# mask of the incidents whose "Date" value lies between the two bounds (both inclusive)
in_june = (crime_data["Date"] >= '2016-06-01') & (crime_data["Date"] <= '2016-06-30')

# keep the matching rows and renumber them from 0
crime_data_jun = crime_data[in_june].reset_index(drop=True)
crime_data_jun.shape

#### 4. List of attributes -- *any with missing values*?

In [None]:
# number of non-missing values per column; a count below the number of rows reveals missing values
crime_data_jun.notnull().sum()

#### 5. How many types of crimes-- *and* what are the top 10?

In [None]:
# number of incidents per crime category, most frequent first
jun_categories_count = crime_data_jun.Category.value_counts()
print("There are {} types of crimes. And the top 10 are:".format(len(jun_categories_count)))
print(jun_categories_count.head(10))

<a id="item7"></a>

## 7. Using Foursquare to further analyze Crime in San Francisco

#### Write a function that returns the venue categories around a given location

In [None]:
# a function that returns the categories and the distances of the 10 closest venues to a crime scene
def get_nearby_venues(row):
    """Return the categories and distances of the 10 venues closest to a crime scene.

    Parameters
    ----------
    row : mapping (e.g. a dataframe row)
        Crime incident providing its latitude under ``"Y"`` and its
        longitude under ``"X"``.

    Returns
    -------
    tuple
        ``(categories, distances)``: two ``', '``-separated strings listing,
        closest venue first, the category and the distance of each venue.
        Venues without a category are left out of both strings.
        ``(None, None)`` when no usable venue is returned for the location.
    """
    latitude = row["Y"]
    longitude = row["X"]

    # create the URL using the latitude and longitude of the crime scene
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION)

    # send the GET request and get the venues
    results = requests.get(url).json()
    venues = results["response"]["groups"][0]["items"]
    if not venues:
        return None, None

    # convert the venues JSON into a dataframe for efficient processing
    dataframe = json_normalize(venues)

    # extract the categories; rows without one are dropped because str.cat() skips
    # missing values, which would leave the two returned strings out of step
    dataframe['venue.categories'] = dataframe.apply(get_category_type, axis=1)
    dataframe.dropna(subset=['venue.categories'], inplace=True)
    if dataframe.empty:
        return None, None

    # sort the venues by ascending order of distance and reset indices
    dataframe.sort_values(['venue.location.distance'], ascending=[True], inplace=True)
    dataframe.reset_index(inplace=True, drop=True)

    # head() keeps exactly num_venues rows; the label slice .loc[0:num_venues]
    # is inclusive on both ends and returned one venue too many
    num_venues = 10
    closest_venues = dataframe.head(num_venues)
    categories_nearby_venues = closest_venues["venue.categories"].str.cat(sep=', ')
    distances_nearby_venues = closest_venues["venue.location.distance"].apply(str).str.cat(sep=', ')
    return categories_nearby_venues, distances_nearby_venues

def get_nearby_venues_categories(row):
    """Return only the categories part of the get_nearby_venues() result for `row`."""
    categories, _ = get_nearby_venues(row)
    return categories

def get_nearby_venues_distances(row):
    """Return only the distances part of the get_nearby_venues() result for `row`."""
    _, distances = get_nearby_venues(row)
    return distances

#### Write another function that prints statistics for a given crime category

In [None]:
def print_most_common_categories(crime):
    """Print the mean distance to each venue category found near a sample of crimes.

    Up to 10 incidents of the given category are sampled from ``crime_data``,
    the venues around each of them are fetched with ``get_nearby_venues`` and
    the mean distance per venue category is printed in ascending order.

    Parameters
    ----------
    crime : str
        Value of the ``"Category"`` column to analyse.
    """
    # filter data for the given crime
    crime_category_data = crime_data[crime_data["Category"] == crime].reset_index(drop=True)
    if crime_category_data.empty:
        print("No incidents found for category: {}".format(crime))
        return

    # randomly select 10 crimes of this category; a dedicated seeded NumPy generator
    # makes the sample reproducible (seeding the stdlib `random` module has no
    # effect on NumPy's shuffling)
    num_crimes = 10
    sample_rows = np.random.RandomState(1234).permutation(len(crime_category_data))[:num_crimes]
    crime_category_data = crime_category_data.iloc[sample_rows].reset_index(drop=True)

    # fetch the nearby venues once per crime (one request instead of two per row)
    nearby_venues = [get_nearby_venues(row) for _, row in crime_category_data.iterrows()]

    # flatten the ', '-separated strings of every crime into two parallel lists
    most_common_categories = []
    most_common_categories_distances = []
    for categories, distances in nearby_venues:
        if not categories or not distances:
            continue  # nothing was found around this crime scene
        most_common_categories.extend(categories.split(", "))
        most_common_categories_distances.extend(int(distance) for distance in distances.split(", "))

    # print average distance for each category
    summary_dataframe = pd.DataFrame(columns = ['Category', 'Distance'])
    summary_dataframe["Category"] = pd.Series(most_common_categories)
    summary_dataframe["Distance"] = pd.Series(most_common_categories_distances)
    print(summary_dataframe.groupby(["Category"])["Distance"].mean().sort_values())

#### Let's investigate LARCENY/THEFT

In [None]:
# value of the "Category" column to analyse
crime = "LARCENY/THEFT"
print_most_common_categories(crime)

#### How close to the nearest bar are people who drive under the influence when they get pulled over by cops?

In [None]:
# subset dataframe for driving under the influence crimes; copy() makes DUI an independent
# dataframe, so the column added to it later does not write into a slice of crime_data_jun
DUI = crime_data_jun.loc[crime_data_jun["Category"] == "DRIVING UNDER THE INFLUENCE"].copy()
DUI.head() # display the first five instances

In [None]:
# get_closest_distance is a function that returns the distance between the crime scene and the closest bar
def get_closest_distance(row):
    """Return the distance between a crime scene and the closest venue found for "bar".

    Parameters
    ----------
    row : mapping (e.g. a dataframe row)
        Crime incident providing its latitude under ``"Y"`` and its
        longitude under ``"X"``.

    Returns
    -------
    The smallest ``location.distance`` among the venues returned by the
    search, or NaN when the search returns no venue (or no distance).
    """
    latitude = row["Y"] # get latitude value of crime scene
    longitude = row["X"] # get longitude value of crime scene

    # NOTE(review): this is a free-text query, so it presumably also matches venues
    # that merely have "bar" in their name -- confirm against the Foursquare docs
    search_query = "bar" # define search query

    # define URL and send GET request
    url="https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&limit={}".format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, LIMIT)
    results = requests.get(url).json()

    # Get venues from returned results
    venues = results["response"]["venues"]
    if not venues:
        return np.nan
    bars_df = json_normalize(venues)
    if "location.distance" not in bars_df.columns:
        return np.nan

    # take the minimum directly: sorting and then reading label 0 returned the
    # distance of the first venue in the response, not of the closest one
    return bars_df["location.distance"].min()

In [None]:
# get_closest_distance sends one Foursquare request per row, so this cell issues as many requests as DUI has rows
DUI["closest_distance"] = DUI.apply(get_closest_distance, axis=1) # add column of distance to closest bar for each crime incident

In [None]:
# every row of DUI has the same Category, so the result is a single mean value
DUI.groupby(["Category"])["closest_distance"].mean() # compute the mean

## <center>Please Tweet about us! 
<a href=https://twitter.com/intent/tweet?text=Learning+%23datascience+at+%23CognitiveBuilder+%40BuildWithWatson+%40Galvanize+%40BigDataU+Free+data+science+courses%3A++http%3A%2F%2Fbit.ly%2F2o23Sl9><img src=https://ibm.box.com/shared/static/oza9rtt3xgxz310v9k197qadpb5yy38n.png style='border:1px solid #D3D3D3' width = 800></a>

# <center> Interested in learning how to combine Foursquare location data and machine learning?
## <center> Make sure to attend Polong Lin's tutorial session on Machine Learning Fundamentals with Location-Based Data in **Room Belasco**