# Neighbourhood comparison for Uppsala or any other city with Foursquare

### Importing the needed libraries

In [1]:
!pip install geopy
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans



### Configuration parameters

In [2]:
# Execution mode:
#   1     = list of neighbourhoods loaded from file
#   2     = Uppsala case (web-scraped neighbourhood list)
#   other = comparison of two single neighbourhoods (called "mode 3" below)
Mode = 2

Radius = 2000 # Range in metres within which venues are searched (too small -> unrepresented venues, too large -> overlapping venues)
N_clusters = 2 # Number of predefined clusters; it cannot exceed the number of neighbourhoods, so at most 2 when only two neighbourhoods are compared (mode 3)

# Exclusive parameters for Mode 1
Filename = "Test.csv" # Filename for the list of neighbourhoods (only for mode 1)

# Exclusive parameters for Mode 3
Input_location_A = "Göteborg" # First neighbourhood to compare (only mode 3)
Input_location_B = "Stockholm" # Second neighbourhood to compare (only mode 3)
Input_city_A = "" # City where the first neighbourhood is located (only mode 3, leave as an empty string if the location name is already unambiguous)
Input_city_B = "" # City where the second neighbourhood is located (only mode 3, leave as an empty string if the location name is already unambiguous)

# Foursquare account parameters
# The credentials are read from the FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET
# environment variables so that they never have to be saved inside the notebook.
# If the variables are not set, the values fall back to empty strings:
# in that case PLEASE REPLACE THE FALLBACKS WITH YOUR PERSONAL FOURSQUARE ACCOUNT.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605' # Foursquare API version date sent as the "v" request parameter

### Function to retrieve from Foursquare the venues in a certain neighbourhood

In [3]:
# Function that retrieves data from a neighborhood

def getNearbyVenues(name, latitude, longitude, radius):
    """Return the Foursquare venues found around one neighbourhood.

    Calls the Foursquare ``venues/explore`` endpoint with the module-level
    ``CLIENT_ID``, ``CLIENT_SECRET`` and ``VERSION`` settings.

    Parameters
    ----------
    name : str
        Neighbourhood label copied into every returned row.
    latitude, longitude : float
        Centre of the search area.
    radius : int
        Search radius passed to Foursquare as the ``radius`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Category'.
        If no venue is returned, the frame holds a single placeholder row
        (empty venue name and category, venue coordinates 0.0) so that the
        neighbourhood is still represented.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an HTTP error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Category']
    # Let requests build and encode the query string instead of formatting it by hand
    response = requests.get(
        'https://api.foursquare.com/v2/venues/explore',
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(latitude, longitude),
            'radius': radius,
            'limit': 100, # This is anyway the maximum allowed with a basic account on Foursquare
        },
        timeout=30, # do not let a stalled connection block the notebook forever
    )
    # Surface API failures as an HTTP error rather than a KeyError on the payload
    response.raise_for_status()
    groups = response.json()["response"].get('groups') or []
    items = groups[0].get('items', []) if groups else []

    rows = []
    for item in items:
        venue = item['venue']
        # A venue may come without any category; label it explicitly so it is
        # neither dropped nor confused with the "no venues" placeholder row.
        categories = venue.get('categories') or []
        category = categories[0]['name'] if categories else 'Uncategorized'
        rows.append((
            name,
            latitude,
            longitude,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            category))
    if not rows:
        # Placeholder row keeping an empty neighbourhood in the data set
        rows.append((name, latitude, longitude, "", 0.0, 0.0, ""))
    return pd.DataFrame(rows, columns=columns)

### Function for retrieving the list of neighbourhoods in Uppsala

In [4]:
def Retrieve_Uppsala():
    """Scrape the Uppsala neighbourhood names from Swedish Wikipedia.

    Downloads the "Lista över stadsdelar i Uppsala" page and reads the text of
    every ``<span class="mw-headline">`` element.

    Returns
    -------
    list of str
        The headline texts, in page order, except those that equal "Källor",
        contain "[" or contain "staden" (presumably the non-neighbourhood
        section headings of the page).
    """
    page = requests.get("https://sv.wikipedia.org/wiki/Lista_%C3%B6ver_stadsdelar_i_Uppsala")
    headlines = BeautifulSoup(page.content, 'html.parser').findAll('span', class_ = "mw-headline")

    def is_neighbourhood(title):
        # Inverse of the exclusion rule: keep only headlines matching none of the filters
        return title != "Källor" and "[" not in title and "staden" not in title

    titles = (headline.get_text() for headline in headlines)
    return [title for title in titles if is_neighbourhood(title)]

### Geolocator that retrieves coordinates, given a location name

In [5]:
# Shared geocoder used by Zone. Nominatim's usage policy asks clients to send a
# User-Agent that identifies the application, so a descriptive name is used
# instead of a meaningless placeholder.
geolocator = Nominatim(user_agent = "uppsala-neighbourhood-comparison")

class Zone:
    """A neighbourhood together with its geocoded coordinates.

    Attributes
    ----------
    neighbour : str
        Name of the neighbourhood as passed by the caller.
    location
        Result object returned by the module-level ``geolocator`` for the query.
    latitude, longitude : float
        Coordinates taken from ``location``.
    """

    def __init__(self, neighbour, city):
        """Geocode ``neighbour`` (optionally qualified by ``city``).

        Parameters
        ----------
        neighbour : str
            Name of the neighbourhood to look up.
        city : str or None
            City used to disambiguate the name; an empty string or None means
            that the neighbourhood name alone is used as the query.

        Raises
        ------
        ValueError
            If the geocoder finds no match for the query.
        """
        self.neighbour = neighbour
        # Only append the city when one is given: this avoids a dangling
        # ", " in the query and a TypeError when city is None.
        query = neighbour + ", " + city if city else neighbour
        self.location = geolocator.geocode(query)
        if self.location is None:
            # geocode() returns None when nothing matches; fail with a clear message
            raise ValueError("Could not geocode location: {!r}".format(query))
        self.latitude = self.location.latitude
        self.longitude = self.location.longitude

def _print_zone(zone):
    """Print one zone as name, latitude and longitude separated by tabs."""
    print(zone.neighbour, "\t", zone.latitude, "\t", zone.longitude)

# Build the list of geocoded zones according to the configured Mode
Zones = []
if Mode == 1:
    # Neighbourhood names come from the CSV file; no city qualifier is used
    City = ""
    List_of_locations = pd.read_csv(Filename)
    # NOTE(review): iterating a DataFrame yields its column labels, so this
    # presumably expects the names in the header row of the CSV - confirm the file layout
    for Location in List_of_locations:
        Zones.append(Zone(Location, City))
        _print_zone(Zones[-1])
elif Mode == 2:
    # Uppsala case: names are scraped from Wikipedia and qualified with the city
    City = "Uppsala"
    List_of_locations = Retrieve_Uppsala()
    for Neighbour in List_of_locations:
        Zones.append(Zone(Neighbour, City))
        _print_zone(Zones[-1])
else:
    # Any other mode: compare the two locations given in the configuration
    Zones.append(Zone(Input_location_A, Input_city_A))
    Zones.append(Zone(Input_location_B, Input_city_B))
    _print_zone(Zones[0])
    _print_zone(Zones[1])

City 	 59.8586126 	 17.6387436
Fjärdingen 	 59.8587762 	 17.6332614
Berthåga 	 59.853763 	 17.5828786
Husbyborg 	 59.8720587 	 17.5971848
Hällby 	 59.6652808 	 16.8688837
Librobäck 	 59.8767726 	 17.5960343
Luthagen 	 59.864739 	 17.6198009
Rickomberga 	 59.8559593 	 17.6031045
Stenhagen 	 59.8519689 	 17.5665042
Eriksberg 	 59.8416597 	 17.6046785
Flogsta 	 59.8491618 	 17.5899671
Ekeby 	 59.8502007 	 17.6061205
Håga 	 59.838302 	 17.5761502
Kvarnbo 	 59.8429784 	 17.5655851
Kåbo 	 59.8467044 	 17.6278528
Norby 	 59.8308676 	 17.6176849
Polacksbacken 	 59.8407238 	 17.6467723
Starbo 	 59.8475573 	 17.5711317
Gottsunda 	 59.8091727 	 17.6261733
Sunnersta 	 59.7979494 	 17.6521501
Ulleråker 	 59.8308727 	 17.6506938
Ultuna 	 59.8191036 	 17.6509864
Valsätra 	 59.816778 	 17.6285104
Vårdsätra 	 59.7935126 	 17.6225932
Bergsbrunna 	 59.8172656 	 17.7179393
Nåntuna 	 59.8173527 	 17.6875123
Sävja 	 59.8116229 	 17.7005617
Vilan 	 59.8249242 	 17.6918897
Boländerna 	 59.8506932 	 17.6759787

### For each zone, a list of venues is generated by calling the getNearbyVenues function defined above

In [6]:
def _category_counts(venues, column_name):
    """Count the venues of each category.

    Returns a DataFrame with a "Type" column holding the distinct categories
    (in order of first appearance) and a column named ``column_name`` holding
    how many rows of ``venues`` belong to each of them.
    """
    categories = venues['Category']
    distinct = categories.unique()
    tallies = [(categories == label).sum() for label in distinct]
    return pd.DataFrame({"Type": distinct, column_name: tallies})

# One per-category count table for every zone, in the order of Zones
Venues = []
for Z in Zones:
    V = getNearbyVenues(Z.neighbour, Z.latitude, Z.longitude, Radius)
    print(Z.neighbour, "retrieved")
    Venues.append(_category_counts(V, Z.neighbour))

City retrieved
Fjärdingen retrieved
Berthåga retrieved
Husbyborg retrieved
Hällby retrieved
Librobäck retrieved
Luthagen retrieved
Rickomberga retrieved
Stenhagen retrieved
Eriksberg retrieved
Flogsta retrieved
Ekeby retrieved
Håga retrieved
Kvarnbo retrieved
Kåbo retrieved
Norby retrieved
Polacksbacken retrieved
Starbo retrieved
Gottsunda retrieved
Sunnersta retrieved
Ulleråker retrieved
Ultuna retrieved
Valsätra retrieved
Vårdsätra retrieved
Bergsbrunna retrieved
Nåntuna retrieved
Sävja retrieved
Vilan retrieved
Boländerna retrieved
Fyrislund retrieved
Fålhagen retrieved
Kungsängen retrieved
Kuggebro retrieved
Sala backe retrieved
Slavsta retrieved
Vaksala retrieved
Årsta retrieved
Brillinge retrieved
Gamla Uppsala retrieved
Gränby retrieved
Kvarngärdet retrieved
Löten retrieved
Nyby retrieved
Svartbäcken retrieved
Tunabackar retrieved
Ärna retrieved


In [7]:
# Outer-join the per-zone count tables on the venue type: the result has one row
# per venue type and one count column per neighbourhood (0 where a type is absent).
Merged_data = pd.DataFrame(Venues[0])
for V in Venues[1:]:
    Merged_data = pd.merge(Merged_data, V, on = "Type", how = "outer").fillna(0)
# Correlate the neighbourhood columns only. The textual "Type" column is removed
# explicitly because pandas >= 2.0 no longer drops non-numeric columns silently
# in corr() and raises an error instead.
Merged_data.drop(columns = "Type").corr().style.background_gradient(cmap = 'Reds')

Unnamed: 0,City,Fjärdingen,Berthåga,Husbyborg,Hällby,Librobäck,Luthagen,Rickomberga,Stenhagen,Eriksberg,Flogsta,Ekeby,Håga,Kvarnbo,Kåbo,Norby,Polacksbacken,Starbo,Gottsunda,Sunnersta,Ulleråker,Ultuna,Valsätra,Vårdsätra,Bergsbrunna,Nåntuna,Sävja,Vilan,Boländerna,Fyrislund,Fålhagen,Kungsängen,Kuggebro,Sala backe,Slavsta,Vaksala,Årsta,Brillinge,Gamla Uppsala,Gränby,Kvarngärdet,Löten,Nyby,Svartbäcken,Tunabackar,Ärna
City,1.0,0.975529,0.126712,0.366197,-0.064135,0.279338,0.753881,0.699992,0.032239,0.212382,0.18896,0.700391,0.066263,0.004981,0.809737,0.235602,0.664899,0.059286,0.176586,0.051622,0.189879,0.192655,0.200094,0.007404,-0.003694,0.09898,-0.003694,0.042779,0.298698,-0.056874,0.803782,0.550373,-0.084441,0.415465,-0.085377,0.189396,0.140608,0.131997,-0.056131,0.246574,0.881485,0.362681,0.075452,0.769321,0.669876,-0.074052
Fjärdingen,0.975529,1.0,0.231116,0.443901,-0.059959,0.353399,0.745025,0.708501,0.063198,0.256151,0.269982,0.707406,0.118018,0.03548,0.793116,0.310501,0.64233,0.14832,0.229017,0.068485,0.235397,0.241973,0.247436,0.021896,-0.006416,0.086064,-0.006416,0.035288,0.332685,-0.048135,0.785397,0.490062,-0.083285,0.459097,-0.073485,0.240144,0.159376,0.197908,-0.056046,0.313044,0.863432,0.40691,0.089384,0.762597,0.691933,-0.071386
Berthåga,0.126712,0.231116,1.0,0.303667,-0.027393,0.368543,0.087502,0.277036,0.749594,0.50674,0.91341,0.332333,0.851956,0.751388,0.104298,0.483159,-0.067906,0.961905,0.542783,0.279792,0.321161,0.518202,0.449982,0.293754,-0.052347,-0.068581,-0.052347,-0.062328,0.390644,0.427659,0.246867,0.104128,0.324042,0.448849,0.361814,0.500512,0.403277,0.642619,0.026489,0.620783,0.16735,0.585141,0.381293,0.127275,0.251697,-0.068581
Husbyborg,0.366197,0.443901,0.303667,1.0,-0.029477,0.874082,0.252879,0.267028,0.103309,0.070256,0.276166,0.167909,0.137745,0.084797,0.122402,0.162187,0.083919,0.251515,0.101435,0.055497,0.12059,0.125992,0.128509,-0.034599,0.133579,0.237206,0.133579,0.272108,0.390538,0.097923,0.225927,0.006357,-0.035378,0.212409,0.077514,0.335321,0.233061,0.221592,-0.091618,0.22156,0.330404,0.37858,-0.060729,0.250266,0.355022,-0.073797
Hällby,-0.064135,-0.059959,-0.027393,-0.029477,1.0,-0.023467,-0.057477,-0.054281,-0.030467,-0.032267,-0.033767,-0.053175,-0.033318,-0.031749,-0.06036,-0.030765,-0.045698,-0.028612,-0.026565,-0.023467,-0.035564,-0.032997,-0.030592,-0.025209,-0.016333,-0.021398,-0.016333,-0.019447,-0.059684,-0.044048,-0.064045,-0.057471,-0.034467,-0.062289,-0.0378,-0.050113,-0.042895,-0.045628,-0.026565,-0.047511,-0.065894,-0.05758,-0.052628,-0.060907,-0.062178,-0.021398
Librobäck,0.279338,0.353399,0.368543,0.874082,-0.023467,1.0,0.196002,0.170715,0.148607,0.046298,0.257371,0.049773,0.190119,0.129381,-0.05236,0.114881,-0.035741,0.311754,0.070508,-0.064433,0.037909,0.016308,0.020335,-0.069216,0.166821,0.172338,0.166821,0.198629,0.379975,0.122711,0.183818,-0.005797,-0.048107,0.237129,0.09481,0.391018,0.268269,0.297092,-0.072939,0.299124,0.224436,0.453029,-0.009887,0.187966,0.326006,-0.058752
Luthagen,0.753881,0.745025,0.087502,0.252879,-0.057477,0.196002,1.0,0.73138,-0.034228,0.047317,0.126617,0.678121,-0.00565,-0.054387,0.670041,0.03724,0.560795,0.058533,0.014589,-0.046081,-0.006753,0.066125,0.024253,-0.055218,-0.083914,-0.058998,-0.083914,-0.06905,0.331246,-0.057509,0.741589,0.553538,-0.049448,0.569677,-0.04342,0.106898,0.099199,0.117301,-0.108379,0.259168,0.728405,0.235134,0.074694,0.987181,0.764672,-0.058998
Rickomberga,0.699992,0.708501,0.277036,0.267028,-0.054281,0.170715,0.73138,1.0,0.172799,0.374189,0.33766,0.869851,0.183161,0.139889,0.689401,0.276206,0.370174,0.211561,0.258607,0.144069,0.130442,0.296239,0.24448,0.105694,-0.029547,-0.014413,-0.029547,0.008982,0.20334,0.000827,0.611344,0.380794,-0.00691,0.335516,0.015139,0.250748,0.228166,0.213508,-0.001136,0.21264,0.642253,0.265571,0.168983,0.740448,0.707646,-0.054907
Stenhagen,0.032239,0.063198,0.749594,0.103309,-0.030467,0.148607,-0.034228,0.172799,1.0,0.580343,0.748548,0.199181,0.884832,0.920462,0.066218,0.270946,-0.060107,0.717735,0.507834,0.264737,0.287321,0.494708,0.408848,0.444785,0.265077,0.100207,0.265077,0.123149,0.387178,0.53413,0.152021,0.172403,0.374618,0.332106,0.471929,0.398079,0.407255,0.514739,0.051371,0.48677,0.054044,0.457974,0.360684,-0.00613,0.031255,-0.076277
Eriksberg,0.212382,0.256151,0.50674,0.070256,-0.032267,0.046298,0.047317,0.374189,0.580343,1.0,0.629496,0.389722,0.623622,0.576506,0.388098,0.723856,0.165776,0.411344,0.726835,0.316081,0.497044,0.657814,0.671176,0.42235,0.219987,0.124214,0.219987,0.094258,0.163311,0.188796,0.077267,0.052707,0.138168,0.197313,0.20964,0.313229,0.266128,0.277358,0.15421,0.281091,0.074863,0.353272,0.318771,0.058894,0.132461,-0.080783


### Clustering

In [8]:
# Turn the table around: one row per neighbourhood, one column per venue type
Types = Merged_data['Type'].tolist()
# The axis is given by keyword (columns=): passing it positionally, as in
# drop('Type', 1), is no longer accepted by pandas >= 2.0.
Transposed_data = Merged_data.drop(columns = 'Type').transpose()
Transposed_data.columns = Types

In [9]:
# Cluster the neighbourhoods on their venue counts only. If this cell is re-run
# after the 'Cluster Labels' column has been inserted into Transposed_data, that
# column is left out so that earlier labels are never used as a feature.
kmeans = KMeans(n_clusters = N_clusters, random_state = 0).fit(
    Transposed_data.drop(columns = 'Cluster Labels', errors = 'ignore'))

In [10]:
# Cluster labels
# Store the label of each neighbourhood as the first column. insert() raises a
# ValueError when the column already exists, so on a re-run of this cell the
# existing column is overwritten instead.
if 'Cluster Labels' in Transposed_data.columns:
    Transposed_data['Cluster Labels'] = kmeans.labels_
else:
    Transposed_data.insert(0, 'Cluster Labels', kmeans.labels_)

In [11]:
# Show the last five neighbourhoods once the rows are ordered by cluster label
Transposed_data.sort_values("Cluster Labels").tail(5)

Unnamed: 0,Cluster Labels,Seafood Restaurant,Coffee Shop,Bakery,Beer Bar,Juice Bar,American Restaurant,Pub,Restaurant,Bookstore,...,BBQ Joint,Perfume Shop,Hockey Arena,Brewery,Historic Site,Bus Station,Food Court,Airport,Arts & Crafts Store,Hostel
Ekeby,1,2.0,2.0,1.0,1.0,0.0,1.0,2.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Kåbo,1,2.0,2.0,1.0,2.0,1.0,1.0,2.0,2.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Tunabackar,1,2.0,2.0,1.0,1.0,0.0,1.0,1.0,2.0,1.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
Luthagen,1,2.0,4.0,2.0,2.0,1.0,2.0,4.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
City,1,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
