# Joke's Capstone project

## Finding the similarity between Antwerp districts

![Antwerp Skyline](Antwerp_Skyline.png)

### Introduction/Business Problem

Living in Antwerp myself, I'm interested in knowing which of the districts/area's in Antwerp are most similar to one another when it comes to having similar venues.

The target audience for this investigation is the potential business owner that wants to relocate his business in Antwerp and wants to consider a similar area. Alternatively this investigation can be used by people wanting to move in Antwerp to a similar area as the one they are living in and currently enjoying.

### Code

In [1]:
import pandas as pd
import numpy as np
import pgeocode
from pypostalcode import PostalCodeDatabase
import folium # map rendering library
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from sklearn.cluster import KMeans # import k-means from clustering stage
import matplotlib.cm as cm # Matplotlib and associated plotting modules
import matplotlib.colors as colors
import os
import webbrowser


# EXERCISE PART 1: Creating the dataframe and transforming the data
# -----------------------------------------------------------------

d = pd.read_html("http://www.geonames.org/postalcode-search.html?q=&country=BE")
df = d[2]
df.columns = ['SequenceNr', 'City', 'PostalCode','Country', 'Region', 'Province', 'MajorCity']

# Drop rows where Borough is "Not assgined"
df = df.replace('Not assigned', np.nan)
df = df.dropna(subset=['SequenceNr'])
df = df.drop(columns='SequenceNr')
df = df.drop(columns='Country')
df = df.drop(columns= 'Region')

df_antwerp = df[df.MajorCity == "Antwerpen"]

# EXERCISE PART 2: Adding latitude & longitude to the dataframe
# -------------------------------------------------------------

nomi = pgeocode.Nominatim('be')

# Function to search for latitude based on postal code
def searchlatitude(x):
     try:
         t_postalcodeinfo = nomi.query_postal_code(x)
         latitude = t_postalcodeinfo[-3]
         return latitude
     except:
         return "Not found"

# Function to search for longitude based on postal code
def searchlongitude(x):
    try:
        t_postalcodeinfo = nomi.query_postal_code(x)
        longitude = t_postalcodeinfo[-2]
        return longitude
    except:
        return "Not found"


# Add columns Latitude and Longitude
df_antwerp['Latitude'] = df_antwerp.apply(lambda row: searchlatitude(row.PostalCode), axis = 1)
df_antwerp['Longitude'] = df_antwerp.apply(lambda row: searchlongitude(row.PostalCode), axis = 1)
# print(df_antwerp)

# Drop the rows for which the postal code was not found
df_antwerp = df_antwerp.replace('Not found', np.nan)
df_antwerp = df_antwerp.dropna(subset=['Latitude'])


# EXERCISE PART 3: Exploring & clustering the neighborhoods of Toronto
# --------------------------------------------------------------------
# Get location of Antwerp
address = 'Antwerp'
geolocator = Nominatim(user_agent="antwerp_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Antwerp are {}, {}.'.format(latitude, longitude))

# Create map of New York using latitude and longitude values
map_antwerp = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add markers to map
for lat, lng, postalcode, city in zip(df_antwerp['Latitude'], df_antwerp['Longitude'], df_antwerp['PostalCode'], df_antwerp['City']):
     label = '{}, {}'.format(postalcode, city)
     label = folium.Popup(label, parse_html=True)
     folium.CircleMarker(
         [lat, lng],
         radius=5,
         popup=label,
         color='blue',
         fill=True,
         fill_color='#3186cc',
         fill_opacity=0.7,
         parse_html=False).add_to(map_antwerp)

# Show map (in Jupyter Notebook)
map_antwerp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


The geograpical coordinate of Antwerp are 51.2211097, 4.3997081.
