In [4]:
# One-off environment setup: install geopy from the conda-forge channel via a
# shell escape; `--yes` skips the interactive confirmation prompt.
# NOTE(review): `!conda` runs whichever conda is first on the shell PATH, which
# may not be the environment backing this kernel - confirm, or consider the
# `%conda` magic instead.
!conda install -c conda-forge geopy --yes 
print('Done')

Done


In [1]:
import math
import os
import warnings

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

warnings.filterwarnings("ignore")

print('Libraries imported')

Libraries imported


### Step 1 - Import Dublin Populations File as DataFrame

In [61]:
# Load the whitespace-delimited population table. The file has no header row,
# so column names are supplied explicitly; `thousands=','` lets figures such as
# "188,761" parse as numbers.
# `sep=r'\s+'` replaces the deprecated `delim_whitespace=True` (same parsing).
df1 = pd.read_table(
    'Dublin_Populations.txt',
    sep=r'\s+',
    names=('Name', 'ED_Ref', '2011_Pop', '2016_Pop',
           '2016_Pop_Male', '2016_Pop_Female', '2011_2016_Percent_Change'),
    thousands=',',
)

df1.head(3)

Unnamed: 0,Name,ED_Ref,2011_Pop,2016_Pop,2016_Pop_Male,2016_Pop_Female,2011_2016_Percent_Change
0,Total,-,188761,201074,96048,105026,6.5
1,Ballinteer-Broadford,1,2885,2657,1272,1385,-7.9
2,Ballinteer-Ludford,2,2118,2032,984,1048,-4.1


### Step 2 - Wrangle Data into more Usable Format

In [62]:
# Step 2.1 - Drop Unnecessary Columns
df2 = df1.drop(['ED_Ref', '2016_Pop_Male', '2016_Pop_Female'], axis=1)

# Step 2.2 - Change Number Columns from String to Int/Float
types_dict = {'Name': str, '2011_Pop': int, '2016_Pop': int, '2011_2016_Percent_Change': float}
df2 = df2.astype(types_dict)

# Translation table for Step 2.6 (accented Irish vowels -> plain English vowels)
_ACCENT_TABLE = str.maketrans('áéíóú', 'aeiou')


def _clean_district_name(name):
    """Normalise one raw district name into a geocoder-friendly form.

    Applies, in order: strip trailing asterisks (2.3), underscores to spaces
    (2.4), swap the two hyphen-separated parts and join them with a comma,
    e.g. 'South City-Parkland' -> 'Parkland, South City' (2.5), and replace
    accented vowels with their unaccented equivalents (2.6).
    """
    # Steps 2.3 and 2.4
    name = name.rstrip('*').replace('_', ' ')

    # Step 2.5 - only the first two hyphen-separated parts are kept
    if '-' in name:
        parts = name.split('-')
        name = ", ".join((parts[1], parts[0]))

    # Step 2.6
    return name.translate(_ACCENT_TABLE)


# Steps 2.3-2.6 - Clean every name in one pass. Assigning the whole column
# avoids chained `df2['Name'][i] = ...` writes, which are not guaranteed to
# reach the frame (and never do under pandas Copy-on-Write).
df2['Name'] = df2['Name'].map(_clean_district_name)

df2.head(3)

Unnamed: 0,Name,2011_Pop,2016_Pop,2011_2016_Percent_Change
0,Total,188761,201074,6.5
1,"Broadford, Ballinteer",2885,2657,-7.9
2,"Ludford, Ballinteer",2118,2032,-4.1


### Step 3 - Get Coordinates for each Electoral District

In [63]:
# Step 3.1 Create strings for Geopy Request
geolocator = Nominatim(user_agent="Dub_explorer")
Dublin = 'Dublin'

# Step 3.2 Drop 'Total' Row, and add Latitude and Longitude Columns
# (empty strings act as the "not geocoded" placeholder)
df3 = df2.drop([0])
df3['Latitude'] = ''
df3['Longitude'] = ''

for i in df3.index:

    # Step 3.3 Feed address into Geopy, including ', Dublin'
    district = df3.at[i, 'Name']
    address = ", ".join((district, Dublin))
    location = geolocator.geocode(address)

    # Step 3.4 If the Geopy call is successful add Latitude and Longitude
    # Coordinates, if not print 'Fail'. geocode() returns None when nothing
    # matches, so a single check covers both coordinates.
    if location is None:
        print('Lat Fail at: ', district)
        print('Long Fail at: ', district)
        continue

    # `.at` writes straight into df3; the chained form `df3['Latitude'][i] = ...`
    # may write to a temporary copy instead.
    df3.at[i, 'Latitude'] = location.latitude
    df3.at[i, 'Longitude'] = location.longitude

df3.head(3)

Unnamed: 0,Name,2011_Pop,2016_Pop,2011_2016_Percent_Change,Latitude,Longitude
1,"Broadford, Ballinteer",2885,2657,-7.9,53.2779,-6.25856
2,"Ludford, Ballinteer",2118,2032,-4.1,53.2817,-6.25083
3,"Marley, Ballinteer",2443,2338,-4.3,53.2864,-6.26797


### Step 4 - Create Map of DLR Region

In [64]:
# Step 4.1 Geocode the DLR region to get a centre point for the map
DLR_address = 'Dun Laoghaire-Rathdown, Co. Dublin, Ireland'
DLR_location = geolocator.geocode(DLR_address)
DLR_latitude, DLR_longitude = DLR_location.latitude, DLR_location.longitude

# Step 4.2 Base map centred on DLR
map_DLR = folium.Map(location=[DLR_latitude, DLR_longitude], zoom_start=11)

# Step 4.3 One circle marker per district; clicking it shows the district name
for lat, lng, name in zip(df3['Latitude'], df3['Longitude'], df3['Name']):
    label = folium.Popup(name, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_DLR)

map_DLR

### Step 5 - Define Foursquare Data and Radius of Each Electoral District

In [65]:
# Step 5.1 - Foursquare API credentials.
# Read from the environment rather than hardcoding secrets in the notebook.
# Set FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET before starting the
# kernel; a missing variable raises KeyError here instead of failing later
# inside an API call. Credentials that were previously committed in plain
# text should be treated as compromised and rotated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20180605'

# Step 5.2 - Estimate each district's area and equivalent circular radius.
# NOTE(review): 1700 is presumably an assumed average population density
# (people per km^2) - confirm the source of this figure.
POP_DENSITY_PER_KM2 = 1700

# Work on a copy so df3 (already displayed above) is not mutated.
df5 = df3.copy()

# area = population / density; radius of the circle with that area, in metres
area_km2 = df5['2016_Pop'] / POP_DENSITY_PER_KM2
df5['Radius_m'] = (np.sqrt(area_km2 / np.pi) * 1000).astype(int)  # truncated to whole metres
df5['Km_Squared'] = area_km2

df5.head(3)

Unnamed: 0,Name,2011_Pop,2016_Pop,2011_2016_Percent_Change,Latitude,Longitude,Radius_m,Km_Squared
1,"Broadford, Ballinteer",2885,2657,-7.9,53.2779,-6.25856,705,1.56294
2,"Ludford, Ballinteer",2118,2032,-4.1,53.2817,-6.25083,616,1.19529
3,"Marley, Ballinteer",2443,2338,-4.3,53.2864,-6.26797,661,1.37529


### Step 6 - Create Function to Get Foursquare Data for each District

In [66]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Relies on the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT
    being defined before the function is called.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to search around.
    radius : int, default 500
        Value sent as the `radius` query parameter for every location.

    Returns
    -------
    pandas.DataFrame
        One row per venue returned, with the location's name and coordinates
        followed by the venue's name, coordinates and first category. The
        frame has the seven expected columns even when no venues come back.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # make the GET request; `params` handles URL encoding, and the timeout
        # stops a stalled connection from hanging the notebook indefinitely
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30)
        # surface API errors (bad credentials, quota, ...) as an HTTPError
        # instead of an opaque KeyError while indexing the JSON body
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may carry no category; record None rather than crash
                categories[0]['name'] if categories else None))

    # Passing `columns` to the constructor also works when `rows` is empty,
    # unlike assigning `.columns` on an empty frame afterwards.
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)
print('Done')

Done


### Step 7 - Use Function for each District

In [67]:
# Max number of venues returned for each District
LIMIT = 20

# Fetch venues around every district centre point.
# NOTE(review): the per-district 'Radius_m' column is not passed here, so the
# function's default radius applies to every district - confirm this is intended.
DLR_venues = getNearbyVenues(
    names=df5['Name'],
    latitudes=df5['Latitude'],
    longitudes=df5['Longitude'],
)

# Use "District" terminology instead of the function's "Neighborhood" labels
DLR_venues = DLR_venues.rename(columns={
    "Neighborhood": "District",
    "Neighborhood Latitude": "District Latitude",
    'Neighborhood Longitude': 'District Longitude',
})
DLR_venues.head(3)

Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Broadford, Ballinteer",53.277866,-6.258556,Ballinteer House,53.274733,-6.254319,Pub
1,"Broadford, Ballinteer",53.277866,-6.258556,Meadowbrook,53.281851,-6.256625,Pool
2,"Broadford, Ballinteer",53.277866,-6.258556,SuperValu,53.275103,-6.253135,Supermarket


### Step 8 - Get Frequency of Venue Categories

In [68]:
# Step 8.1 - One hot encoding: one indicator column per venue category
DLR_onehot = pd.get_dummies(DLR_venues[['Venue Category']], prefix="", prefix_sep="")

# Step 8.2 - Attach the District each venue belongs to
DLR_onehot['District'] = DLR_venues['District']

# Step 8.3 - Reorder so that District is the first column
fixed_columns = ['District'] + DLR_onehot.columns.drop('District').tolist()
DLR_onehot = DLR_onehot[fixed_columns]

# Step 8.4 - Per-District mean of each indicator, i.e. the share of that
# District's venues falling in each category
DLR_grouped = DLR_onehot.groupby('District', as_index=False).mean()

DLR_grouped.head(3)

Unnamed: 0,District,Asian Restaurant,Athletics & Sports,Bakery,Bar,Basketball Court,Beach,Bed & Breakfast,Bike Shop,Bistro,...,Sushi Restaurant,Tailor Shop,Tennis Court,Thai Restaurant,Theater,Trail,Train Station,Video Store,Warehouse Store,Wine Shop
0,"Adelaide, Dun Laoghaire",0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0
1,"Balally, Dundrum",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Beech Park, Foxrock",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Step 9 - Sort Venues in Descending Order for Districts 

In [104]:
# Step 9.1 - Define Function
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries in `row`.

    The first element of `row` is skipped (it holds the district name); the
    remaining values are sorted in descending order and the index labels of
    the first `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

# Step 9.2 - Run Function for Districts
num_top_venues = 5
indicators = ['st', 'nd', 'rd']
columns = ['District']

# Build the ranked column headers: '1st', '2nd', '3rd', then 'Nth' afterwards.
# An explicit bounds check replaces the former bare `except:`, which would
# also have hidden any unrelated error raised while formatting.
for ind in np.arange(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per district
DLR_venues_sorted = pd.DataFrame(columns=columns)
DLR_venues_sorted['District'] = DLR_grouped['District']

# fill the ranked-venue columns row by row
for ind in np.arange(DLR_grouped.shape[0]):
    DLR_venues_sorted.iloc[ind, 1:] = return_most_common_venues(DLR_grouped.iloc[ind, :], num_top_venues)

DLR_venues_sorted.head(10)

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Adelaide, Dun Laoghaire",Trail,Park,Basketball Court,Wine Shop,Furniture / Home Store
1,"Balally, Dundrum",Hotel,Gym,Park,Wine Shop,Furniture / Home Store
2,"Beech Park, Foxrock",Bus Stop,Garden Center,Café,Electronics Store,Entertainment Service
3,"Belfield, Clonskeagh",Pub,Café,Soccer Field,Restaurant,Business Service
4,"Booterstown, Blackrock",Pub,Train Station,Bar,Park,Furniture / Home Store
5,"Broadford, Ballinteer",Supermarket,Pub,Indian Restaurant,Convenience Store,Sports Club
6,"Bullock, Dalkey",Gym Pool,Trail,Scenic Lookout,Sports Club,Wine Shop
7,"Carrickmines, Foxrock",Drive-in Theater,Tennis Court,Light Rail Station,Wine Shop,Furniture / Home Store
8,"Carysfort, Blackrock",Playground,Park,Shoe Store,Café,Discount Store
9,"Central, Blackrock",Pub,Coffee Shop,Café,Burger Joint,Flea Market


### Step 10 - Add Population Data

In [86]:
# Step 10.1 - Create 2 Dataframes (Venues and Population Stats) and sort both by District Name
DLR_Final1 = DLR_venues_sorted.sort_values(by=['District'])
DLR_Final2 = df5.sort_values(by=['Name'])

# Step 10.2 - Keep only the population rows whose district has venue data.
# Matching is done by name rather than by row position: dropping rows while
# walking both frames in lockstep misaligns every later row as soon as two
# consecutive districts are missing from the venue data.
DLR_Final2 = DLR_Final2[DLR_Final2['Name'].isin(DLR_Final1['District'])]
DLR_Final2 = DLR_Final2.drop_duplicates(subset='Name').reset_index(drop=True)

# Step 10.3 - Add Population and Growth Columns to Venues Dataframe,
# looking each value up by district name (unmatched districts get NaN)
population_by_name = DLR_Final2.set_index('Name')
DLR_Final1['Growth'] = DLR_Final1['District'].map(population_by_name['2011_2016_Percent_Change'])
DLR_Final1['Population'] = DLR_Final1['District'].map(population_by_name['2016_Pop'])
DLR_Final1['District Latitude'] = DLR_Final1['District'].map(population_by_name['Latitude'])
DLR_Final1['District Longitude'] = DLR_Final1['District'].map(population_by_name['Longitude'])

DLR_Final1.head(3)

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,Growth,Population,District Latitude,District Longitude
0,"Adelaide, Dun Laoghaire",Trail,Park,Basketball Court,Wine Shop,Furniture / Home Store,20.4,2234,53.2821,-6.12537
1,"Balally, Dundrum",Hotel,Gym,Park,Wine Shop,Furniture / Home Store,44.0,7049,53.2716,-6.2329
2,"Beech Park, Foxrock",Bus Stop,Garden Center,Café,Electronics Store,Entertainment Service,-3.4,1653,53.2739,-6.16774


### Step 11 - Display Districts with Maximum Population Growth and Venue Types

In [105]:
# Put the population figures first, rank districts from highest to lowest
# growth, and renumber the rows from 0.
DLR_Final = (
    DLR_Final1[[
        'District', 'Growth', 'Population',
        '1st Most Common Venue', '2nd Most Common Venue',
        '3rd Most Common Venue', '4th Most Common Venue',
        '5th Most Common Venue',
        'District Latitude', 'District Longitude',
    ]]
    .sort_values(by=['Growth'], ascending=False)
    .reset_index(drop=True)
)

DLR_Final.head(10)

Unnamed: 0,District,Growth,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,District Latitude,District Longitude
0,"Balally, Dundrum",44.0,7049,Hotel,Gym,Park,Wine Shop,Furniture / Home Store,53.2716,-6.2329
1,"Rathmichael, Shankill",28.8,3395,Park,Wine Shop,Discount Store,Electronics Store,Entertainment Service,53.2333,-6.13929
2,Glencullen,26.7,16479,Golf Course,Pub,Wine Shop,Furniture / Home Store,Drive-in Theater,53.2221,-6.21809
3,"Roebuck, Clonskeagh",25.5,2556,Café,Bar,Coffee Shop,Tennis Court,Lake,53.3047,-6.22563
4,"Adelaide, Dun Laoghaire",20.4,2234,Trail,Park,Basketball Court,Wine Shop,Furniture / Home Store,53.2821,-6.12537
5,"Sandyford, Dundrum",19.0,6952,Pub,Café,Gym / Fitness Center,Chinese Restaurant,Lounge,53.2698,-6.22457
6,"Leopardstown, Stillorgan",17.5,2429,Bus Stop,Coffee Shop,Garden Center,Electronics Store,Entertainment Service,53.2747,-6.19276
7,"Woodpark, Ballinteer",15.7,5292,Pub,Supermarket,Burger Joint,Furniture / Home Store,Discount Store,53.2746,-6.24863
8,"Belfield, Clonskeagh",12.2,2740,Pub,Café,Soccer Field,Restaurant,Business Service,53.3122,-6.23393
9,"Booterstown, Blackrock",11.9,3328,Pub,Train Station,Bar,Park,Furniture / Home Store,53.3086,-6.19665


### Step 12 - Remake Map with Top 10 Districts for Population Growth

In [117]:
# The ten districts with the highest population growth
DLR_Final_Top = DLR_Final.head(10)

# Create another map of DLR, centred on the same point as the first one
map_DLR2 = folium.Map(location=[DLR_latitude, DLR_longitude], zoom_start=11)

# add one circle marker per top-growth district; the popup shows its name
for lat, lng, name in zip(
        DLR_Final_Top['District Latitude'],
        DLR_Final_Top['District Longitude'],
        DLR_Final_Top['District']):
    label = folium.Popup(name, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_DLR2)

map_DLR2

In [118]:
# Renumber rows from 1 so the index doubles as the growth ranking
DLR_Final.index = np.arange(len(DLR_Final)) + 1
DLR_Final.head(10)

Unnamed: 0,District,Growth,Population,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,District Latitude,District Longitude
1,"Balally, Dundrum",44.0,7049,Hotel,Gym,Park,Wine Shop,Furniture / Home Store,53.2716,-6.2329
2,"Rathmichael, Shankill",28.8,3395,Park,Wine Shop,Discount Store,Electronics Store,Entertainment Service,53.2333,-6.13929
3,Glencullen,26.7,16479,Golf Course,Pub,Wine Shop,Furniture / Home Store,Drive-in Theater,53.2221,-6.21809
4,"Roebuck, Clonskeagh",25.5,2556,Café,Bar,Coffee Shop,Tennis Court,Lake,53.3047,-6.22563
5,"Adelaide, Dun Laoghaire",20.4,2234,Trail,Park,Basketball Court,Wine Shop,Furniture / Home Store,53.2821,-6.12537
6,"Sandyford, Dundrum",19.0,6952,Pub,Café,Gym / Fitness Center,Chinese Restaurant,Lounge,53.2698,-6.22457
7,"Leopardstown, Stillorgan",17.5,2429,Bus Stop,Coffee Shop,Garden Center,Electronics Store,Entertainment Service,53.2747,-6.19276
8,"Woodpark, Ballinteer",15.7,5292,Pub,Supermarket,Burger Joint,Furniture / Home Store,Discount Store,53.2746,-6.24863
9,"Belfield, Clonskeagh",12.2,2740,Pub,Café,Soccer Field,Restaurant,Business Service,53.3122,-6.23393
10,"Booterstown, Blackrock",11.9,3328,Pub,Train Station,Bar,Park,Furniture / Home Store,53.3086,-6.19665
