# IBM Applied Data Science Capstone Course by Coursera
### Week 5 Final Report
**_Opening a New Parking spot in Taipei city, Taiwan**
- Build a dataframe of neighborhoods in Taipei, Taiwan by web scraping the data from Wikipedia page
- Get the geographical coordinates of the neighborhood
- Obtain the venue data for the neighborhoods from taipeicity.github
- Explore and cluster the neighborhoods
- Select the best cluster to open a new parking spot
***
### 1. Import libraries

In [22]:
# -*- coding:utf-8 -*-
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


In [1]:
!pip install geopy
!pip install geocoder
!pip install bs4
!pip install lxml
!pip install html5lib

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/53/fc/3d1b47e8e82ea12c25203929efb1b964918a77067a874b2c7631e2ec35ec/geopy-1.21.0-py2.py3-none-any.whl (104kB)
[K     |████████████████████████████████| 112kB 14.6MB/s eta 0:00:01
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.21.0
Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 8.5MB/s ta 0:00:011
[?25hCollecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratel

### 2. Scrap data from Wikipedia page into a DataFrame

In [92]:
df = pd.read_html('https://en.wikipedia.org/wiki/District_(Taiwan)',header=0)[5]

In [111]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [272]:
url = 'https://zh.wikipedia.org/wiki/%E8%87%BA%E5%8C%97%E5%B8%82'
r = requests.get(url) 
soup = BeautifulSoup(r.text,"html.parser")
table = soup.find("table",{"class":"sortable wikitable"})
columns = [th.text.replace('\n', '') for th in table.find('tr').find_all('th')]
trs = table.find_all('tr')[1:]
rows = list()
for tr in trs:
    rows.append([td.text.replace('\n', '').replace('\xa0', '') for td in tr.find_all('td')])
taipei_density = pd.DataFrame(data=rows, columns=columns)


In [273]:
taipei_density

Unnamed: 0,區名,面積（km²）,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花
0,中正區,7.6071,31,580,157743,-271,20736,100.0,木棉花
1,大同區,5.6815,25,521,125909,-134,22161,103.0,茶花
2,中山區,13.6821,42,869,227266,-121,16610,104.0,蝴蝶蘭
3,松山區,9.2878,33,763,204043,-150,21969,105.0,朱槿
4,大安區,11.3614,53,1022,307526,-105,27068,106.0,波斯菊
5,萬華區,8.8522,36,723,186848,-228,21108,108.0,白牡丹
6,信義區,11.2077,41,904,219744,-277,19607,110.0,野牡丹
7,士林區,62.3682,51,995,283282,-177,4542,111.0,玫瑰花
8,北投區,56.8216,42,827,253155,-164,4455,112.0,櫻花
9,內湖區,31.5787,39,906,285526,-269,9042,114.0,九重葛


In [120]:
df.head()

Unnamed: 0,Name,Latitude,Longitude
0,Zhongzheng,25.094625,121.522533
1,Wanhua,25.03694,121.49944
2,Datong,25.06532,121.5215
3,Zhongshan,25.04841,121.53564
4,Songshan,25.21675,121.5672


In [96]:
#We only need the districts in Taipei city
df = df[df['City'] == 'Taipei']

In [97]:
# Extract only the column of District name
taipei_list = df.Name.to_list()
taipei_list

['Zhongzheng',
 'Wanhua',
 'Datong',
 'Zhongshan',
 'Songshan',
 'Daan',
 'Xinyi',
 'Neihu',
 'Nangang',
 'Shilin',
 'Beitou',
 'Wenshan']

### 3. Get the geographical coordinates

In [200]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Taipei, Taiwan'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [201]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in taipei_list ]

In [202]:
coords

[[25.094624994713428, 121.52253339911759],
 [25.036939948929618, 121.49944002221704],
 [25.065320000000042, 121.52150000000006],
 [25.048409996104933, 121.53564005107046],
 [25.216750000000047, 121.56720000000007],
 [25.037370000000067, 121.56355000000008],
 [25.114770043857153, 121.52751996335634],
 [25.069090000000074, 121.58847000000003],
 [25.054370000000063, 121.60681000000011],
 [25.050509988891537, 121.52929000000006],
 [25.13289000000003, 121.50253000000009],
 [24.98974000000004, 121.56963000000007]]

In [203]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [204]:
# merge the coordinates into the original dataframe
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']

In [205]:
df.columns

Index(['Name', 'area_latitude', 'area_Longitude', 'Latitude', 'Longitude'], dtype='object')

In [206]:
df = df[['Name', 'Latitude', 'Longitude']]
df.head()

Unnamed: 0,Name,Latitude,Longitude
0,Zhongzheng,25.094625,121.522533
1,Wanhua,25.03694,121.49944
2,Datong,25.06532,121.5215
3,Zhongshan,25.04841,121.53564
4,Songshan,25.21675,121.5672


In [207]:
# save the DataFrame as CSV file
df.to_csv("coord_taipei.csv", index=False)

In [208]:
# get the coordinates of Kuala Lumpur
address = 'Taipei, Taiwan'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Taipei, Taiwan {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Taipei, Taiwan 25.0375198, 121.5636796.


In [209]:
# create map of Toronto using latitude and longitude values
tw_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Name']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(tw_map)  
    
tw_map

## Foursquare

In [210]:
CLIENT_ID = 'KG2PQYN1UVMP3TURF5OGAJP1CJSSNGCQGNFJ001V2EG0CP5K' # your Foursquare ID
CLIENT_SECRET = 'N0CWNN5B3YDDD33ZK5RJF0WM5YLHSQF1FSZUID2CQ3VJ4Q1I' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KG2PQYN1UVMP3TURF5OGAJP1CJSSNGCQGNFJ001V2EG0CP5K
CLIENT_SECRET:N0CWNN5B3YDDD33ZK5RJF0WM5YLHSQF1FSZUID2CQ3VJ4Q1I


In [211]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(df['Latitude'], df['Longitude'], df['Name']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [212]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['District', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude','VenueCategory']

print(venues_df.shape)
venues_df.head()

(989, 7)


Unnamed: 0,District,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Zhongzheng,25.094625,121.522533,華榮街菜市場,25.094891,121.523713,Farmers Market
1,Zhongzheng,25.094625,121.522533,以利泡泡冰（士林本店）,25.094873,121.523737,Ice Cream Shop
2,Zhongzheng,25.094625,121.522533,Cha for Tea (喫茶趣),25.094877,121.528528,Tea Room
3,Zhongzheng,25.094625,121.522533,誠品士林店 Eslite Bookstore,25.092952,121.525952,Bookstore
4,Zhongzheng,25.094625,121.522533,Taipei Children's Amusement Park (台北市立兒童新樂園),25.097059,121.515427,Theme Park


### Venue data of Parking lot

Data is collected from https://taipeicity.github.io/traffic_realtime/

In [58]:
with open('TCMSV_alldesc.json') as json_data:
    parkinglot_data = json.load(json_data)

In [59]:
sum_parking_spot = len(parkinglot_data['data']['park'])

In [60]:
parkinglot_data['data']['park'][0]

{'id': '003',
 'area': '信義區',
 'name': '臺北市災害應變中心地下停車場',
 'type': '1',
 'type2': '1',
 'summary': '立體式小型車169格(含身心障礙停車位5格)。立體式機車208格(含身心障礙停車位5格)。',
 'address': '莊敬路391巷11弄2號地下',
 'tel': '2345-3859',
 'payex': '計時：小型車計時30元，全程以半小時計費；機車10元/時，當日當次最高收費上限20元(隔日另計)。月票：小型車全日月票5,000元，日間優惠月票2,600元(7-19)；機車月票300元。',
 'serviceTime': '00:00:00~23:59:59',
 'tw97x': '307142.121',
 'tw97y': '2769087.67',
 'totalcar': 174,
 'totalmotor': 204,
 'totalbike': 0,
 'totalbus': 0,
 'ChargeStation': {'StationName': '臺北市災害應變中心地下停車場',
  'StationAddr': '臺北市信義區台北市信義區莊敬路391巷11弄2號 地下',
  'locLongitude': 121.5672877,
  'locLatitude': 25.031841,
  'openFlag': 'Y',
  'isCharge': 'N',
  'contactName': '李素娥',
  'contactMobilNo': '2345-3859',
  'scoketCount': 2,
  'availableCount': 2,
  'country': '臺北市',
  'town': '信義區'},
 'Pregnancy_First': '3',
 'Handicap_First': '5',
 'totallargemotor': '1',
 'ChargingStation': '2',
 'Taxi_OneHR_Free': '0',
 'AED_Equipment': '0',
 'CellSignal_Enhancement': '0',
 'Accessibility_Elevator

### extract data from json and convert to useful data

In [161]:
area_parkinglot_data = []
name_parkinglot_data = []
addresss_parkinglot_data = []
Xcod_parkinglot_data = []
Ycod_parkinglot_data = []

In [162]:
for i in range(sum_parking_spot):
    try:
        addresss_parkinglot_data.append(parkinglot_data['data']['park'][i]['address'])
    except:
        addresss_parkinglot_data.append(0)
    
    #print(parkinglot_data['data']['park'][i]['id'])

In [163]:
for i in range(sum_parking_spot):
    try:
        name_parkinglot_data.append(parkinglot_data['data']['park'][i]['name'])
    except:
        name_parkinglot_data.append(0)
    
    #print(parkinglot_data['data']['park'][i]['id'])

In [164]:


for i in range(sum_parking_spot):
    try:
        Xcod_parkinglot_data.append(parkinglot_data['data']['park'][i]['EntranceCoord']['EntrancecoordInfo'][0]['Xcod'])
    except:
        Xcod_parkinglot_data.append(0)
    
    #print(parkinglot_data['data']['park'][i]['id'])

In [165]:
for i in range(sum_parking_spot):
    try:
        Ycod_parkinglot_data.append(parkinglot_data['data']['park'][i]['EntranceCoord']['EntrancecoordInfo'][0]['Ycod'])
    except:
        Ycod_parkinglot_data.append(0)
    
    #print(parkinglot_data['data']['park'][i]['id'])

In [166]:
for i in range(sum_parking_spot):
    try:
        area_parkinglot_data.append(parkinglot_data['data']['park'][i]['area'])
    except:
        area_parkinglot_data.append(0)
    
    #print(parkinglot_data['data']['park'][i]['id'])

In [167]:
pd.unique(area_parkinglot_data)

array(['信義區', '北投區', '萬華區', '中山區', '中正區', '大同區', '士林區', '松山區', '大安區',
       '文山區', '內湖區', '南港區', '中正', '大安', '松山', '信義', '南港', '文山'],
      dtype=object)

### Cleanning Data

In [168]:
for i, element in enumerate(area_parkinglot_data):
    if element == '中正':
        print(element)
        area_parkinglot_data[i] = '中正區'
    if element == '松山':
        print(element)
        area_parkinglot_data[i] = '松山區'
    if element == '大安':
        print(element)
        area_parkinglot_data[i] = '大安區'
    if element == '信義':
        print(element)
        area_parkinglot_data[i] = '信義區'
    if element == '南港':
        print(element)
        area_parkinglot_data[i] = '南港區'
    if element == '文山':
        print(element)
        area_parkinglot_data[i] = '文山區'

中正
中正
中正
大安
大安
大安
大安
松山
信義
南港
南港
文山


In [172]:
parking_coor = pd.DataFrame(list(zip(name_parkinglot_data,Xcod_parkinglot_data,Ycod_parkinglot_data,addresss_parkinglot_data,area_parkinglot_data)), columns=['name', 'Latitude','longitude','address','area'])

### Translate data from chinese to english

In [173]:
area_chinese = pd.unique(np.array(parking_coor['area'])).tolist()
#area_english = pd.unique(np.array(df['Name'])).tolist()

In [174]:
area_english = ['Xinyi','Beitou','Wanhua', 'Zhongshan', 'Zhongzheng', 'Datong', 'Shilin', 'Songshan', 'Daan', 'Wenshan', 'Neihu',  'Nangang']

In [175]:
area_combine = pd.DataFrame(list(zip(area_chinese,area_english)), columns=['area_chs', 'area_eng'])

In [249]:
area_combine.head()

Unnamed: 0,area_chs,area_eng
0,信義區,Xinyi
1,北投區,Beitou
2,萬華區,Wanhua
3,中山區,Zhongshan
4,中正區,Zhongzheng


In [284]:
taipei_english = ['Zhongzheng','Datong','Zhongshan', 'Songshan', 'Daan', 'Wanhua', 'Xinyi', 'Shilin', 'Beitou', 'Neihu', 'Nangang',  'Wenshan']
taipei_english = pd.DataFrame(taipei_english,columns=['district'])
taipei_english.head()

Unnamed: 0,district
0,Zhongzheng
1,Datong
2,Zhongshan
3,Songshan
4,Daan


In [281]:
pd.concat([taipei_density,taipei_english],axis = 1)

Unnamed: 0,區名,面積（km²）,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,0
0,中正區,7.6071,31,580,157743,-271,20736,100.0,木棉花,Zhongzheng
1,大同區,5.6815,25,521,125909,-134,22161,103.0,茶花,Datong
2,中山區,13.6821,42,869,227266,-121,16610,104.0,蝴蝶蘭,Zhongshan
3,松山區,9.2878,33,763,204043,-150,21969,105.0,朱槿,Songshan
4,大安區,11.3614,53,1022,307526,-105,27068,106.0,波斯菊,Daan
5,萬華區,8.8522,36,723,186848,-228,21108,108.0,白牡丹,Wanhua
6,信義區,11.2077,41,904,219744,-277,19607,110.0,野牡丹,Xinyi
7,士林區,62.3682,51,995,283282,-177,4542,111.0,玫瑰花,Shilin
8,北投區,56.8216,42,827,253155,-164,4455,112.0,櫻花,Beitou
9,內湖區,31.5787,39,906,285526,-269,9042,114.0,九重葛,Neihu


### Change columns from chinese to english (區名 to District)

In [285]:
taipei_density[['區名']] = taipei_english[['district']]

In [314]:
taipei_density.head()

Unnamed: 0,District,面積（km²）,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude
0,Zhongzheng,7.6071,31,580,157743,-271,20736,100,木棉花,25.094625,121.522533
1,Datong,5.6815,25,521,125909,-134,22161,103,茶花,25.06532,121.5215
2,Zhongshan,13.6821,42,869,227266,-121,16610,104,蝴蝶蘭,25.04841,121.53564
3,Songshan,9.2878,33,763,204043,-150,21969,105,朱槿,25.21675,121.5672
4,Daan,11.3614,53,1022,307526,-105,27068,106,波斯菊,25.03737,121.56355


In [315]:
#last row has information of sum of data, we have to delete last row
taipei_density = taipei_density[:-1]

### parking_coor and taipei_density need to be translated

In [268]:
parking_coor.head()

Unnamed: 0,name,Latitude,longitude,address,area,area_eng,area_latitude,area_Longitude,parking_lot
0,府前廣場地下停車場,25.03648987,121.5621068,松壽路1號地下,信義區,Xinyi,25.11477,121.52752,parking_lot
1,松壽廣場地下停車場,25.036966,121.565523,松智路75號地下,信義區,Xinyi,25.11477,121.52752,parking_lot
2,臺北市災害應變中心地下停車場,25.028728,121.566111,莊敬路391巷11弄2號地下,信義區,Xinyi,25.11477,121.52752,parking_lot
3,雅祥公園地下停車場,25.04754574,121.5716298,松隆路123巷7號地下,信義區,Xinyi,25.11477,121.52752,parking_lot
4,立農公園地下停車場,25.118127,121.502995,承德路7段372號地下,北投區,Beitou,25.13289,121.50253,parking_lot


In [244]:
taipei_density.head()

Unnamed: 0,區名,面積（km²）,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花
0,中正區,7.6071,31,580,157743,-271,20736,100,木棉花
1,大同區,5.6815,25,521,125909,-134,22161,103,茶花
2,中山區,13.6821,42,869,227266,-121,16610,104,蝴蝶蘭
3,松山區,9.2878,33,763,204043,-150,21969,105,朱槿
4,大安區,11.3614,53,1022,307526,-105,27068,106,波斯菊


In [178]:
#join data in order to get english name of district
parking_coor = parking_coor.join(area_combine.set_index('area_chs'), on='area')

In [303]:
# Rename district column from chinese to english
taipei_density.rename(columns={"區名": "District"}, errors="raise", inplace = True)

In [299]:
#translate all distrinct value from chinese to english
taipei_density = taipei_density.join(df.set_index('Name'), on='區名')
taipei_density.head()

Unnamed: 0,區名,面積（km²）,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude
0,Zhongzheng,7.6071,31,580,157743,-271,20736,100,木棉花,25.094625,121.522533
1,Datong,5.6815,25,521,125909,-134,22161,103,茶花,25.06532,121.5215
2,Zhongshan,13.6821,42,869,227266,-121,16610,104,蝴蝶蘭,25.04841,121.53564
3,Songshan,9.2878,33,763,204043,-150,21969,105,朱槿,25.21675,121.5672
4,Daan,11.3614,53,1022,307526,-105,27068,106,波斯菊,25.03737,121.56355


In [180]:
#prevent mess up, rename latitude to district's latitude
df.rename(columns={'Latitude':'area_latitude', 'Longitude':'area_Longitude'}, inplace = True)

In [181]:
parking_coor = parking_coor.join(df.set_index('Name'), on='area_eng')

### Cleaning Data of parking lot information

In [197]:
# delete all the nan value in parking lot information
parking_coor = parking_coor[parking_coor.Latitude != 0]
parking_coor =parking_coor[parking_coor.Latitude != '0']
parking_coor.head()

Unnamed: 0,name,Latitude,longitude,address,area,area_eng,area_latitude,area_Longitude
0,府前廣場地下停車場,25.03648987,121.5621068,松壽路1號地下,信義區,Xinyi,25.11477,121.52752
1,松壽廣場地下停車場,25.036966,121.565523,松智路75號地下,信義區,Xinyi,25.11477,121.52752
2,臺北市災害應變中心地下停車場,25.028728,121.566111,莊敬路391巷11弄2號地下,信義區,Xinyi,25.11477,121.52752
3,雅祥公園地下停車場,25.04754574,121.5716298,松隆路123巷7號地下,信義區,Xinyi,25.11477,121.52752
4,立農公園地下停車場,25.118127,121.502995,承德路7段372號地下,北投區,Beitou,25.13289,121.50253


In [214]:
# create a category named parking lot
parking_coor['parking_lot'] = 'parking_lot'

In [223]:
parking_coor.columns

Index(['name', 'Latitude', 'longitude', 'address', 'area', 'area_eng',
       'area_latitude', 'area_Longitude', 'parking_lot'],
      dtype='object')

In [225]:
data = {'District':parking_coor['area_eng'],
       'Latitude':parking_coor['area_latitude'],
       'Longitude':parking_coor['area_Longitude'],
       'VenueName':parking_coor['name'],
       'VenueLatitude':parking_coor['Latitude'],
       'VenueLongitude':parking_coor['longitude'],
       'VenueCategory':parking_coor['parking_lot'],}
dtest = pd.DataFrame(data)
dtest.head()

Unnamed: 0,District,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Xinyi,25.11477,121.52752,府前廣場地下停車場,25.03648987,121.5621068,parking_lot
1,Xinyi,25.11477,121.52752,松壽廣場地下停車場,25.036966,121.565523,parking_lot
2,Xinyi,25.11477,121.52752,臺北市災害應變中心地下停車場,25.028728,121.566111,parking_lot
3,Xinyi,25.11477,121.52752,雅祥公園地下停車場,25.04754574,121.5716298,parking_lot
4,Beitou,25.13289,121.50253,立農公園地下停車場,25.118127,121.502995,parking_lot


In [221]:
venues_df.head()

Unnamed: 0,District,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Zhongzheng,25.094625,121.522533,華榮街菜市場,25.094891,121.523713,Farmers Market
1,Zhongzheng,25.094625,121.522533,以利泡泡冰（士林本店）,25.094873,121.523737,Ice Cream Shop
2,Zhongzheng,25.094625,121.522533,Cha for Tea (喫茶趣),25.094877,121.528528,Tea Room
3,Zhongzheng,25.094625,121.522533,誠品士林店 Eslite Bookstore,25.092952,121.525952,Bookstore
4,Zhongzheng,25.094625,121.522533,Taipei Children's Amusement Park (台北市立兒童新樂園),25.097059,121.515427,Theme Park


In [228]:
venues_df = pd.concat([dtest,venues_df])

Unnamed: 0,District,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Xinyi,25.11477,121.52752,府前廣場地下停車場,25.03648987,121.5621068,parking_lot
1,Xinyi,25.11477,121.52752,松壽廣場地下停車場,25.036966,121.565523,parking_lot
2,Xinyi,25.11477,121.52752,臺北市災害應變中心地下停車場,25.028728,121.566111,parking_lot
3,Xinyi,25.11477,121.52752,雅祥公園地下停車場,25.04754574,121.5716298,parking_lot
4,Beitou,25.13289,121.50253,立農公園地下停車場,25.118127,121.502995,parking_lot
5,Beitou,25.13289,121.50253,捷運奇岩站轉乘停車場,25.034795,121.572818,parking_lot
6,Wanhua,25.03694,121.49944,萬華國中地下停車場,25.029537,121.49954,parking_lot
7,Zhongshan,25.04841,121.53564,捷運劍南站轉乘停車場,25.03607,121.56306,parking_lot
8,Xinyi,25.11477,121.52752,興雅國中地下停車場,25.03479453,121.5728183,parking_lot
9,Wanhua,25.03694,121.49944,峨眉立體停車場,25.04455,121.50561,parking_lot


In [230]:
np.unique(venues_df.District)

array(['Beitou', 'Daan', 'Datong', 'Nangang', 'Neihu', 'Shilin',
       'Songshan', 'Wanhua', 'Wenshan', 'Xinyi', 'Zhongshan',
       'Zhongzheng'], dtype=object)

## Methodology <a name="methodology"></a>

In this project, we will work to detect districts in Taipei that have a low density of parking lot and a high population density and high density of store/restaurant . If possible, these areas will also have an increasing population. The search radius for each district will be 2000m around the district center.

Above, we collected the necessary data from the Taipei City government and from Foursquare. This data was organized into a dataframe.

Next, we will create clusters within the districts using k-means clustering. We hope to identify a cluster with an increasing population, high population density, and a district with convenience stores outside of their top ten most popular venues.

In [233]:
taipei_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

taipei_onehot['District'] = venues_df['District'] 

fixed_columns = [taipei_onehot.columns[-1]] + list(taipei_onehot.columns[:-1])
taipei_onehot = taipei_onehot[fixed_columns]

print(taipei_onehot.shape)
taipei_onehot.head()

(1116, 163)


Unnamed: 0,District,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bar,Beer Bar,Beijing Restaurant,Bike Rental / Bike Share,Bistro,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Buddhist Temple,Buffet,Building,Burger Joint,Bus Station,Bus Stop,Cable Car,Cafeteria,Café,Cantonese Restaurant,Chinese Breakfast Place,Chinese Restaurant,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Convention Center,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dumpling Restaurant,Duty-free Shop,Electronics Store,Exhibit,Farmers Market,Fast Food Restaurant,Film Studio,Fish Market,Flower Shop,Food Court,Food Stand,Food Truck,Fried Chicken Joint,Furniture / Home Store,Garden,Gay Bar,German Restaurant,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Historic Site,History Museum,Hong Kong Restaurant,Hostel,Hot Spring,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Lake,Leather Goods Store,Lounge,Market,Massage Studio,Metro Station,Mexican Restaurant,Mobile Phone Shop,Mongolian Restaurant,Motorcycle Shop,Mountain,Movie Theater,Museum,Night Market,Nightclub,Noodle House,Office,Other Great Outdoors,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Planetarium,Plaza,Pool,Public Art,Ramen Restaurant,Record Shop,Resort,Restaurant,River,Rock Club,Salad Place,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shanxi Restaurant,Shoe Store,Shopping Mall,Snack Place,Soup Place,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tea Room,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Tonkatsu Restaurant,Toy / Game Store,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Warehouse Store,Wine Bar,Yunnan Restaurant,Zoo,Zoo Exhibit,parking_lot
0,Xinyi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,Xinyi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,Xinyi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Xinyi,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,Beitou,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [234]:
taipei_grouped = taipei_onehot.groupby('District').mean().reset_index()
taipei_grouped

Unnamed: 0,District,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bar,Beer Bar,Beijing Restaurant,Bike Rental / Bike Share,Bistro,Bookstore,Breakfast Spot,Brewery,Bubble Tea Shop,Buddhist Temple,Buffet,Building,Burger Joint,Bus Station,Bus Stop,Cable Car,Cafeteria,Café,Cantonese Restaurant,Chinese Breakfast Place,Chinese Restaurant,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Convention Center,Cultural Center,Cupcake Shop,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Donburi Restaurant,Donut Shop,Dumpling Restaurant,Duty-free Shop,Electronics Store,Exhibit,Farmers Market,Fast Food Restaurant,Film Studio,Fish Market,Flower Shop,Food Court,Food Stand,Food Truck,Fried Chicken Joint,Furniture / Home Store,Garden,Gay Bar,German Restaurant,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Historic Site,History Museum,Hong Kong Restaurant,Hostel,Hot Spring,Hotel,Hotel Bar,Hotpot Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indoor Play Area,Italian Restaurant,Japanese Curry Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Lake,Leather Goods Store,Lounge,Market,Massage Studio,Metro Station,Mexican Restaurant,Mobile Phone Shop,Mongolian Restaurant,Motorcycle Shop,Mountain,Movie Theater,Museum,Night Market,Nightclub,Noodle House,Office,Other Great Outdoors,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Planetarium,Plaza,Pool,Public Art,Ramen Restaurant,Record Shop,Resort,Restaurant,River,Rock Club,Salad Place,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shabu-Shabu Restaurant,Shanghai Restaurant,Shanxi Restaurant,Shoe Store,Shopping Mall,Snack Place,Soup Place,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tea Room,Temple,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Park Ride / Attraction,Tonkatsu Restaurant,Toy / Game Store,Trail,Train,Train Station,Vegetarian / Vegan Restaurant,Warehouse Store,Wine Bar,Yunnan Restaurant,Zoo,Zoo Exhibit,parking_lot
0,Beitou,0.0,0.0,0.0,0.0,0.025974,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.025974,0.0,0.0,0.0,0.038961,0.0,0.0,0.064935,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.012987,0.012987,0.0,0.0,0.077922,0.12987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.051948,0.0,0.0,0.038961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.038961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.181818
1,Daan,0.0,0.0,0.0,0.0,0.009346,0.0,0.018692,0.0,0.028037,0.028037,0.009346,0.0,0.0,0.0,0.028037,0.009346,0.0,0.009346,0.0,0.0,0.009346,0.0,0.0,0.0,0.0,0.0,0.046729,0.0,0.0,0.028037,0.0,0.0,0.037383,0.028037,0.0,0.0,0.0,0.0,0.0,0.0,0.065421,0.037383,0.009346,0.0,0.0,0.0,0.018692,0.0,0.009346,0.0,0.0,0.0,0.0,0.0,0.0,0.009346,0.0,0.0,0.009346,0.009346,0.0,0.0,0.0,0.0,0.0,0.0,0.009346,0.0,0.0,0.0,0.009346,0.009346,0.0,0.0,0.037383,0.009346,0.028037,0.009346,0.0,0.009346,0.0,0.0,0.0,0.018692,0.009346,0.0,0.0,0.009346,0.009346,0.0,0.0,0.0,0.0,0.009346,0.0,0.009346,0.0,0.009346,0.0,0.0,0.0,0.028037,0.0,0.0,0.009346,0.0,0.0,0.0,0.0,0.0,0.0,0.018692,0.0,0.018692,0.0,0.0,0.0,0.009346,0.0,0.0,0.0,0.009346,0.009346,0.0,0.009346,0.0,0.0,0.009346,0.0,0.009346,0.0,0.009346,0.0,0.0,0.009346,0.0,0.018692,0.0,0.0,0.018692,0.009346,0.018692,0.009346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009346,0.009346,0.0,0.0,0.028037,0.0,0.009346,0.0,0.0,0.0,0.065421
2,Datong,0.0,0.0,0.009091,0.018182,0.027273,0.0,0.009091,0.009091,0.027273,0.0,0.009091,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.009091,0.0,0.0,0.0,0.0,0.0,0.0,0.027273,0.0,0.0,0.036364,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.054545,0.0,0.0,0.0,0.009091,0.027273,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.009091,0.009091,0.009091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009091,0.0,0.009091,0.0,0.0,0.009091,0.0,0.090909,0.0,0.054545,0.018182,0.0,0.0,0.0,0.0,0.009091,0.045455,0.0,0.0,0.0,0.0,0.0,0.027273,0.018182,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009091,0.009091,0.009091,0.0,0.0,0.009091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0,0.0,0.009091,0.018182,0.009091,0.0,0.0,0.0,0.009091,0.0,0.0,0.0,0.0,0.090909,0.009091,0.009091,0.0,0.0,0.009091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009091,0.0,0.0,0.090909
3,Nangang,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.0,0.010101,0.0,0.0,0.010101,0.010101,0.010101,0.020202,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.040404,0.0,0.0,0.050505,0.010101,0.0,0.0,0.090909,0.010101,0.0,0.141414,0.010101,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.050505,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.040404,0.010101,0.0,0.030303,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.0,0.0,0.0,0.010101,0.0,0.020202,0.0,0.010101,0.0,0.010101,0.010101,0.0,0.0,0.0,0.010101,0.020202,0.0,0.0,0.0,0.010101,0.0,0.010101,0.0,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.070707
4,Neihu,0.018018,0.0,0.0,0.0,0.018018,0.009009,0.018018,0.0,0.027027,0.009009,0.0,0.0,0.0,0.0,0.0,0.018018,0.0,0.0,0.0,0.0,0.0,0.0,0.009009,0.0,0.0,0.0,0.045045,0.009009,0.0,0.045045,0.0,0.009009,0.0,0.072072,0.0,0.0,0.126126,0.0,0.0,0.0,0.0,0.009009,0.0,0.0,0.0,0.0,0.0,0.009009,0.009009,0.0,0.009009,0.027027,0.0,0.0,0.009009,0.009009,0.0,0.009009,0.0,0.009009,0.0,0.0,0.0,0.0,0.009009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009009,0.0,0.009009,0.0,0.0,0.0,0.0,0.036036,0.0,0.054054,0.0,0.009009,0.009009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009009,0.0,0.0,0.0,0.009009,0.0,0.036036,0.0,0.009009,0.0,0.0,0.0,0.0,0.0,0.018018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009009,0.0,0.009009,0.009009,0.0,0.0,0.018018,0.0,0.018018,0.0,0.036036,0.027027,0.0,0.0,0.009009,0.0,0.009009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009009,0.0,0.0,0.0,0.0,0.099099
5,Shilin,0.0,0.017241,0.008621,0.017241,0.017241,0.0,0.0,0.0,0.025862,0.0,0.0,0.0,0.0,0.0,0.008621,0.025862,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.0,0.0,0.043103,0.008621,0.017241,0.068966,0.0,0.0,0.008621,0.051724,0.0,0.008621,0.0,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.008621,0.0,0.017241,0.0,0.008621,0.0,0.008621,0.008621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.008621,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.008621,0.0,0.103448,0.008621,0.017241,0.017241,0.0,0.008621,0.0,0.0,0.0,0.051724,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.008621,0.017241,0.008621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.008621,0.008621,0.0,0.017241,0.008621,0.0,0.0,0.008621,0.0,0.0,0.008621,0.0,0.051724,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008621,0.0,0.0,0.0,0.0,0.0,0.137931
6,Songshan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
7,Wanhua,0.009174,0.009174,0.0,0.0,0.018349,0.0,0.0,0.0,0.018349,0.0,0.009174,0.0,0.0,0.009174,0.018349,0.009174,0.0,0.027523,0.009174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055046,0.0,0.0,0.009174,0.0,0.009174,0.0,0.045872,0.0,0.0,0.0,0.0,0.009174,0.0,0.0,0.027523,0.0,0.0,0.0,0.009174,0.009174,0.0,0.0,0.0,0.0,0.009174,0.0,0.0,0.0,0.0,0.0,0.018349,0.0,0.0,0.009174,0.009174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.036697,0.0,0.0,0.027523,0.0,0.036697,0.0,0.045872,0.018349,0.0,0.0,0.0,0.009174,0.0,0.0,0.009174,0.0,0.0,0.0,0.0,0.0,0.009174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009174,0.018349,0.0,0.073394,0.0,0.0,0.018349,0.0,0.009174,0.0,0.009174,0.0,0.0,0.0,0.0,0.0,0.009174,0.018349,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018349,0.0,0.0,0.0,0.018349,0.009174,0.009174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009174,0.009174,0.009174,0.073394,0.009174,0.009174,0.0,0.0,0.0,0.009174,0.009174,0.0,0.0,0.009174,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.082569
8,Wenshan,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.012821,0.012821,0.012821,0.025641,0.012821,0.038462,0.0,0.0,0.025641,0.0,0.0,0.0,0.089744,0.0,0.0,0.166667,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.025641,0.025641,0.012821,0.0,0.0,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.089744,0.102564
9,Xinyi,0.009524,0.0,0.0,0.0,0.009524,0.009524,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.0,0.0,0.019048,0.0,0.0,0.0,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.038095,0.0,0.009524,0.085714,0.0,0.009524,0.0,0.047619,0.0,0.0,0.104762,0.0,0.0,0.0,0.019048,0.019048,0.0,0.0,0.0,0.0,0.019048,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.019048,0.0,0.0,0.0,0.009524,0.009524,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009524,0.0,0.019048,0.019048,0.009524,0.0,0.0,0.019048,0.0,0.019048,0.0,0.009524,0.0,0.0,0.0,0.019048,0.0,0.0,0.009524,0.0,0.0,0.0,0.009524,0.0,0.0,0.009524,0.0,0.009524,0.0,0.0,0.038095,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009524,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.0,0.009524,0.0,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.009524,0.0,0.0,0.028571,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.019048,0.0,0.0,0.009524,0.0,0.0,0.0,0.0,0.0,0.133333


In [235]:
num_top_venues = 5

for hood in taipei_grouped['District']:
    print("----"+hood+"----")
    temp = taipei_grouped[taipei_grouped['District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Beitou----
               venue  freq
0        parking_lot  0.18
1              Hotel  0.13
2         Hot Spring  0.08
3  Convenience Store  0.06
4       Noodle House  0.05


----Daan----
              venue  freq
0       parking_lot  0.07
1  Department Store  0.07
2              Café  0.05
3      Cocktail Bar  0.04
4             Hotel  0.04


----Datong----
                  venue  freq
0           parking_lot  0.09
1  Taiwanese Restaurant  0.09
2                 Hotel  0.09
3   Japanese Restaurant  0.05
4           Coffee Shop  0.05


----Nangang----
                  venue  freq
0     Convenience Store  0.14
1           Coffee Shop  0.09
2           parking_lot  0.07
3  Fast Food Restaurant  0.05
4    Chinese Restaurant  0.05


----Neihu----
                 venue  freq
0    Convenience Store  0.13
1          parking_lot  0.10
2          Coffee Shop  0.07
3                 Café  0.05
4  Japanese Restaurant  0.05


----Shilin----
                  venue  freq
0           parking_

In [236]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [239]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
districts_venues_sorted = pd.DataFrame(columns=columns)
districts_venues_sorted['District'] = taipei_grouped['District']

for ind in np.arange(taipei_grouped.shape[0]):
    districts_venues_sorted.iloc[ind, 1:] = return_most_common_venues(taipei_grouped.iloc[ind, :], num_top_venues)

districts_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Beitou,parking_lot,Hotel,Hot Spring,Convenience Store,Noodle House,Coffee Shop,Resort,Park,Chinese Restaurant,Asian Restaurant
1,Daan,parking_lot,Department Store,Café,Hotel,Cocktail Bar,Dessert Shop,Bakery,Chinese Restaurant,Bookstore,Hotpot Restaurant
2,Datong,parking_lot,Hotel,Taiwanese Restaurant,Hotpot Restaurant,Dessert Shop,Coffee Shop,Japanese Restaurant,Chinese Restaurant,Café,Asian Restaurant
3,Nangang,Convenience Store,Coffee Shop,parking_lot,Chinese Restaurant,Fast Food Restaurant,Café,Japanese Restaurant,Noodle House,Hotel,Park
4,Neihu,Convenience Store,parking_lot,Coffee Shop,Japanese Restaurant,Café,Chinese Restaurant,Italian Restaurant,Noodle House,Supermarket,Bakery
5,Shilin,parking_lot,Hotel,Chinese Restaurant,Coffee Shop,Taiwanese Restaurant,Japanese Restaurant,Café,Breakfast Spot,Bakery,Ramen Restaurant
6,Songshan,parking_lot,Department Store,Fried Chicken Joint,Food Truck,Food Stand,Food Court,Flower Shop,Fish Market,Film Studio,Fast Food Restaurant
7,Wanhua,parking_lot,Noodle House,Taiwanese Restaurant,Café,Hotpot Restaurant,Coffee Shop,Hotel,Historic Site,Hostel,Bubble Tea Shop
8,Wenshan,Convenience Store,parking_lot,Coffee Shop,Zoo Exhibit,Café,Noodle House,Japanese Restaurant,Cable Car,Italian Restaurant,Chinese Restaurant
9,Xinyi,parking_lot,Convenience Store,Chinese Restaurant,Coffee Shop,Park,Café,Pizza Place,Supermarket,Fast Food Restaurant,Thai Restaurant


In [324]:
# set number of clusters
kclusters = 6

taipei_grouped_clustering = taipei_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(taipei_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 4, 3, 3, 4, 1, 2, 5, 3], dtype=int32)

In [325]:
# add clustering labels
#districts_venues_sorted.insert(0,'Cluster Labels', kmeans.labels_)

taipei_merged = taipei_density

# merge taipei_grouped with taipei_data to add latitude/longitude for each neighborhood
taipei_merged = taipei_merged.join(districts_venues_sorted.set_index('District'), on='District')

taipei_merged.head() # check the last columns!

Unnamed: 0,District,面積（km²）,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Zhongzheng,7.6071,31,580,157743,-271,20736,100,木棉花,25.094625,121.522533,Café,parking_lot,Breakfast Spot,Chinese Restaurant,Ice Cream Shop,Convenience Store,Hotpot Restaurant,Supermarket,Taiwanese Restaurant,Snack Place
1,Datong,5.6815,25,521,125909,-134,22161,103,茶花,25.06532,121.5215,parking_lot,Hotel,Taiwanese Restaurant,Hotpot Restaurant,Dessert Shop,Coffee Shop,Japanese Restaurant,Chinese Restaurant,Café,Asian Restaurant
2,Zhongshan,13.6821,42,869,227266,-121,16610,104,蝴蝶蘭,25.04841,121.53564,parking_lot,Chinese Restaurant,Hotel,Japanese Restaurant,Taiwanese Restaurant,Noodle House,Coffee Shop,Café,Seafood Restaurant,Bakery
3,Songshan,9.2878,33,763,204043,-150,21969,105,朱槿,25.21675,121.5672,parking_lot,Department Store,Fried Chicken Joint,Food Truck,Food Stand,Food Court,Flower Shop,Fish Market,Film Studio,Fast Food Restaurant
4,Daan,11.3614,53,1022,307526,-105,27068,106,波斯菊,25.03737,121.56355,parking_lot,Department Store,Café,Hotel,Cocktail Bar,Dessert Shop,Bakery,Chinese Restaurant,Bookstore,Hotpot Restaurant


In [326]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11.5)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(taipei_merged['Latitude'], taipei_merged['Longitude'], taipei_merged['District'], kmeans.labels_):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-2],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.95).add_to(map_clusters)
       
map_clusters

### Cluster 0

In [307]:
taipei_merged.loc[kmeans.labels_ == 0, taipei_merged.columns[[1] + list(range(0, taipei_merged.shape[1]))]]

Unnamed: 0,面積（km²）,District,面積（km²）.1,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,7.6071,Zhongzheng,7.6071,31,580,157743,-271,20736,100,木棉花,25.094625,121.522533,Café,parking_lot,Breakfast Spot,Chinese Restaurant,Ice Cream Shop,Convenience Store,Hotpot Restaurant,Supermarket,Taiwanese Restaurant,Snack Place


### Cluster 1

In [308]:
taipei_merged.loc[kmeans.labels_ == 1, taipei_merged.columns[[1] + list(range(0, taipei_merged.shape[1]))]]

Unnamed: 0,面積（km²）,District,面積（km²）.1,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,11.2077,Xinyi,11.2077,41,904,219744,-277,19607,110,野牡丹,25.11477,121.52752,parking_lot,Convenience Store,Chinese Restaurant,Coffee Shop,Park,Café,Pizza Place,Supermarket,Fast Food Restaurant,Thai Restaurant


### Cluster 2

In [309]:
taipei_merged.loc[kmeans.labels_ == 2, taipei_merged.columns[[1] + list(range(0, taipei_merged.shape[1]))]]

Unnamed: 0,面積（km²）,District,面積（km²）.1,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,5.6815,Datong,5.6815,25,521,125909,-134,22161,103,茶花,25.06532,121.5215,parking_lot,Hotel,Taiwanese Restaurant,Hotpot Restaurant,Dessert Shop,Coffee Shop,Japanese Restaurant,Chinese Restaurant,Café,Asian Restaurant
7,62.3682,Shilin,62.3682,51,995,283282,-177,4542,111,玫瑰花,25.05051,121.52929,parking_lot,Hotel,Chinese Restaurant,Coffee Shop,Taiwanese Restaurant,Japanese Restaurant,Café,Breakfast Spot,Bakery,Ramen Restaurant
11,31.509,Wenshan,31.509,43,1002,271674,-132,8622,116,杏花,24.98974,121.56963,Convenience Store,parking_lot,Coffee Shop,Zoo Exhibit,Café,Noodle House,Japanese Restaurant,Cable Car,Italian Restaurant,Chinese Restaurant


### Cluster 3

In [310]:
taipei_merged.loc[kmeans.labels_ == 3, taipei_merged.columns[[1] + list(range(0, taipei_merged.shape[1]))]]

Unnamed: 0,面積（km²）,District,面積（km²）.1,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,9.2878,Songshan,9.2878,33,763,204043,-150,21969,105,朱槿,25.21675,121.5672,parking_lot,Department Store,Fried Chicken Joint,Food Truck,Food Stand,Food Court,Flower Shop,Fish Market,Film Studio,Fast Food Restaurant
4,11.3614,Daan,11.3614,53,1022,307526,-105,27068,106,波斯菊,25.03737,121.56355,parking_lot,Department Store,Café,Hotel,Cocktail Bar,Dessert Shop,Bakery,Chinese Restaurant,Bookstore,Hotpot Restaurant
9,31.5787,Neihu,31.5787,39,906,285526,-269,9042,114,九重葛,25.06909,121.58847,Convenience Store,parking_lot,Coffee Shop,Japanese Restaurant,Café,Chinese Restaurant,Italian Restaurant,Noodle House,Supermarket,Bakery


### Cluster 4

In [311]:
taipei_merged.loc[kmeans.labels_ == 4, taipei_merged.columns[[1] + list(range(0, taipei_merged.shape[1]))]]

Unnamed: 0,面積（km²）,District,面積（km²）.1,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,13.6821,Zhongshan,13.6821,42,869,227266,-121,16610,104,蝴蝶蘭,25.04841,121.53564,parking_lot,Chinese Restaurant,Hotel,Japanese Restaurant,Taiwanese Restaurant,Noodle House,Coffee Shop,Café,Seafood Restaurant,Bakery
5,8.8522,Wanhua,8.8522,36,723,186848,-228,21108,108,白牡丹,25.03694,121.49944,parking_lot,Noodle House,Taiwanese Restaurant,Café,Hotpot Restaurant,Coffee Shop,Hotel,Historic Site,Hostel,Bubble Tea Shop
10,21.8424,Nangang,21.8424,20,452,120161,-136,5501,115,桂花,25.05437,121.60681,Convenience Store,Coffee Shop,parking_lot,Chinese Restaurant,Fast Food Restaurant,Café,Japanese Restaurant,Noodle House,Hotel,Park


### Cluster 5

In [312]:
taipei_merged.loc[kmeans.labels_ == 5, taipei_merged.columns[[1] + list(range(0, taipei_merged.shape[1]))]]

Unnamed: 0,面積（km²）,District,面積（km²）.1,下轄里數,下轄鄰數,人口數,人口消長,人口密度（人/km²）,郵遞區號,區花,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,56.8216,Beitou,56.8216,42,827,253155,-164,4455,112,櫻花,25.13289,121.50253,parking_lot,Hotel,Hot Spring,Convenience Store,Noodle House,Coffee Shop,Resort,Park,Chinese Restaurant,Asian Restaurant


## Results and Discussion <a name="results"></a>

A visual analysis of the clusters shows that cluster 0 has one districts with high population density. In addition,  Zhongzheng district do not have parking lot in the top ten most common venues.

It would be my advice to target these areas for opening a new parking slot. As we can see, market of parking lot has already almost saturated. high density, small area are to key to expand market of parking lot, but seem most of the district got top one in the top most common venues.

### Dig Deeper

in this project, we only see feature like district area, population or top common venue to decide which area is best place to open new parking lot, but the truth is that it needs to look through more feature to determine which place is the best. if we really need to find out more possibility, we should find more venue data, and sales volumn of car in every month with differnet district, then we can use those data to predict which place will need more parking lot in futre 3 years. 

### Address to latitude

In [83]:
parking_coor.address.shape

(1994,)

In [84]:
!pip install -U googlemaps

Collecting googlemaps
  Downloading https://files.pythonhosted.org/packages/07/b8/bd7ab78014a4290853250ac8a1744c5a200e569811b7e0cc9222d38fc296/googlemaps-4.2.0-py3-none-any.whl
Installing collected packages: googlemaps
Successfully installed googlemaps-4.2.0


In [67]:
test20 = parking_coor.head(100)

In [69]:
test = []

In [70]:
for i, element in enumerate(test20.address):
    try:
        geocode_result = gmaps.geocode(element)
        test.append(geocode_result[0]['geometry']['location'])
        print(i,element)
        
    except:
        print(i,element)
        test.append(i)

0 松壽路1號地下
1 松智路75號地下
2 莊敬路391巷11弄2號地下
3 松隆路123巷7號地下
4 承德路7段372號地下
5 台北市
6 西藏路201號地下
7 台北市中山區北安路798號1樓、B1-B2
8 松德路200巷18號地下
9 峨眉街83號
10 延平南路98號地下
11 環河南路1段1號
12 塔城街11號地下
13 承德路4段175號地下
14 前港街45號地下
15 中坡北路57號地下
16 民權東路4段180號地下
17 金華街190號地下
18 中華路2段606巷1號地下
19 景華街55號地下
20 基隆路1段156號地下
21 東湖路115號之1地下
22 歸綏街243號地下
23 復興南路1段340巷11號地下
24 健康路325巷19弄1號地下
25 八德路2段158號地下
26 辛亥路4段103號地下
27 承德路2段235號之1地下
28 大興街145號地下
29 信義路3段166巷6弄12號地下
30 成福路1號
31 延平北路6段180號
32 延壽街168號地下
33 松仁路2號地下
34 長安西路37號之一地下
35 興中路44巷1號
36 國興路5號地下
37 新光路2段28號
38 信義路5段11號地下
39 大業路745號
40 成功路5段7號地下
41 民生東路5段163號之1號地下2、3F層
42 民生東路5段84號
43 八德路3段25號地下1層及地下2層
44 西寧北路-塔城街(地下)
45 建國南路2段2號地下
46 公園路-中山北路(地下)
47 中山北路-林森北路(地下)
48 林森北路-金山北路(地下)
49 建國北路-復興南路(地下)
50 湖山路(陽明山公園大門口旁)
51 復興南路-敦化南路(地下)
52 敦化南路-延吉街(地下)
53 延吉街兩側(地下)
54 大龍街136號
55 西園路2段320巷55弄3號地下。
56 文林路615巷20號地下
57 青年路69號地下
58 建國北路3段39號地下
59 重慶北路2段68號地下
60 中正路17號地下
61 南京東路1段35號地下
62 市民大道5段99號(地下一層)
63 八德路4段101號地下
64 陽金公路(陽明山國家公園遊客服務中心對面)
65 湖山路2段(陽明山公園花鐘前)
66 瑞光路513巷22弄2號
67 忠孝東路4

In [286]:
address_coor = []

In [291]:
for i, element in enumerate(parking_coor.address):
    try:
        geocode_result = gmaps.geocode(element)
        address_coor.append(geocode_result[0]['geometry']['location'])
        
    except:
        address_coor.append(i)
    

In [None]:
import pandas
df = pandas.DataFrame(data={"col1": list_1, "col2": list_2})
df.to_csv("./file.csv", sep=',',index=False)

In [54]:
google_act = 'AIzaSyArzfeX3BtJ27KFMjzGWhV4rVVw8VlAqNY'

import googlemaps
from datetime import datetime

gmaps = googlemaps.Client(key=google_act)

# Geocoding an address
geocode_result = gmaps.geocode('松壽路1號地下')

# Look up an address with reverse geocoding
reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))

In [55]:
geocode_result

[{'address_components': [{'long_name': '號地下',
    'short_name': '號地下',
    'types': ['subpremise']},
   {'long_name': '1', 'short_name': '1', 'types': ['street_number']},
   {'long_name': 'Songshou Road',
    'short_name': 'Songshou Road',
    'types': ['route']},
   {'long_name': 'Xinyi District',
    'short_name': 'Xinyi District',
    'types': ['administrative_area_level_3', 'political']},
   {'long_name': 'Taipei City',
    'short_name': 'Taipei City',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'Taiwan',
    'short_name': 'TW',
    'types': ['country', 'political']},
   {'long_name': '110', 'short_name': '110', 'types': ['postal_code']}],
  'formatted_address': '號地下, No. 1, Songshou Road, Xinyi District, Taipei City, Taiwan 110',
  'geometry': {'location': {'lat': 25.0360451, 'lng': 121.5626306},
   'location_type': 'ROOFTOP',
   'viewport': {'northeast': {'lat': 25.0373940802915,
     'lng': 121.5639795802915},
    'southwest': {'lat': 25.03469611

In [42]:
add_lat = pd.read_csv("file.csv") 

In [56]:
add_lat.tail()

Unnamed: 0,col1
2035,1989
2036,1990
2037,1991
2038,1992
2039,1993


In [289]:
geocode_result[0]['geometry']['location']

{'lat': 25.0360451, 'lng': 121.5626306}

In [244]:
parking_coor.groupby('area_eng').count()


Unnamed: 0_level_0,name,Latitude,longitude,area,area_latitude,area_Longitude
area_eng,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Beitou,129,129,129,129,129,129
Daan,305,305,305,305,305,305
Datong,90,90,90,90,90,90
Nangang,98,98,98,98,98,98
Neihu,228,228,228,228,228,228
Shilin,161,161,161,161,161,161
Songshan,167,167,167,167,167,167
Wanhua,87,87,87,87,87,87
Wenshan,85,85,85,85,85,85
Xinyi,143,143,143,143,143,143


In [None]:
import re
address_to_x = [get_latitude_longtitude(address) for address in Xcod_parkinglot_data if re.match(r'\d+\.\d+', address) is None and address]

In [51]:
sun_parking_spot = len(parkinglot_data['data']['park'])

In [186]:
import re

In [150]:
# get the coordinates of Kuala Lumpur
address = '木柵路三段'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Kuala Lumpur, Malaysiae {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kuala Lumpur, Malaysiae 24.9885752, 121.5642503.


In [27]:
import requests
import urllib.parse
import json
from time import sleep

def get_latitude_longtitude(address):
    # decode url
    address = urllib.parse.quote(address)
    key= '&key=AIzaSyArzfeX3BtJ27KFMjzGWhV4rVVw8VlAqNY'
    url = "https://maps.googleapis.com/maps/api/geocode/json?address=" + address+key
    
    while True:
        res = requests.get(url)
        js = json.loads(res.text)

        if js["status"] != "OVER_QUERY_LIMIT":
            sleep(1)
            break
    if js["results"]:
        result = js["results"][0]["geometry"]["location"]
        lat = result["lat"]
        lng = result["lng"]
    else:
        lat = None
        lng= None

    return lat, lng



address = "重慶南路一段122號"
lat, lng = get_latitude_longtitude(address)

In [31]:
lat, lng

(25.0403593, 121.5122878)