# A foodie's guide to Osaka


## Data collection and data preparation

Import libraries

In [61]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
import json
import random
from geopy.geocoders import Nominatim
import requests
from bs4 import BeautifulSoup 
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from folium import plugins
import matplotlib.pyplot as plt
import os
from IPython.display import Image 
from IPython.core.display import HTML

print('Libraries imported.')

Libraries imported.


Scraping postal codes of Osaka

In [36]:
japan = requests.get('https://en.wikipedia.org/wiki/Wards_of_Japan').text
soup = BeautifulSoup(japan,'lxml')
japan_table = soup.find('table', {'class':'wikitable sortable'})
column_names = ['Wards','Kanji','City','Area']
df = pd.DataFrame(columns=column_names)
df.head()

Unnamed: 0,Wards,Kanji,City,Area


In [37]:
for tr_cell in japan_table.find_all('tr'):
    row_data=[]
    for td_cell in tr_cell.find_all('td'):
        row_data.append(td_cell.text.strip())
    if len(row_data)==4:
        df.loc[len(df)] = row_data
df=df[df['City'].isin(['Osaka'])]
print(df.shape)
df.head()

(24, 4)


Unnamed: 0,Wards,Kanji,City,Area
95,Abeno,阿倍野区,Osaka,5.98
96,Asahi,旭区,Osaka,6.32
97,Chūō,中央区,Osaka,8.87
98,Fukushima,福島区,Osaka,4.67
99,Higashinari,東成区,Osaka,4.54


Get the geographical coordinates of Wards in Osaka.

In [71]:
df["new"] = df['Wards'] + '-ku, Osaka' #Creat a new column with '-ku, Osaka' added to the ward names to increase the accuracy for coordinates results.
geolocator = Nominatim(user_agent="Osaka_explorer")
df['Major_Dist_Coord']= df['new'].apply(geolocator.geocode).apply(lambda x: (x.latitude, x.longitude))
df[['Latitude', 'Longitude']] = df['Major_Dist_Coord'].apply(pd.Series)

df.drop(['Major_Dist_Coord','new'], axis=1, inplace=True)
df.reset_index(drop=True, inplace=True)
print(df.shape)
df.head(24)

(24, 6)


Unnamed: 0,Wards,Kanji,City,Area,Latitude,Longitude
0,Abeno,阿倍野区,Osaka,5.98,34.627501,135.514095
1,Asahi,旭区,Osaka,6.32,34.726483,135.546952
2,Chūō,中央区,Osaka,8.87,34.679846,135.510316
3,Fukushima,福島区,Osaka,4.67,34.692104,135.474812
4,Higashinari,東成区,Osaka,4.54,34.672912,135.550567
5,Higashisumiyoshi,東住吉区,Osaka,9.75,34.615662,135.531096
6,Higashiyodogawa,東淀川区,Osaka,13.27,34.740212,135.517432
7,Hirano,平野区,Osaka,15.28,34.603715,135.559027
8,Ikuno,生野区,Osaka,8.37,34.653003,135.547722
9,Joto,城東区,Osaka,8.38,34.693887,135.547769


In [0]:
# save the DataFrame as CSV file
df.to_csv("osaka_df.csv", index=False)

In [73]:
osaka_df = pd.read_csv("osaka_df.csv")
osaka_df.drop(columns=["City"],inplace=True)
osaka_df.head()

Unnamed: 0,Wards,Kanji,Area,Latitude,Longitude
0,Abeno,阿倍野区,5.98,34.627501,135.514095
1,Asahi,旭区,6.32,34.726483,135.546952
2,Chūō,中央区,8.87,34.679846,135.510316
3,Fukushima,福島区,4.67,34.692104,135.474812
4,Higashinari,東成区,4.54,34.672912,135.550567


Create a map of Osaka with wards superimposed on top

In [0]:
address = 'Chūōku, Osaka'
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
lat_chuoku = location.latitude
lon_chuoku = location.longitude

In [100]:
map_osaka = folium.Map(location=[lat_chuoku, lon_chuoku], zoom_start=12) #using the coordinates of Chūōku to create the map, providing the best view of Osaka

mark_ward = folium.map.FeatureGroup()
for lat, lng, label in zip(osaka_df['Latitude'],osaka_df['Longitude'],osaka_df['Wards']):
    mark_ward.add_child(
    folium.CircleMarker(
        [lat, lng],
        radius=15,
        popup=label,
        color='white',
        fill_color='#3193cc',
        fill_opacity=0.3
        )
    )


map_osaka.add_child(mark_ward)

Define Foursquare Credentials and Version

In [0]:
CLIENT_ID = 'LCB1PXFFN0RYEONQAASUFW1RS0VWXQJEQM5JJCKT15VMJVPC'
CLIENT_SECRET = 'OLNEHS3UZSZJVM43F4FBH3BLA4T0VDLPFNY1RAY5TDUD4ZKV'
VERSION = '20200504'
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius

Create a function to get the nearby venues for all neighborhoods in Toronto

In [0]:
def getNearbyVenues(names, latitudes, longitudes):
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood_Latitude', 
                  'Neighborhood_Longitude', 
                  'Venue', 
                  'Venue_Latitude', 
                  'Venue_Longitude', 
                  'Venue_Category']
    
    return(nearby_venues)

In [0]:
toronto_venues = getNearbyVenues(names=geo_new['Neighborhood'],
                                   latitudes=geo_new['Latitude'],
                                   longitudes=geo_new['Longitude']
                                  )

Malvern,  Rouge
Rouge Hill,  Port Union,  Highland Creek
Guildwood,  Morningside,  West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park,  Ionview,  East Birchmount Park
Golden Mile,  Clairlea,  Oakridge
Cliffside,  Cliffcrest,  Scarborough Village West
Birch Cliff,  Cliffside West
Dorset Park,  Wexford Heights,  Scarborough Town Centre
Wexford,  Maryvale
Agincourt
Clarks Corners,  Tam O'Shanter,  Sullivan
Milliken,  Agincourt North,  Steeles East,  L'Amoreaux East
Steeles West,  L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview,  Henry Farm,  Oriole
Bayview Village
York Mills,  Silver Hills
Willowdale,  Newtonbrook
Willowdale
York Mills West
Willowdale
Parkwoods
Don Mills
Don Mills
Bathurst Manor,  Wilson Heights,  Downsview North
Northwood Park,  York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill,  Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West,  Riverdale
India Bazaar,  The Beaches 

In [0]:
print(toronto_venues.shape)
toronto_venues.head()

(4879, 7)


Unnamed: 0,Neighborhood,Neighborhood_Latitude,Neighborhood_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,"Malvern, Rouge",43.806686,-79.194353,Harvey's,43.80002,-79.198307,Restaurant
1,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
2,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
3,"Malvern, Rouge",43.806686,-79.194353,RBC Royal Bank,43.798782,-79.19709,Bank
4,"Malvern, Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant


Create a data frame with **Restaurant**.

In [0]:
toronto_restaurant = toronto_venues[toronto_venues['Venue_Category'].str.contains('Restaurant')].reset_index(drop=True)
toronto_restaurant.index = np.arange(1, len(toronto_restaurant)+1)
toronto_restaurant.reset_index(drop=True, inplace=True)
print(toronto_restaurant.shape)
toronto_restaurant.head(200)

(1181, 7)


Unnamed: 0,Neighborhood,Neighborhood_Latitude,Neighborhood_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,"Malvern, Rouge",43.806686,-79.194353,Harvey's,43.80002,-79.198307,Restaurant
1,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
2,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
3,"Malvern, Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
4,"Malvern, Rouge",43.806686,-79.194353,Charley's Exotic Cuisine,43.800982,-79.200233,Chinese Restaurant
5,"Malvern, Rouge",43.806686,-79.194353,Mr. Greek,43.799853,-79.198234,Greek Restaurant
6,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Fratelli Village Pizzeria,43.784008,-79.169787,Italian Restaurant
7,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet,43.768122,-79.190493,Restaurant
8,"Guildwood, Morningside, West Hill",43.763573,-79.188711,KFC,43.7689,-79.1856,Fast Food Restaurant
9,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Taste Of Greek Cuisine,43.768259,-79.189608,Greek Restaurant


Investigate the restaurant categories

In [0]:
print('There are {} categories for the restaurant in Toronto.'.format(len(toronto_restaurant['Venue_Category'].unique())))
print (toronto_restaurant['Venue_Category'].value_counts())

There are 58 categories for the restaurant in Toronto.
Restaurant                       139
Italian Restaurant               105
Japanese Restaurant               88
Sushi Restaurant                  81
Fast Food Restaurant              65
Thai Restaurant                   56
Chinese Restaurant                46
Vegetarian / Vegan Restaurant     46
Indian Restaurant                 46
American Restaurant               42
Seafood Restaurant                39
Mexican Restaurant                38
Middle Eastern Restaurant         38
Greek Restaurant                  32
Vietnamese Restaurant             31
Asian Restaurant                  29
French Restaurant                 27
Korean Restaurant                 27
Caribbean Restaurant              26
Ramen Restaurant                  26
Mediterranean Restaurant          18
New American Restaurant           10
Comfort Food Restaurant            9
Ethiopian Restaurant               8
Eastern European Restaurant        8
Latin American Resta