# Tokyo vs Osaka

## Scrape Wikipedia for wards

In [2]:
# Install beautifulsoup4 from the anaconda channel via a shell escape; -y skips the confirmation prompt.
!conda install -c anaconda beautifulsoup4 -y

Collecting package metadata: ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [16]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

# Get wards for Osaka: download the Wikipedia category page that lists them.
response = requests.get('https://en.wikipedia.org/wiki/Category:Wards_of_Osaka', timeout=30)
# Fail here on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(response.content, 'html.parser')
soup.title

<title>Category:Wards of Osaka - Wikipedia</title>

In [19]:
# Build the ward table for Osaka: one row per ward with its geocoded coordinates.
columns = ['City', 'Ward', 'Latitude', 'Longitude']

# Link texts on the category page presumably look like "<ward>, Osaka";
# strip that suffix (when present) to get the bare ward name.
title_suffix = ', Osaka'

# A single geocoder client is enough for the whole loop.
geolocator = Nominatim(user_agent="final-proj")

osaka_rows = []
for item in soup.find('div', {'id': 'mw-pages'}).find_all('li'):
    title = item.a.get_text()
    ward = title[:-len(title_suffix)] if title.endswith(title_suffix) else title

    location = geolocator.geocode('{}, Osaka'.format(ward))
    if location is None:
        # geocode() returns None when nothing matches; skip the ward (visibly)
        # rather than crash with AttributeError on location.latitude.
        print('Could not geocode "{}, Osaka"; skipping.'.format(ward))
        continue

    osaka_rows.append({'City': 'Osaka',
                       'Ward': ward,
                       'Latitude': location.latitude,
                       'Longitude': location.longitude})

# Build the frame once from the collected records: DataFrame.append was removed
# in pandas 2.0 and was quadratic when called inside a loop.
df = pd.DataFrame(osaka_rows, columns=columns)
df

Unnamed: 0,City,Ward,Latitude,Longitude
0,Osaka,Abeno-ku,34.627501,135.514095
1,Osaka,Asahi-ku,34.726483,135.546952
2,Osaka,Chūō-ku,34.679846,135.510316
3,Osaka,Fukushima-ku,34.692104,135.474812
4,Osaka,Higashinari-ku,34.672912,135.550567
5,Osaka,Higashisumiyoshi-ku,34.615662,135.531096
6,Osaka,Higashiyodogawa-ku,34.740212,135.517432
7,Osaka,Hirano-ku,34.603715,135.559027
8,Osaka,Ikuno-ku,34.653003,135.547722
9,Osaka,Jōtō-ku,34.693887,135.547769


In [25]:
# Get wards for Tokyo: download the Wikipedia article on the special wards.
response = requests.get('https://en.wikipedia.org/wiki/Special_wards_of_Tokyo', timeout=30)
# Fail here on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(response.content, 'html.parser')
soup.title

<title>Special wards of Tokyo - Wikipedia</title>

In [42]:
# Add Tokyo's wards (with geocoded coordinates) to the ward table `df`.

# A single geocoder client is enough for the whole loop.
geolocator = Nominatim(user_agent="final-proj")

tokyo_rows = []
# NOTE(review): the ward list is taken from the 4th <table> on the page and the
# 3rd <td> of each row; both indices depend on the article's current layout.
for item in soup.find_all('table')[3].find_all('tr'):
    if item.th is not None:
        # Rows containing a <th> cell are skipped.
        continue
    try:
        ward = item.find_all('td')[2].a.get_text()
    except (IndexError, AttributeError):
        # No third cell, or no link inside it: as before, stop scanning at the
        # first such row, but no longer swallow unrelated errors.
        break

    location = geolocator.geocode('{}, Tokyo'.format(ward))
    if location is None:
        # geocode() returns None when nothing matches; skip the ward (visibly)
        # rather than crash with AttributeError on location.latitude.
        print('Could not geocode "{}, Tokyo"; skipping.'.format(ward))
        continue

    tokyo_rows.append({'City': 'Tokyo',
                       'Ward': ward,
                       'Latitude': location.latitude,
                       'Longitude': location.longitude})

# Replace any Tokyo rows left from a previous run instead of appending again, so
# re-running this cell does not duplicate wards. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
df = pd.concat(
    [df[df['City'] != 'Tokyo'], pd.DataFrame(tokyo_rows, columns=df.columns)],
    ignore_index=True,
)

df

Unnamed: 0,City,Ward,Latitude,Longitude
0,Osaka,Abeno-ku,34.627501,135.514095
1,Osaka,Asahi-ku,34.726483,135.546952
2,Osaka,Chūō-ku,34.679846,135.510316
3,Osaka,Fukushima-ku,34.692104,135.474812
4,Osaka,Higashinari-ku,34.672912,135.550567
5,Osaka,Higashisumiyoshi-ku,34.615662,135.531096
6,Osaka,Higashiyodogawa-ku,34.740212,135.517432
7,Osaka,Hirano-ku,34.603715,135.559027
8,Osaka,Ikuno-ku,34.653003,135.547722
9,Osaka,Jōtō-ku,34.693887,135.547769


In [57]:
# Tally the distinct ward names recorded for each city.
wards_by_city = {
    city: len(df.loc[df['City'] == city, 'Ward'].unique())
    for city in ('Osaka', 'Tokyo')
}
osaka_wards_count = wards_by_city['Osaka']
tokyo_wards_count = wards_by_city['Tokyo']

# Report both totals in one sentence (Tokyo first, then Osaka).
summary = 'There are {} number of wards in Tokyo and {} number of wards in Osaka.'
print(summary.format(tokyo_wards_count, osaka_wards_count))

There are 23 number of wards in Tokyo and 24 number of wards in Osaka.


In [23]:
import folium

# Base map centred on the coordinates listed for Osaka's Chūō-ku in the ward table.
# NOTE(review): `map` shadows the Python builtin of the same name; the name is
# kept here because later cells may reference it.
map = folium.Map(location=[34.679846, 135.510316], zoom_start=11)

# Add one circle marker per ward; its popup reads "City, Ward".
for lat, lng, city, ward in zip(df['Latitude'], df['Longitude'], df['City'], df['Ward']):
    # parse_html is a Popup option, so it is set only here and no longer
    # passed to CircleMarker, where it is not a marker style setting.
    popup = folium.Popup('{}, {}'.format(city, ward), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map)

map