# Step 1. Get data from website to dataframe

#### Scrape the earthquake list at *https://earthquake.tenki.jp/bousai/earthquake/entries/* and keep four fields per event: Time, Location, Magnitude, and Maximum (the maximum observed seismic intensity)

In [2]:
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
import pandas as pd
import requests
import numpy as np
!conda install -c conda-forge folium=0.5.0
import folium

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  54.65 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  36.20 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  39.11 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  45.40 MB/s


In [58]:
DATALINK = 'https://earthquake.tenki.jp/bousai/earthquake/entries/'
COUNTRY = ', 日本'

# Listing pages 1..LAST_PAGE are fetched. The value is the page count that was
# hard-coded for the original run; adjust it if the site has grown or shrunk.
LAST_PAGE = 268
REQUEST_TIMEOUT = 30  # seconds; without a timeout one stalled request hangs the cell forever

# Nominatim's usage policy asks every client to identify itself, and current
# geopy releases refuse to construct the geocoder without a user agent.
geolocator = Nominatim(user_agent='japan-earthquake-notebook')


def _parse_magnitude(text):
    """Return ``text`` converted to float, or NaN when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        return np.nan


def _parse_maximum(alt_text):
    """Turn the ``alt`` text of the intensity icon into an int.

    The whole string is tried first; if that fails, only its first character
    is tried (presumably for values such as '5弱' / '5強' -- verify against the
    page). NaN is returned when neither attempt yields an integer, including
    for an empty string.
    """
    try:
        return int(alt_text)
    except ValueError:
        pass
    try:
        return int(alt_text[0])
    except (ValueError, IndexError):
        return np.nan


def _scrape_page(url):
    """Download one listing page and return its rows as a list of dicts.

    Raises ``requests.HTTPError`` on a non-2xx response and ``IndexError`` if
    the page holds no ``earthquake-entries-table`` table.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find_all('table', {'class': 'earthquake-entries-table'})[0]

    page_records = []
    for row in table.find_all('tr')[1:]:  # the first <tr> is skipped (presumably the header row)
        # NOTE(review): the positions 2/3/4 depend on the newline layout of the
        # row's text as served by the site -- re-check if the markup changes.
        fields = row.text.split('\n')
        icons = row.find_all('td')[4].find_all('img', alt=True)
        page_records.append({
            'Time': fields[2],
            'Location': fields[3],
            # [1:] drops the first character of the cell (presumably a leading
            # 'M' -- confirm against the page).
            'Magnitude': _parse_magnitude(fields[4][1:]),
            'Maximum': _parse_maximum(icons[0]['alt']) if icons else np.nan,
        })
    return page_records


# Collect plain dicts and build the frame once: DataFrame.append copied the
# whole frame on every row and no longer exists in pandas >= 2.0.
records = []
for page in range(1, LAST_PAGE + 1):
    if page > 1:
        page_url = DATALINK + 'page-' + str(page) + '.html'
    else:
        page_url = DATALINK
    records.extend(_scrape_page(page_url))

# 'Magnitude' and 'Maximum' now get numeric dtypes (NaN marks missing values).
df = pd.DataFrame(records, columns=['Time', 'Location', 'Magnitude', 'Maximum'])
df.head(5)

Unnamed: 0,Time,Location,Magnitude,Maximum
0,2019年03月28日11時17分頃,十勝沖,4.6,2
1,2019年03月28日08時12分頃,奄美大島近海,3.6,1
2,2019年03月27日18時11分頃,日向灘,3.7,1
3,2019年03月27日15時54分頃,日向灘,2.7,1
4,2019年03月27日15時38分頃,日向灘,5.4,4


## Step 2. Format Data
*Remove rows that contain missing (NaN) values.*

In [94]:
# Discard every row that holds at least one missing value, then renumber the
# surviving rows from 0 so the index is contiguous again.
df = df.dropna(axis=0).reset_index(drop=True)
df.shape

(26540, 4)

In [95]:
def getLocation(location):
    """Shorten an epicentre name to its leading region name.

    The markers '都', '道', '府', '県' and '島' are tried in that order. For the
    first marker whose first occurrence lies after position 0, the text up to
    and including that marker is returned (e.g. '福島県沖' -> '福島県',
    '奄美大島近海' -> '奄美大島'). A marker found at position 0 is ignored.

    A '都' immediately followed by '府' is skipped so that '京都府南部' becomes
    '京都府' rather than being cut short to '京都'.

    Parameters
    ----------
    location : str
        Epicentre name as it appears in the 'Location' column.

    Returns
    -------
    str
        The shortened name, or ``location`` unchanged when no marker applies.
    """
    for marker in ('都', '道', '府', '県', '島'):
        pos = location.find(marker)
        if pos <= 0:
            # Not present (-1), or present only as the very first character.
            continue
        if marker == '都' and location[pos + 1:pos + 2] == '府':
            # '…都府…' is a 府 prefecture (Kyoto); let the '府' marker handle it.
            continue
        return location[:pos + 1]

    return location

## Step 3. Keep only the events whose Maximum is 4 or greater
*Because a Maximum (maximum intensity) of 4 and above is considered dangerous.*

In [119]:
# Keep the rows whose 'Maximum' is 4 or more. reset_index returns a new frame,
# so the column assignments below never write back into `df`.
df_4up = df.loc[df['Maximum'] >= 4].reset_index(drop=True)

# Collapse each epicentre name to its region in one vectorised pass instead of
# assigning cell by cell through .loc in a Python loop.
df_4up['Location'] = df_4up['Location'].map(getLocation)

# 'Time' is deliberately overwritten with a constant 1: from here on the column
# only serves as a per-event tally for the group-by count in the next step.
df_4up['Time'] = 1

df_4up.head()

Unnamed: 0,Time,Location,Magnitude,Maximum
0,1,日向灘,5.4,4
1,1,紀伊水道,5.2,4
2,1,岐阜県,4.5,4
3,1,宮城県,4.6,4
4,1,根室半島,6.2,4


## Step 4. Find the top 5 areas with frequent earthquakes

In [120]:
# Count events per region and keep the five largest counts.
# The result is derived from `df_4up` without aliasing it and dropping columns
# in place, so `df_4up` keeps 'Magnitude' and 'Maximum' and this cell can be
# re-run without a KeyError on columns that were already removed.
top5 = (
    df_4up.groupby('Location')[['Time']]
    .count()
    .sort_values('Time', ascending=False)
    .head(5)
)

top5.head()

Unnamed: 0_level_0,Time
Location,Unnamed: 1_level_1
福島県,158
茨城県,124
熊本県,121
宮城県,58
岩手県,57


## Step 5. Visualize top 5 regions on Japan's map

In [127]:
def _add_circle(target_map, latitude, longitude, color, radius):
    """Add one filled circle marker of the given colour and radius to ``target_map``."""
    folium.CircleMarker(
        [latitude, longitude],
        radius=radius,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7).add_to(target_map)


# geocode() returns None when the service finds no match, so every result is
# checked before its coordinates are read.
japan = geolocator.geocode('Japan')
if japan is None:
    raise RuntimeError("Geocoding 'Japan' returned no result; cannot centre the map.")
japan_map = folium.Map(location=[japan.latitude, japan.longitude], zoom_start=5)

# Red markers: the regions listed in `top5`. COUNTRY (', 日本') is appended to
# each region name to build the geocoding query.
for region in top5.index.values:
    place = geolocator.geocode(region + COUNTRY)
    if place is None:
        print('No geocoding result for ' + region + '; marker skipped.')
        continue
    _add_circle(japan_map, place.latitude, place.longitude, 'red', 0.5)

# Blue markers: the two candidate cities, placed at fixed coordinates.
candidate_cities = [
    ('Osaka', 34.652500, 135.506302),
    ('Tokyo', 35.652832, 139.839478),
]
for _city, city_lat, city_lon in candidate_cities:
    _add_circle(japan_map, city_lat, city_lon, 'blue', 2)

japan_map

#### Currently, I am living in Osaka (大阪), and if I obtain permanent residence, I will choose between Osaka (大阪) and Tokyo (東京). Based on the analysis results, these two cities do not have many earthquakes, so I feel reassured about this concern.