In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
import re
import warnings
import folium
import bokeh
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from folium import plugins
from folium.plugins import HeatMap
import datetime
import geopy.distance
import mpu
from folium.plugins import MarkerCluster
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999
warnings.filterwarnings("ignore")

In [2]:
# create a Request object for the wikipedia page for the Turing Award
response = requests.get('https://en.wikipedia.org/wiki/List_of_deadly_earthquakes_since_1900')

# read the resulting HTML into a variable called result_text
result_text = response.text

In [3]:
# create the soup by constructing a BS object from the html page and the appropriate parser
soup = BS(result_text, 'html.parser')

In [4]:
table = soup.find('table', {'class': "sortable wikitable"})

In [5]:

list_df = pd.read_html(str(table), header=0)

In [6]:
df_original = pd.DataFrame(list_df[0])


In [7]:
def remove_footnotes(unformatted, coltype, x=1):
   try:
       m = []
       if(coltype == 'reference'):
           m = re.search('(^\d+)\D*', unformatted)
       elif(coltype == 'country'):
           m = re.search('(^\w+)\W*', unformatted)
       elif(coltype == 'magnitude'):
           m = re.search('(^\d+\.*\d*)\W*', unformatted)
       return m.group(x)
   except:
       return 'NaN'

In [8]:
df_original['Present-day country and link to Wikipedia article'] = df_original['Present-day country and link to Wikipedia article'].apply(lambda x: remove_footnotes(x,coltype='country'))
df_original['Magnitude'] = df_original['Magnitude'].apply(lambda x: remove_footnotes(x,coltype='magnitude'))
df_original['EM-DAT Total Deaths'] = df_original['EM-DAT Total Deaths'].apply(lambda x: remove_footnotes(x,coltype='reference'))
df_original['Other Source Deaths'] = df_original['Other Source Deaths'].apply(lambda x: remove_footnotes(x,coltype='reference'))

In [9]:
df_original['Magnitude']=df_original['Magnitude'].astype('float64')
df_original['Other Source Deaths']=df_original['Other Source Deaths'].astype('float64')
df_original['EM-DAT Total Deaths']=df_original['EM-DAT Total Deaths'].astype('float64')
df_original = df_original.set_index(pd.DatetimeIndex(pd.to_datetime(df_original['Origin (UTC)'])))

In [10]:
df_original= df_original.rename(index=str, columns = {'Present-day country and link to Wikipedia article':'Country'})

In [11]:

df_original['Deaths'] = df_original[['PDE Total Deaths','Utsu Total Deaths','EM-DAT Total Deaths','Other Source Deaths']].max(axis=1)

In [12]:
df_original.tail()

Unnamed: 0_level_0,Origin (UTC),Country,Lat,Long,Depth (km),Magnitude,Secondary Effects,PDE Shaking Deaths,PDE Total Deaths,Utsu Total Deaths,EM-DAT Total Deaths,Other Source Deaths,Deaths
Origin (UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2011-03-11 14:46:00,2011-03-11 14:46,Japan,38.322,142.369,24.4,9.0,T,,,,,15894.0,15894.0
2011-03-24 20:25:00,2011-03-24 20:25,Burma,,,,6.8,,,,,,150.0,150.0
2011-04-07 14:32:00,2011-04-07 14:32,Japan,38.2,140.0,66.0,7.1,,,,,,,
2011-09-18 12:40:00,2011-09-18 12:40,India,27.723,88.064,19.7,6.9,L,,,,,111.0,111.0
2011-09-23 10:41:00,2011-09-23 10:41,Turkey,38.6,43.5,7.2,7.1,,,,,,601.0,601.0


In [13]:
df_original=df_original.rename(index=str, columns={"Origin (UTC)": "Time"})

In [14]:
df_original.info(
)

<class 'pandas.core.frame.DataFrame'>
Index: 1339 entries, 1900-05-11 17:23:00 to 2011-09-23 10:41:00
Data columns (total 13 columns):
Time                   1339 non-null object
Country                1339 non-null object
Lat                    1325 non-null object
Long                   1325 non-null object
Depth (km)             1249 non-null object
Magnitude              1338 non-null float64
Secondary Effects      373 non-null object
PDE Shaking Deaths     738 non-null float64
PDE Total Deaths       749 non-null float64
Utsu Total Deaths      1027 non-null float64
EM-DAT Total Deaths    559 non-null float64
Other Source Deaths    37 non-null float64
Deaths                 1273 non-null float64
dtypes: float64(7), object(6)
memory usage: 146.5+ KB


In [15]:
df_original["Country"].value_counts().sort_index()

Afghanistan      30
Albania           7
Algeria          21
Argentina        11
Armenia           2
Australia         3
Azerbaijan        5
Bangladesh        7
Belgium           1
Bolivia           3
Bosnia            3
Brazil            1
Bulgaria          5
Burma            11
Burundi           1
Canada            3
Chile            44
China           166
Colombia         24
Costa            10
Cuba              2
Cyprus            3
Czech             1
Democratic        6
Djibouti          1
Dominican         5
Ecuador          19
Egypt             4
El                7
England           1
Eritrea           1
Ethiopia          2
Fiji              1
France            4
Georgia           9
Ghana             1
Greece           44
Guadeloupe        1
Guatemala        15
Guinea            1
Haiti             3
Honduras          1
Hungary           1
Iceland           2
India            25
Indonesia        96
Iran             92
Iraq              1
Italy            37
Jamaica           2


In [16]:
df_original.to_csv("data_combined.csv")

In [17]:
combined_dataset = pd.read_excel('worldcities.xlsx')

In [18]:
combined_dataset.tail()

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
12888,Sturgis,Sturgis,44.4093,-103.5184,United States,US,USA,South Dakota,,6787.0,1840002174
12889,Tea,Tea,43.4515,-96.8346,United States,US,USA,South Dakota,,5002.0,1840002841
12890,Brandon,Brandon,43.5928,-96.5799,United States,US,USA,South Dakota,,9232.0,1840002650
12891,Madison,Madison,44.0062,-97.1084,United States,US,USA,South Dakota,,7081.0,1840002540
12892,Belle Fourche,Belle Fourche,44.6635,-103.8562,United States,US,USA,South Dakota,,5297.0,1840002127


In [19]:
combined_dataset.head()

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Malishevë,Malisheve,42.4822,20.7458,Kosovo,XK,XKS,Malishevë,admin,,1901597212
1,Prizren,Prizren,42.2139,20.7397,Kosovo,XK,XKS,Prizren,admin,,1901360309
2,Zubin Potok,Zubin Potok,42.9144,20.6897,Kosovo,XK,XKS,Zubin Potok,admin,,1901608808
3,Kamenicë,Kamenice,42.5781,21.5803,Kosovo,XK,XKS,Kamenicë,admin,,1901851592
4,Viti,Viti,42.3214,21.3583,Kosovo,XK,XKS,Viti,admin,,1901328795


In [20]:
combined_dataset.shape

(12893, 11)

In [21]:

combined_dataset.drop(['admin_name', 'iso2', 'iso3',"id","city_ascii"], axis=1,inplace=True)

In [22]:
df_combined = pd.DataFrame(combined_dataset)

In [23]:
df_combined.shape

(12893, 6)

In [24]:
df_combined['Lat'] = pd.to_numeric(df_combined['lat'])

In [25]:
df_combined['Long'] = pd.to_numeric(df_combined['lng'])

In [26]:
lat=0
long=0

map=folium.Map(location=[lat,long],zoom_start=2)
map2=folium.Map(location=[lat,long],zoom_start=2)
map3=folium.Map(location=[lat,long],zoom_start=2)
map4=folium.Map(location=[lat,long],zoom_start=2)
map5=folium.Map(location=[lat,long],zoom_start=2)
map6=folium.Map(location=[lat,long],zoom_start=2)
map7=folium.Map(location=[lat,long],zoom_start=2)
map8=folium.Map(location=[lat,long],zoom_start=2)
map9=folium.Map(location=[lat,long],zoom_start=2)
map10=folium.Map(location=[lat,long],zoom_start=2)
map11=folium.Map(location=[lat,long],zoom_start=2)
map12=folium.Map(location=[lat,long],zoom_start=2)
map13=folium.Map(location=[lat,long],zoom_start=2)
map14=folium.Map(location=[lat,long],zoom_start=2)
map15=folium.Map(location=[lat,long],zoom_start=2)
map16=folium.Map(location=[lat,long],zoom_start=2)
map17=folium.Map(location=[lat,long],zoom_start=2)
map18=folium.Map(location=[lat,long],zoom_start=2)
map19=folium.Map(location=[lat,long],zoom_start=2)

In [27]:
df_combined = df_combined[df_combined['population'].notnull()]
df_combined = df_combined[df_combined['population']>1000000]

In [28]:
for row in df_combined.iterrows():
    row_values = row[1]
    location = [row_values['Lat'], row_values['Long']]
    marker = folium.CircleMarker(location=location,radius=1)
    marker.add_to(map)
map

In [29]:


eq=pd.read_csv('data_combined.csv')

In [30]:
eq["indexler"]=1

In [31]:
df_combined["indexler"]=1

In [32]:
data_ready=df_combined.merge(eq, left_on='indexler', right_on='indexler', how='outer')

In [33]:
data_ready.Lat_y=pd.to_numeric(data_ready.Lat_y,errors='coerce')
data_ready.Long_y=pd.to_numeric(data_ready.Long_y,errors='coerce')

In [34]:
data_ready=data_ready.dropna(subset=['lat', 'lng','Lat_y', 'Long_y'])

In [35]:
data_ready["dist"]=10000

In [36]:
data_ready["dist"]=data_ready.apply(lambda row : mpu.haversine_distance((row["lat"], row["lng"]), (row["Lat_y"], row["Long_y"])),axis=1)

In [37]:
data_ready.to_pickle("./data_ready_with_dist.pkl")

In [38]:
data_ready = data_ready.loc[(data_ready.dist < 100)]

In [39]:
map18 =folium.Map(location=[lat,long],zoom_start=2)

for row in data_ready.iterrows():
    row_values = row[1]
    location = [row_values['Lat_x'], row_values['Long_x']]
    #marker = folium.CircleMarker(location=location,radius=1)
    marker.add_to(map18)
    
    def color(indexler):
    
        if indexler<=2:
            col='green'
        elif ((indexler>2)&(indexler<=5)):
            col='blue'
        elif (indexler>5):
            col='red'
        return col   
map18.add_child(MarkerCluster(locations=list(zip(data_ready['Lat_x'], 
                                 data_ready['Long_x'])),
                                 popups=" number of earthquakes: " + data_ready['indexler'].astype(str) +' <br\>'+ "deaths: " + data_ready['Deaths'].astype(str),
                                 icons=[color(m) for m in data_ready['indexler']]))



map18

In [40]:
def color(indexler):
    if indexler<=2:
        col='green'
    elif ((indexler>2)&(indexler<=5)):
        col='blue'
    elif (indexler>5):
        col='red'
    return col

In [41]:
map21 =folium.Map(location=[lat,long],zoom_start=2)
mc = MarkerCluster()
for row in data_ready.iterrows():
    row_values = row[1]
    location = [row_values['Lat_x'], row_values['Long_x']]
    popup= " number of earthquakes: " + str(row_values['indexler'])  +' <br\>'+ "deaths: " + str(row_values['Deaths'])
    mc.add_child(folium.CircleMarker(location=location
                                     ,color= color(row_values['indexler'])
                                     ,radius=row_values['Deaths']*0.001
                                     ,fill_color = color(row_values['indexler'])
                                     ,fill_opacity = 0.5
                                     ,popup=popup
                                    ))
map21.add_child(mc)
map21


In [42]:
map23 =folium.Map(location=[lat,long],zoom_start=2)

for row in data_ready.iterrows():
    row_values = row[1]
    location = [row_values['Lat_x'], row_values['Long_x']]
    popup= " number of earthquakes: " + str(row_values['indexler'])  +' <br\>'+ "deaths: " + str(row_values['Deaths'])
    marker = folium.CircleMarker(location=location
                                     ,color= color(row_values['indexler'])
                                     ,radius=row_values['Deaths']*0.0001
                                     ,fill_color = color(row_values['indexler'])
                                     ,fill_opacity = 0.5
                                     ,popup=popup
                                    )
    marker.add_to(map23)

map23
