In [1]:
import os
import requests
from datetime import datetime

import geopandas as gpd
import geojsonio

import pandas as pd
from pandas.io.json import json_normalize

import folium
from folium import plugins

## USGS GeoJSON format

schema = {
  type: "FeatureCollection",
  metadata: {
    generated: Long Integer,
    url: String,
    title: String,
    api: String,
    count: Integer,
    status: Integer
  },
  bbox: [
    minimum longitude,
    minimum latitude,
    minimum depth,
    maximum longitude,
    maximum latitude,
    maximum depth
  ],
  features: [
    {
      type: "Feature",
      properties: {
        mag: Decimal,
        place: String,
        time: Long Integer,
        updated: Long Integer,
        tz: Integer,
        url: String,
        detail: String,
        felt:Integer,
        cdi: Decimal,
        mmi: Decimal,
        alert: String,
        status: String,
        tsunami: Integer,
        sig:Integer,
        net: String,
        code: String,
        ids: String,
        sources: String,
        types: String,
        nst: Integer,
        dmin: Decimal,
        rms: Decimal,
        gap: Decimal,
        magType: String,
        type: String
      },
      geometry: {
        type: "Point",
        coordinates: [
          longitude,
          latitude,
          depth
        ]
      },
      id: String
    },
    …
  ]
}

## Getting data

In [2]:
data = requests.get("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson")
# data = requests.get("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_week.geojson")


jsondata = data.json()
quakes = pd.json_normalize(jsondata['features'])

In [3]:
quakes.head(2)

Unnamed: 0,type,id,properties.mag,properties.place,properties.time,properties.updated,properties.tz,properties.url,properties.detail,properties.felt,...,properties.types,properties.nst,properties.dmin,properties.rms,properties.gap,properties.magType,properties.type,properties.title,geometry.type,geometry.coordinates
0,Feature,us7000dh4k,4.9,Kermadec Islands region,1615223077750,1615224793040,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,,...,",origin,phase-data,",,0.57,1.2,105.0,mb,earthquake,M 4.9 - Kermadec Islands region,Point,"[-178.2937, -28.7708, 10]"
1,Feature,pr2021067007,2.67,"9 km SSE of Maria Antonia, Puerto Rico",1615222023250,1615223444701,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,,...,",origin,phase-data,",17.0,0.2108,0.15,208.0,md,earthquake,"M 2.7 - 9 km SSE of Maria Antonia, Puerto Rico",Point,"[-66.8438, 17.9041, 8]"


## Cleaning the data

Some of these fields are essientially useless in all cases. A good example is geometry.type which according to the schema definition is going to always be point.

A couple are just redundant. For example 'title' is just 'mag' plus 'place'

Some are probably useless but *might* be useful, particularly if we apply a ML model to it later. So, I'll keep them for now. But the prepending everything with 'properties' or 'geometry' -- that has to go.

In [4]:
quakes.columns = quakes.columns.str.replace('properties.', "", regex=False)
quakes.columns = quakes.columns.str.replace('geometry.', "", regex=False)
quakes.drop(['id', 'type', 'updated', 'tz', 'mmi', 'detail', 'felt','cdi', 'felt', 'types', 'nst', 'type', 'title'], 
        axis=1, inplace=True)

In [5]:
quakes.head(2)

Unnamed: 0,mag,place,time,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,coordinates
0,4.9,Kermadec Islands region,1615223077750,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dh4k,",us7000dh4k,",",us,",0.57,1.2,105.0,mb,"[-178.2937, -28.7708, 10]"
1,2.67,"9 km SSE of Maria Antonia, Puerto Rico",1615222023250,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,110,pr,2021067007,",pr2021067007,",",pr,",0.2108,0.15,208.0,md,"[-66.8438, 17.9041, 8]"


In [6]:
quakes['ids'] = quakes['ids'].str.strip(',')
quakes['sources'] = quakes['sources'].str.strip(',')

In [7]:
quakes['time'] = pd.to_datetime(quakes['time'], unit='ms')


In [8]:
quakes

Unnamed: 0,mag,place,time,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,coordinates
0,4.9,Kermadec Islands region,2021-03-08 17:04:37.750,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dh4k,us7000dh4k,us,0.57,1.2,105.0,mb,"[-178.2937, -28.7708, 10]"
1,2.67,"9 km SSE of Maria Antonia, Puerto Rico",2021-03-08 16:47:03.250,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,110,pr,2021067007,pr2021067007,pr,0.2108,0.15,208.0,md,"[-66.8438, 17.9041, 8]"
2,2.5,"21 km WNW of Mentone, Texas",2021-03-08 15:48:47.957,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,96,tx,2021erun,tx2021erun,tx,0.182366,0.2,44.0,ml,"[-103.8196853, 31.76384748, 6.054443359000001]"
3,4.6,"65 km ENE of Namie, Japan",2021-03-08 14:36:06.507,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,326,us,7000dh3t,us7000dh3t,us,3.011,0.41,135.0,mb,"[141.6467, 37.7803, 59.19]"
4,4.4,"162 km ESE of Petropavlovsk-Kamchatsky, Russia",2021-03-08 14:11:40.091,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,298,us,7000dh3m,us7000dh3m,us,1.45,0.5,138.0,mb,"[160.7717, 52.3639, 10]"
5,3.3,"26 km S of Petersville, Alaska",2021-03-08 13:00:29.561,https://earthquake.usgs.gov/earthquakes/eventp...,,automatic,0,168,ak,021330qy6y,"ak021330qy6y,us7000dh3f","ak,us",,0.7,,ml,"[-150.8629, 62.2593, 63.8]"
6,5.5,"158 km WSW of Bengkulu, Indonesia",2021-03-08 12:56:34.217,https://earthquake.usgs.gov/earthquakes/eventp...,green,reviewed,0,465,us,7000dh3d,us7000dh3d,us,1.966,0.71,99.0,mb,"[100.9848, -4.4317, 10]"
7,2.9,"72 km SW of Kaktovik, Alaska",2021-03-08 12:52:28.874,https://earthquake.usgs.gov/earthquakes/eventp...,,automatic,0,129,ak,021330gnse,ak021330gnse,ak,,0.77,,ml,"[-144.7698, 69.6123, 0]"
8,5.2,"95 km NE of Los Andes, Chile",2021-03-08 12:48:07.038,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,424,us,7000dh39,us7000dh39,us,0.784,0.99,40.0,mww,"[-69.8945, -32.2076, 102.46]"
9,2.7,"20 km E of Chase, Alaska",2021-03-08 12:42:09.105,https://earthquake.usgs.gov/earthquakes/eventp...,,automatic,0,112,ak,021330eh3g,ak021330eh3g,ak,,0.72,,ml,"[-149.7035, 62.474, 37.2]"


## Parsing the data

Breakout the coords column

In [9]:
quakes['longitude'] = quakes.coordinates.str[0]
quakes['latitude'] = quakes.coordinates.str[1]
quakes['depth'] = quakes.coordinates.str[2]

quakes.drop(['coordinates'], axis=1, inplace=True)

In [10]:
quakes['datetime'] = pd.to_datetime(quakes['time']).dt.strftime("%Y-%m-%d %H:%M")
quakes.drop(['time'], axis=1, inplace=True)

In [11]:
quakes.round({'longitude': 5, 'latitude': 5, 'depth': 0})
quakes.head(2)

Unnamed: 0,mag,place,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,longitude,latitude,depth,datetime
0,4.9,Kermadec Islands region,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dh4k,us7000dh4k,us,0.57,1.2,105.0,mb,-178.2937,-28.7708,10.0,2021-03-08 17:04
1,2.67,"9 km SSE of Maria Antonia, Puerto Rico",https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,110,pr,2021067007,pr2021067007,pr,0.2108,0.15,208.0,md,-66.8438,17.9041,8.0,2021-03-08 16:47


In [12]:
m = folium.Map(
    location=[0, 0],
    tiles='cartodbpositron',
    zoom_start=1.8
)

Figure out how to get time to display

In [13]:
fmtr = "function(num) {return L.Util.formatNum(num, 3);};"
folium.plugins.MousePosition(separator=' / ', prefix="Lat/Long: ", lat_formatter=fmtr, lng_formatter=fmtr).add_to(m)

<folium.plugins.mouse_position.MousePosition at 0x7f9f486ccd00>

In [14]:
for i, row in quakes.iterrows():
    folium.CircleMarker((row.latitude, row.longitude), radius=row.mag*2.2, 
                        color='red', weight = 0, opacity =.4, fill=True, fill_color='orange', fill_opacity=.3, 
                        popup=[f"Time: {row.datetime},\n Mag: {row.mag},\n Depth: {row.depth} km"]).add_to(m)
    

In [15]:
boundaries = ('data/GeoJSON/PB2002_boundaries.json')
line_style = {'color': '#FF3333', 'weight': 1, 'opacity':row.mag*1.4}


folium.GeoJson(
    boundaries,
    name='major fault lines',
    style_function=lambda x:line_style,
    smooth_factor=4.0,
      
).add_to(m)
m

## Tsunami data

In [16]:
cd data

/Users/alex/Code/Pybraries/folium/Quakes/data


In [55]:
tsunami = pd.read_csv('tsunamis-2021-03-06_22-27-38_-0500.tsv', sep='\t')

### We have to have reliable times in order to develop warnings

In [56]:
tsunami = tsunami.dropna(subset=['Earthquake Magnitude', 'Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec'])

In [57]:
tsunami.head()

Unnamed: 0,Search Parameters,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,...,Total Missing,Total Missing Description,Total Injuries,Total Injuries Description,Total Damage ($Mil),Total Damage Description,Total Houses Destroyed,Total Houses Destroyed Description,Total Houses Damaged,Total Houses Damaged Description
372,,1703.0,2.0,2.0,21.0,11.0,5.0,0.0,1.0,6.7,...,,,,,,3.0,,3.0,,
700,,1828.0,3.0,30.0,12.0,35.0,0.0,-1.0,1.0,8.3,...,,,,,,,,,,
1234,,1886.0,9.0,1.0,2.0,51.0,0.0,4.0,1.0,7.3,...,,,,,,,,,,
1286,,1893.0,6.0,4.0,2.0,27.0,0.0,4.0,1.0,7.0,...,,,,,,,,,,
1329,,1897.0,8.0,5.0,0.0,10.0,0.0,4.0,1.0,7.7,...,,,,,,1.0,,,,


In [58]:
tsunami['Tsunami Cause Code'].unique()

array([1., 3., 4., 9., 2.])

In [59]:
for col in tsunami.columns: 
    if 'Injuries' in col or 'Death' in col or 'Missing' in col or 'Damage' in col: 
        del tsunami[col] 

In [60]:
for col in tsunami.columns: 
    if 'Search Parameters' in col or 'Total' in col or 'Houses' in col: 
        del tsunami[col] 

In [61]:
for col in tsunami.columns: 
    if 'More Info' in col or 'Vol' in col: 
        del tsunami[col] 

In [87]:
tsunami.head()

Unnamed: 0,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Deposits,Country,Location Name,Latitude,Longitude,Maximum Water Height (m),Number of Runups,Tsunami Magnitude (Abe),Tsunami Magnitude (Iida),Tsunami Intensity
1234,1886.0,9.0,1.0,2.0,51.0,0.0,4.0,1.0,7.3,0.0,USA,"CHARLESTON, SC",32.9,-80.0,,3.0,,,
1286,1893.0,6.0,4.0,2.0,27.0,0.0,4.0,1.0,7.0,0.0,RUSSIA,S. KURIL ISLANDS,43.3,147.5,3.0,3.0,,1.6,1.5
1329,1897.0,8.0,5.0,0.0,10.0,0.0,4.0,1.0,7.7,0.0,JAPAN,SANRIKU,38.0,143.7,3.6,37.0,,1.5,1.0
1366,1901.0,8.0,9.0,9.0,23.0,30.0,4.0,1.0,7.2,0.0,JAPAN,OFF NORTHEAST COAST HONSHU,40.5,142.5,0.6,3.0,,0.0,-0.5
1368,1901.0,8.0,9.0,18.0,33.0,45.0,4.0,1.0,7.4,0.0,JAPAN,OFF NORTHEAST COAST HONSHU,40.6,142.3,0.25,2.0,,-1.0,-1.0


In [88]:
# This is worth writing a blog on
# doubtful = (tsunami.loc[tsunami['Tsunami Event Validity'] < 3]) didn't work but ..
# doubtful = tsunami[tsunami['Tsunami Event Validity'] < 3 ].index
# tsunami.drop(doubtful, inplace=True) ---> DID work

In [89]:
doubtful = tsunami[tsunami['Tsunami Event Validity'] < 3 ].index

In [90]:
doubtful

Int64Index([], dtype='int64')

In [91]:
tsunami.drop(doubtful, inplace=True)


In [79]:
tsunami['Tsunami Event Validity'].unique()

array([4., 3.])

In [92]:
tsunami.reset_index()

Unnamed: 0,index,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Deposits,Country,Location Name,Latitude,Longitude,Maximum Water Height (m),Number of Runups,Tsunami Magnitude (Abe),Tsunami Magnitude (Iida),Tsunami Intensity
0,1234,1886.0,9.0,1.0,2.0,51.0,0.0,4.0,1.0,7.3,0.0,USA,"CHARLESTON, SC",32.900,-80.000,,3.0,,,
1,1286,1893.0,6.0,4.0,2.0,27.0,0.0,4.0,1.0,7.0,0.0,RUSSIA,S. KURIL ISLANDS,43.300,147.500,3.00,3.0,,1.6,1.5
2,1329,1897.0,8.0,5.0,0.0,10.0,0.0,4.0,1.0,7.7,0.0,JAPAN,SANRIKU,38.000,143.700,3.60,37.0,,1.5,1.0
3,1366,1901.0,8.0,9.0,9.0,23.0,30.0,4.0,1.0,7.2,0.0,JAPAN,OFF NORTHEAST COAST HONSHU,40.500,142.500,0.60,3.0,,0.0,-0.5
4,1368,1901.0,8.0,9.0,18.0,33.0,45.0,4.0,1.0,7.4,0.0,JAPAN,OFF NORTHEAST COAST HONSHU,40.600,142.300,0.25,2.0,,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,2705,2021.0,1.0,23.0,23.0,36.0,50.0,4.0,1.0,6.9,0.0,ANTARCTICA,SCOTIA SEA,-61.825,-55.494,0.04,2.0,,,
677,2706,2021.0,2.0,10.0,13.0,19.0,55.0,4.0,1.0,7.7,0.0,NEW CALEDONIA,LOYALTY ISLANDS,-23.054,171.601,0.78,20.0,,,
678,2707,2021.0,3.0,4.0,13.0,27.0,36.0,4.0,1.0,7.3,0.0,NEW ZEALAND,GISBORNE,-37.563,179.444,0.28,4.0,,,
679,2708,2021.0,3.0,4.0,17.0,41.0,25.0,4.0,1.0,7.4,0.0,NEW ZEALAND,"S OF RAOUL ISLAND, KERMADEC ISLANDS",-29.613,-177.843,0.31,2.0,,,


In [93]:
tsunami[['Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec']].astype('int')

Unnamed: 0,Year,Mo,Dy,Hr,Mn,Sec
1234,1886,9,1,2,51,0
1286,1893,6,4,2,27,0
1329,1897,8,5,0,10,0
1366,1901,8,9,9,23,30
1368,1901,8,9,18,33,45
...,...,...,...,...,...,...
2705,2021,1,23,23,36,50
2706,2021,2,10,13,19,55
2707,2021,3,4,13,27,36
2708,2021,3,4,17,41,25
