In [1]:
import os
import requests
from datetime import datetime

import geopandas as gpd
import geojsonio

import pandas as pd
from pandas.io.json import json_normalize

import folium
from folium import plugins

## USGS GeoJSON format

schema = {
  type: "FeatureCollection",
  metadata: {
    generated: Long Integer,
    url: String,
    title: String,
    api: String,
    count: Integer,
    status: Integer
  },
  bbox: [
    minimum longitude,
    minimum latitude,
    minimum depth,
    maximum longitude,
    maximum latitude,
    maximum depth
  ],
  features: [
    {
      type: "Feature",
      properties: {
        mag: Decimal,
        place: String,
        time: Long Integer,
        updated: Long Integer,
        tz: Integer,
        url: String,
        detail: String,
        felt:Integer,
        cdi: Decimal,
        mmi: Decimal,
        alert: String,
        status: String,
        tsunami: Integer,
        sig:Integer,
        net: String,
        code: String,
        ids: String,
        sources: String,
        types: String,
        nst: Integer,
        dmin: Decimal,
        rms: Decimal,
        gap: Decimal,
        magType: String,
        type: String
      },
      geometry: {
        type: "Point",
        coordinates: [
          longitude,
          latitude,
          depth
        ]
      },
      id: String
    },
    …
  ]
}

## Getting data

In [2]:
data = requests.get("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson")
# data = requests.get("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_week.geojson")


jsondata = data.json()
quakes = pd.json_normalize(jsondata['features'])

In [3]:
quakes.head(2)

Unnamed: 0,type,id,properties.mag,properties.place,properties.time,properties.updated,properties.tz,properties.url,properties.detail,properties.felt,...,properties.types,properties.nst,properties.dmin,properties.rms,properties.gap,properties.magType,properties.type,properties.title,geometry.type,geometry.coordinates
0,Feature,uu60428907,2.61,"15 km ESE of Paragonah, Utah",1615307287750,1615310448040,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,,...,",origin,phase-data,",17.0,0.4354,0.14,84.0,ml,earthquake,"M 2.6 - 15 km ESE of Paragonah, Utah",Point,"[-112.6208333, 37.8251667, 4.3]"
1,Feature,us7000dhgb,4.9,Kermadec Islands region,1615306709442,1615308427040,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,,...,",origin,phase-data,",,1.535,0.34,238.0,mb,earthquake,M 4.9 - Kermadec Islands region,Point,"[-176.5724, -28.2728, 10]"


## Cleaning the data

Some of these fields are essientially useless in all cases. A good example is geometry.type which according to the schema definition is going to always be point.

A couple are just redundant. For example 'title' is just 'mag' plus 'place'

Some are probably useless but *might* be useful, particularly if we apply a ML model to it later. So, I'll keep them for now. But the prepending everything with 'properties' or 'geometry' -- that has to go.

In [4]:
quakes.columns = quakes.columns.str.replace('properties.', "", regex=False)
quakes.columns = quakes.columns.str.replace('geometry.', "", regex=False)
quakes.drop(['id', 'type', 'updated', 'tz', 'mmi', 'detail', 'felt','cdi', 'felt', 'types', 'nst', 'type', 'title'], 
        axis=1, inplace=True)

In [5]:
quakes.head(2)

Unnamed: 0,mag,place,time,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,coordinates
0,2.61,"15 km ESE of Paragonah, Utah",1615307287750,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,105,uu,60428907,",us7000dhg9,uu60428907,",",us,uu,",0.4354,0.14,84.0,ml,"[-112.6208333, 37.8251667, 4.3]"
1,4.9,Kermadec Islands region,1615306709442,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dhgb,",us7000dhgb,",",us,",1.535,0.34,238.0,mb,"[-176.5724, -28.2728, 10]"


In [6]:
quakes['ids'] = quakes['ids'].str.strip(',')
quakes['sources'] = quakes['sources'].str.strip(',')

In [7]:
quakes['time'] = pd.to_datetime(quakes['time'], unit='ms')


In [8]:
quakes

Unnamed: 0,mag,place,time,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,coordinates
0,2.61,"15 km ESE of Paragonah, Utah",2021-03-09 16:28:07.750,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,105,uu,60428907,"us7000dhg9,uu60428907","us,uu",0.4354,0.14,84.0,ml,"[-112.6208333, 37.8251667, 4.3]"
1,4.9,Kermadec Islands region,2021-03-09 16:18:29.442,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dhgb,us7000dhgb,us,1.535,0.34,238.0,mb,"[-176.5724, -28.2728, 10]"
2,4.6,Kermadec Islands region,2021-03-09 15:20:02.551,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,326,us,7000dhfu,us7000dhfu,us,2.029,0.54,289.0,mb,"[-175.6262, -28.9983, 10]"
3,4.8,Kermadec Islands region,2021-03-09 14:07:14.185,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,354,us,7000dhf6,us7000dhf6,us,1.078,0.99,182.0,mb,"[-176.6966, -29.31, 10]"
4,3.29,"31 km WSW of Forks, Washington",2021-03-09 13:10:58.570,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,169,uw,61707577,"uw61707577,us7000dhez","uw,us",0.1913,0.5,200.0,ml,"[-124.742, 47.7973333333333, 8.06]"
5,3.7,"115 km SSE of Sand Point, Alaska",2021-03-09 11:46:34.683,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,211,us,7000dheg,us7000dheg,us,0.411,0.53,202.0,mb,"[-159.6357, 54.4214, 30.88]"
6,4.6,"49 km ESE of Phek, India",2021-03-09 11:15:00.996,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,326,us,7000dhef,us7000dhef,us,1.354,0.99,106.0,mb,"[94.938, 25.4592, 77.63]"
7,3.27,"49 km N of Brenas, Puerto Rico",2021-03-09 10:09:45.890,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,165,pr,2021068006,"us7000dhe9,pr2021068006","us,pr",0.4445,0.14,261.0,md,"[-66.4285, 18.91, 31]"
8,4.9,"Kermadec Islands, New Zealand",2021-03-09 09:46:02.430,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dhe3,us7000dhe3,us,0.687,1.06,128.0,mb,"[-177.1865, -29.0203, 10]"
9,2.88,"12 km E of Pāhala, Hawaii",2021-03-09 09:05:53.090,https://earthquake.usgs.gov/earthquakes/eventp...,,automatic,0,128,hv,72377857,hv72377857,hv,,0.15,159.0,ml,"[-155.363830566406, 19.2164993286133, 32.20000..."


## Parsing the data

Breakout the coords column

In [9]:
quakes['longitude'] = quakes.coordinates.str[0]
quakes['latitude'] = quakes.coordinates.str[1]
quakes['depth'] = quakes.coordinates.str[2]

quakes.drop(['coordinates'], axis=1, inplace=True)

In [10]:
quakes['datetime'] = pd.to_datetime(quakes['time']).dt.strftime("%Y-%m-%d %H:%M")
quakes.drop(['time'], axis=1, inplace=True)

In [11]:
quakes.round({'longitude': 5, 'latitude': 5, 'depth': 0})
quakes.head(2)

Unnamed: 0,mag,place,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,longitude,latitude,depth,datetime
0,2.61,"15 km ESE of Paragonah, Utah",https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,105,uu,60428907,"us7000dhg9,uu60428907","us,uu",0.4354,0.14,84.0,ml,-112.620833,37.825167,4.3,2021-03-09 16:28
1,4.9,Kermadec Islands region,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,7000dhgb,us7000dhgb,us,1.535,0.34,238.0,mb,-176.5724,-28.2728,10.0,2021-03-09 16:18


In [12]:
m = folium.Map(
    location=[0, 0],
    tiles='cartodbpositron',
    zoom_start=1.8
)

Figure out how to get time to display

In [13]:
fmtr = "function(num) {return L.Util.formatNum(num, 3);};"
folium.plugins.MousePosition(separator=' / ', prefix="Lat/Long: ", lat_formatter=fmtr, lng_formatter=fmtr).add_to(m)

<folium.plugins.mouse_position.MousePosition at 0x7fac5a6b97f0>

In [14]:
for i, row in quakes.iterrows():
    folium.CircleMarker((row.latitude, row.longitude), radius=row.mag*2.2, 
                        color='red', weight = 0, opacity =.4, fill=True, fill_color='orange', fill_opacity=.3, 
                        popup=[f"Time: {row.datetime},\n Mag: {row.mag},\n Depth: {row.depth} km"]).add_to(m)
    

In [15]:
boundaries = ('data/GeoJSON/PB2002_boundaries.json')
line_style = {'color': '#FF3333', 'weight': 1, 'opacity':row.mag*1.4}


folium.GeoJson(
    boundaries,
    name='major fault lines',
    style_function=lambda x:line_style,
    smooth_factor=4.0,
      
).add_to(m)
m

## Tsunami data

In [16]:
cd data

/Users/alex/Code/Pybraries/folium/Quakes/data


In [17]:
tsunami = pd.read_csv('tsunamis-2021-03-06_22-27-38_-0500.tsv', sep='\t')

### We have to have reliable time and magnitudes in order to develop warnings

In [18]:
tsunami = tsunami.dropna(subset=['Earthquake Magnitude', 'Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec'])

In [19]:
# tsunami.info()

### Again, there is a lot of data that was collected that isn't all that useful for what's being done here

In [20]:
cols = [0, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]
tsunami.drop(tsunami.columns[cols], axis=1, inplace=True)

In [21]:
tsunami

Unnamed: 0,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Location Name,Latitude,Longitude,Maximum Water Height (m)
372,1703.0,2.0,2.0,21.0,11.0,5.0,0.0,1.0,6.7,LATIUM,42.467,13.200,
700,1828.0,3.0,30.0,12.0,35.0,0.0,-1.0,1.0,8.3,CENTRAL PERU,,,
1234,1886.0,9.0,1.0,2.0,51.0,0.0,4.0,1.0,7.3,"CHARLESTON, SC",32.900,-80.000,
1286,1893.0,6.0,4.0,2.0,27.0,0.0,4.0,1.0,7.0,S. KURIL ISLANDS,43.300,147.500,3.00
1329,1897.0,8.0,5.0,0.0,10.0,0.0,4.0,1.0,7.7,SANRIKU,38.000,143.700,3.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2705,2021.0,1.0,23.0,23.0,36.0,50.0,4.0,1.0,6.9,SCOTIA SEA,-61.825,-55.494,0.04
2706,2021.0,2.0,10.0,13.0,19.0,55.0,4.0,1.0,7.7,LOYALTY ISLANDS,-23.054,171.601,0.78
2707,2021.0,3.0,4.0,13.0,27.0,36.0,4.0,1.0,7.3,GISBORNE,-37.563,179.444,0.28
2708,2021.0,3.0,4.0,17.0,41.0,25.0,4.0,1.0,7.4,"S OF RAOUL ISLAND, KERMADEC ISLANDS",-29.613,-177.843,0.31


In [22]:
tsunami.head()

Unnamed: 0,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Location Name,Latitude,Longitude,Maximum Water Height (m)
372,1703.0,2.0,2.0,21.0,11.0,5.0,0.0,1.0,6.7,LATIUM,42.467,13.2,
700,1828.0,3.0,30.0,12.0,35.0,0.0,-1.0,1.0,8.3,CENTRAL PERU,,,
1234,1886.0,9.0,1.0,2.0,51.0,0.0,4.0,1.0,7.3,"CHARLESTON, SC",32.9,-80.0,
1286,1893.0,6.0,4.0,2.0,27.0,0.0,4.0,1.0,7.0,S. KURIL ISLANDS,43.3,147.5,3.0
1329,1897.0,8.0,5.0,0.0,10.0,0.0,4.0,1.0,7.7,SANRIKU,38.0,143.7,3.6


In [23]:
# This is worth writing a blog on
# doubtful = (tsunami.loc[tsunami['Tsunami Event Validity'] < 3]) didn't work but ..
# doubtful = tsunami[tsunami['Tsunami Event Validity'] < 3 ].index
# tsunami.drop(doubtful, inplace=True) ---> DID work

In [24]:
doubtful = tsunami[tsunami['Tsunami Event Validity'] < 3 ].index
# doubtful #-- 208 occurances

In [25]:
tsunami.drop(doubtful, inplace=True)

In [26]:
# Quick confirmation to make sure it did what was expected
tsunami['Tsunami Event Validity'].unique()

array([4., 3.])

In [27]:
tsunami = tsunami.astype({"Year": int, "Mo": int, 'Dy': int, 'Hr': int, 'Mn': int, 'Sec': int})

In [28]:
tsunami['DTG'] = tsunami.apply(lambda r: datetime.strptime(f"{r['Year']} {r['Mo']} {r['Dy']} {r['Hr']}:{r['Mn']}:{r['Sec']}", '%Y %m %d %H:%M:%S'), axis=1)
tsunami.drop(['Year','Mo', 'Dy', 'Hr', 'Mn', 'Sec'], axis=1, inplace=True)

In [29]:
tsunami.set_index('DTG')

Unnamed: 0_level_0,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Location Name,Latitude,Longitude,Maximum Water Height (m)
DTG,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1886-09-01 02:51:00,4.0,1.0,7.3,"CHARLESTON, SC",32.900,-80.000,
1893-06-04 02:27:00,4.0,1.0,7.0,S. KURIL ISLANDS,43.300,147.500,3.00
1897-08-05 00:10:00,4.0,1.0,7.7,SANRIKU,38.000,143.700,3.60
1901-08-09 09:23:30,4.0,1.0,7.2,OFF NORTHEAST COAST HONSHU,40.500,142.500,0.60
1901-08-09 18:33:45,4.0,1.0,7.4,OFF NORTHEAST COAST HONSHU,40.600,142.300,0.25
...,...,...,...,...,...,...,...
2021-01-23 23:36:50,4.0,1.0,6.9,SCOTIA SEA,-61.825,-55.494,0.04
2021-02-10 13:19:55,4.0,1.0,7.7,LOYALTY ISLANDS,-23.054,171.601,0.78
2021-03-04 13:27:36,4.0,1.0,7.3,GISBORNE,-37.563,179.444,0.28
2021-03-04 17:41:25,4.0,1.0,7.4,"S OF RAOUL ISLAND, KERMADEC ISLANDS",-29.613,-177.843,0.31


In [34]:

guest_list = [('Ken', 30, "Chef"), ("Pat", 35, 'Lawyer'), ('Amanda', 25, "Engineer")]

for guest in guest_list:
    print(f"{guest[0]} is {guest[1]} years old and works as {guest[2]}.")

 

    

Ken is 30 years old and works as Chef.
Pat is 35 years old and works as Lawyer.
Amanda is 25 years old and works as Engineer.
