In [1]:
import os
import requests
from datetime import datetime

import geopandas as gpd
import geojsonio

import pandas as pd
from pandas.io.json import json_normalize

import folium
from folium import plugins

# USGS GeoJSON format

schema = {
  type: "FeatureCollection",
  metadata: {
    generated: Long Integer,
    url: String,
    title: String,
    api: String,
    count: Integer,
    status: Integer
  },
  bbox: [
    minimum longitude,
    minimum latitude,
    minimum depth,
    maximum longitude,
    maximum latitude,
    maximum depth
  ],
  features: [
    {
      type: "Feature",
      properties: {
        mag: Decimal,
        place: String,
        time: Long Integer,
        updated: Long Integer,
        tz: Integer,
        url: String,
        detail: String,
        felt:Integer,
        cdi: Decimal,
        mmi: Decimal,
        alert: String,
        status: String,
        tsunami: Integer,
        sig:Integer,
        net: String,
        code: String,
        ids: String,
        sources: String,
        types: String,
        nst: Integer,
        dmin: Decimal,
        rms: Decimal,
        gap: Decimal,
        magType: String,
        type: String
      },
      geometry: {
        type: "Point",
        coordinates: [
          longitude,
          latitude,
          depth
        ]
      },
      id: String
    },
    …
  ]
}

## Getting data

In [2]:
data = requests.get("https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson")
jsondata = data.json()
df = pd.json_normalize(jsondata['features'])

In [3]:
df.head(3)

Unnamed: 0,type,id,properties.mag,properties.place,properties.time,properties.updated,properties.tz,properties.url,properties.detail,properties.felt,...,properties.types,properties.nst,properties.dmin,properties.rms,properties.gap,properties.magType,properties.type,properties.title,geometry.type,geometry.coordinates
0,Feature,us6000dhz0,4.9,"206 km WSW of Bandar Lampung, Indonesia",1613452223356,1613453283040,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,,...,",origin,phase-data,",,1.993,0.66,151.0,mb,earthquake,"M 4.9 - 206 km WSW of Bandar Lampung, Indonesia",Point,"[103.5916, -6.2629, 34.78]"
1,Feature,us6000dhys,5.0,"67 km WSW of Port-Vila, Vanuatu",1613450658452,1613454902861,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,0.0,...,",dyfi,origin,phase-data,",,4.145,1.02,37.0,mb,earthquake,"M 5.0 - 67 km WSW of Port-Vila, Vanuatu",Point,"[167.7219, -17.9585, 10]"
2,Feature,us6000dhyh,4.1,"167 km WSW of Colchani, Bolivia",1613447372717,1613450343040,,https://earthquake.usgs.gov/earthquakes/eventp...,https://earthquake.usgs.gov/earthquakes/feed/v...,,...,",origin,phase-data,",,0.926,0.48,111.0,mb,earthquake,"M 4.1 - 167 km WSW of Colchani, Bolivia",Point,"[-68.4543, -20.7995, 168.45]"


# Cleaning the data

Some of these fields are essientially useless in all cases. For example, geometry type. All of them are points by schema definition.

A couple are just redundant. For example title is just 'mag' plus 'place'

Some are probably useless but *might* be useful, particularly if we apply a ML model to it later. So, I'll keep them for now. But the subfeatures of 'properties' and 'geometry' have to go

In [4]:
df.columns = df.columns.str.replace('properties.', "")
df.columns = df.columns.str.replace('geometry.', "")
df.drop(['id', 'type', 'updated', 'tz', 'mmi', 'detail', 'felt','cdi', 'felt', 'types', 'nst', 'type', 'title'], 
        axis=1, inplace=True)

  df.columns = df.columns.str.replace('properties.', "")
  df.columns = df.columns.str.replace('geometry.', "")


In [5]:
df.head(3)

Unnamed: 0,mag,place,time,url,alert,status,tsunami,sig,net,code,ids,sources,dmin,rms,gap,magType,coordinates
0,4.9,"206 km WSW of Bandar Lampung, Indonesia",1613452223356,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,369,us,6000dhz0,",us6000dhz0,",",us,",1.993,0.66,151.0,mb,"[103.5916, -6.2629, 34.78]"
1,5.0,"67 km WSW of Port-Vila, Vanuatu",1613450658452,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,385,us,6000dhys,",us6000dhys,",",us,",4.145,1.02,37.0,mb,"[167.7219, -17.9585, 10]"
2,4.1,"167 km WSW of Colchani, Bolivia",1613447372717,https://earthquake.usgs.gov/earthquakes/eventp...,,reviewed,0,259,us,6000dhyh,",us6000dhyh,",",us,",0.926,0.48,111.0,mb,"[-68.4543, -20.7995, 168.45]"


In [7]:
df['time'] = pd.to_datetime(df['time'], unit='ms')


## Parsing the data

In [8]:
quakes = pd.json_normalize(data=data['features'], meta=['mag','place','time','updated','tz','url','detail','felt','cdi','mmi','alert','status','tsunami','sig','net','code','ids','sources','types','nst','dmin','rms','gap','magType'])

TypeError: 'Response' object is not subscriptable

There are a lot of columns that are essientially useless

In [None]:
quakes.columns = quakes.columns.map(lambda x: x.split(".")[-1])

quakes.drop(['type', 'updated', 'tz', 'url', 'detail', 'felt', 'cdi', 'mmi', 'alert', 'status', 'net', 
                     'code', 'sources', 'types', 'nst', 'dmin', 'rms', 'gap', 'magType' ], axis=1, inplace=True)

Breakout the coords column

In [None]:
quakes['longitude'] = quakes.coordinates.str[0]
quakes['latitude'] = quakes.coordinates.str[1]
quakes['depth'] = quakes.coordinates.str[2]

quakes.drop(['coordinates', 'title'], axis=1, inplace=True)

In [None]:
quakes['datetime'] = pd.to_datetime(quakes['time']).dt.strftime("%Y-%m-%d %H:%M")
quakes.drop(['time'], axis=1, inplace=True)

In [None]:
quakes.round({'longitude': 5, 'latitude': 5, 'depth': 0})
quakes.head(2)

In [None]:
m = folium.Map(
    location=[0, 0],
    tiles='cartodbpositron',
    zoom_start=1.8
)

Figure out how to get time to display

In [None]:
fmtr = "function(num) {return L.Util.formatNum(num, 3);};"
folium.plugins.MousePosition(separator=' / ', prefix="Lat/Long: ", lat_formatter=fmtr, lng_formatter=fmtr).add_to(m)

In [None]:
for i, row in quakes.iterrows():
    folium.CircleMarker((row.latitude, row.longitude), radius=row.mag*2.2, 
                        color='red', weight = 0, opacity =.4, fill=True, fill_color='orange', fill_opacity=.3, 
                        popup=[f"Time: {row.datetime},\n Mag: {row.mag},\n Depth: {row.depth} km"]).add_to(m)
    

In [None]:
boundaries = ('data/PB2002_boundaries.json')
line_style = {'color': '#FF3333', 'weight': 1, 'opacity':row.mag*1.4}


folium.GeoJson(
    boundaries,
    name='major fault lines',
    style_function=lambda x:line_style,
    smooth_factor=4.0,
      
).add_to(m)
m