To run this notebook, open the **Cell** menu and choose **Run All**.

In [1]:
from IPython.display import Image
from IPython.display import HTML

# Render a small script plus a button that toggles the visibility of the
# notebook's code-input areas (the `div.input` elements).
# `code_toggle` is also registered to run once the document is ready; because
# `code_show` starts out true, that first call hides the inputs.
# NOTE(review): the script calls `$`, so it presumably relies on jQuery being
# present on the page (as in the classic Notebook UI) - confirm for other front ends.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')



In [2]:
%matplotlib inline

import numpy as np
import pandas as pd
import plotly.plotly as py
import matplotlib.pyplot as plt
import seaborn as sns

from plotly import tools
from plotly.graph_objs import *
from plotly.grid_objs import Grid, Column
from plotly.offline import download_plotlyjs, init_notebook_mode,  iplot, plot

# Initialise plotly's notebook integration for this session.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook - confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

Data for this notebook was pulled from [USGS](https://earthquake.usgs.gov/earthquakes/map/).

In [3]:
# Earthquake records exported as CSV; the relative path means the file is
# expected in the notebook's working directory.
QUAKE_CSV_PATH = '2.5_week.csv'
quake_df = pd.read_csv(QUAKE_CSV_PATH)

In [4]:
# Parse the raw `time` column and round every event to the nearest hour so the
# quakes can later be bucketed per hour. This is done as one vectorized pandas
# operation instead of constructing a Timestamp per row in a Python loop.
quake_df['times'] = pd.to_datetime(quake_df['time']).dt.round('h')

In [5]:
# Sequential row counter (0, 1, ..., n-1), one value per recorded quake.
quake_df['time_int'] = np.arange(len(quake_df))

# Quake Watch

The last time I was home happened to coincide with renewed activity from the Kilauea volcano. Before the actual lava flow appeared, we experienced numerous earthquakes as the lava created a new path through the island.

I wanted to better understand the rate and magnitude of the quakes, since the growing number of them was widely assumed to be leading up to something. I also wanted to experiment with Plotly visualizations.

In [6]:
# Plotly cloud credentials are read from the environment rather than being
# committed to the notebook. Export PLOTLY_USERNAME and PLOTLY_API_KEY before
# starting Jupyter. When either one is missing, this cell does nothing and any
# credentials plotly already has stored are left untouched.
# NOTE(review): the API key that used to be hard-coded here has been published
# with the notebook and should be revoked.
import os
import plotly

plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_username and plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

In [7]:
# Distinct hourly buckets, rendered as strings and sorted; these become the
# x-axis values of the cumulative chart.
hours = sorted(quake_df["times"].unique().astype(str))

# Number of quakes in each hourly bucket (groupby returns the buckets in
# ascending order) and the running total across buckets.
hourly_counts = quake_df.groupby("times").size()

# Kept as plain Python ints so the counts are plotted as numbers, not strings.
hlist = hourly_counts.tolist()
hlist_cum = hourly_counts.cumsum().tolist()

The two charts below show recorded earthquakes over time, in terms of both magnitude and cumulative count. In both visualizations there is an evident clustering of events around and following May 5th.

In [8]:
# Top panel: magnitude of each recorded quake against its `times` value.
magnitude_trace = Scatter(
    name='Quake Magnitude',
    mode='lines+markers',
    x=quake_df['times'],
    y=quake_df['mag'],
)

# Bottom panel: cumulative quake count for each entry in `hours`.
cumulative_trace = Scatter(
    name='Quake Cumulative Sum',
    mode='lines+markers',
    x=hours,
    y=hlist_cum,
)

# Two stacked subplots in a single column, one trace per row.
fig = tools.make_subplots(rows=2, cols=1, print_grid=False)
for row, trace in enumerate([magnitude_trace, cumulative_trace], start=1):
    fig.append_trace(trace, row, 1)

fig['layout'].update(height=600, width=1000, title='Kilauea Earthquakes')

py.iplot(fig, filename='quake_sub_test', validate=True)

Below is the same set of measurements, but plotted spatially. The areas with the largest earthquakes correspond to where the lava ultimately erupted through fissures.

In [9]:
# Magnitude drives marker size, colour and hover text, so extract it once.
magnitudes = quake_df['mag'].values

# One marker per quake, positioned by latitude/longitude.
# A plain list and dict are used instead of the `Data` and `Marker` wrappers,
# which plotly has deprecated; the resulting figure definition is the same.
data = [
    Scattermapbox(
        lat=quake_df['latitude'].values,
        lon=quake_df['longitude'].values,
        mode='markers',
        marker=dict(
            size=magnitudes * 2,  # scale up so small quakes stay visible
            color=magnitudes,
            showscale=True,
            cmax=7,
            cmin=0,
            opacity=0.7
        ),
        text=magnitudes
    )
]

layout = Layout(
    title='Quakes Over Time',
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        # NOTE(review): this Mapbox access token is embedded in the notebook;
        # the "pk." prefix suggests a public-scope token - confirm, and
        # consider loading it from configuration instead.
        accesstoken='pk.eyJ1IjoiYmhhZ2VybWFuIiwiYSI6ImNqZ3Y2YzF2ZzBlMGEzMG85ZW9lbTN1MDcifQ.v9OoLfA1qxCGgHWoQwi2Hw',
        bearing=0,
        # Initial view centred on the given coordinates.
        center=dict(
            lat=19.3,
            lon=-155
        ),
        pitch=2,
        zoom=8,
        style='light'
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Test Mapbox')

The photo below was taken on May 19th, just two weeks after the observed peak in earthquake frequency.

In [10]:
# Remote photo displayed inline at a fixed 800x800 size.
LAVA_PHOTO_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/USGS_K%C4%ABlauea_multimediaFile-2062.jpg/800px-USGS_K%C4%ABlauea_multimediaFile-2062.jpg"
Image(url=LAVA_PHOTO_URL, width=800, height=800)