# Visualization of a low-frequency earthquake catalog

Some description of LFEs and dataset

## User parameters

There are two catalogs: one with LFE detections from 2007 to 2009, the other with LFE detections from 2004 to 2011. Choose the catalog to visualize.

In [1]:
# Catalog period to visualize: '2004_2011' or '2007_2009'.
# The value is reused below as the threshold column name and inside the
# catalog file names, so it must match one of those two strings exactly.
#period = '2004_2011'
period = '2007_2009'

You may group LFEs per month, day, or hour. Choose which one you prefer for visualization.

In [2]:
# Time resolution used to aggregate LFE counts: 'month', 'day', or 'hour'.
#group = 'month'
group = 'day'
#group = 'hour'

## Reading and preprocessing the dataset

In [3]:
import altair as alt
import numpy as np
import pandas as pd
import pickle
from datetime import datetime
from vega_datasets import data

Read the locations of the LFE families.

In [4]:
# Whitespace-separated table without a header row: one line per LFE family.
# The separator is a raw string so that '\s' is not treated as a (invalid)
# string escape, and the family name is forced to text so that identifiers
# such as '080401.05.050' can never be mangled by numeric inference.
locations = pd.read_csv(
    'catalog/families_locations.txt',
    header=None,
    names=['name', 'latitude', 'longitude', 'depth'],
    sep=r'\s+',
    engine='python',
    dtype={'name': str},
)

In [5]:
# Whitespace-separated table without a header row: one detection threshold
# per LFE family and per catalog period (the column names are the period
# identifiers used to select a catalog).
# The separator is a raw string so that '\s' is not treated as a (invalid)
# string escape, and the family name is forced to text so it keeps the same
# type as the name column it is later merged with.
threshold = pd.read_csv(
    'catalog/families_threshold.txt',
    header=None,
    names=['name', '2007_2009', '2004_2011'],
    sep=r'\s+',
    engine='python',
    dtype={'name': str},
)

In [6]:
# Attach the per-period thresholds to each family's location by family name.
# pandas merges with an inner join by default, so only families present in
# both tables are kept.
families = locations.merge(threshold, on='name')

Create empty dataframe to put the data into a tidy format readable by Altair.

In [7]:
# Empty tidy table: one row per (family, time bin) with the number of LFEs.
# It is filled family by family in the loop further down.
df = pd.DataFrame(columns=['name', 'latitude', 'longitude', 'time', 'count'])

Define the time columns to keep and the columns to drop from the catalog.

In [8]:
# Time columns of a catalog, ordered from coarsest to finest resolution.
time = ['year', 'month', 'day', 'hour', 'minute', 'second']

# Number of leading time columns that identify one bin at each resolution.
n_keep_by_group = {'month': 2, 'day': 3, 'hour': 4}

# Fail loudly on an unsupported choice: merely printing a message would leave
# `keep` and `delete` undefined (or stale from a previous run of this cell).
if group not in n_keep_by_group:
    raise ValueError(
        'You should have chosen month, day, or hour for making groups '
        f'(got {group!r})'
    )

n_keep = n_keep_by_group[group]
# Columns that define a time bin ...
keep = time[:n_keep]
# ... and columns that carry no information once detections are counted.
delete = time[n_keep:] + ['cc', 'nchannel']

Add data to tidy dataset.

In [9]:
# Build the tidy dataset: for every family with a positive threshold for the
# chosen period, load its catalog, keep the detections that pass the
# threshold, and count them per time bin.
tidy_columns = list(df.columns)  # column order defined when df was created
frames = []  # one small dataframe per family, concatenated once at the end

for name, latitude, longitude, cutoff in zip(
    families['name'],
    families['latitude'],
    families['longitude'],
    families[period],
):
    # Families whose threshold is not positive for this period are skipped.
    if not cutoff > 0.0:
        continue

    filename = f'catalog/{name}/catalog_{period}.pkl'
    # NOTE: unpickling can execute arbitrary code; only load catalogs that
    # come from a trusted source.
    with open(filename, 'rb') as catalog_file:
        catalog = pickle.load(catalog_file)

    # Keep detections whose cc * nchannel reaches the family threshold.
    selected = catalog.loc[catalog['cc'] * catalog['nchannel'] >= cutoff]
    if selected.empty:
        continue

    # Number of detections in each time bin.
    counts = selected.groupby(keep).size().reset_index(name='count')

    # pd.to_datetime needs at least year, month and day columns; when grouping
    # by month there is no day column, so place each bin on the 1st.
    date_parts = counts[keep]
    if 'day' not in keep:
        date_parts = date_parts.assign(day=1)

    frames.append(pd.DataFrame({
        'name': name,
        'latitude': latitude,
        'longitude': longitude,
        'time': pd.to_datetime(date_parts),
        'count': counts['count'],
    }))

# A single concatenation avoids repeatedly copying the growing table, and
# rebuilding df from scratch keeps the cell safe to re-run.
if frames:
    df = pd.concat(frames)[tidy_columns]

In [10]:
# Preview the first rows of the tidy dataset.
df.head()

Unnamed: 0,name,latitude,longitude,time,count
0,080401.05.050,40.09,-122.87,2007-07-25,2
1,080401.05.050,40.09,-122.87,2007-07-28,3
2,080401.05.050,40.09,-122.87,2007-08-07,2
3,080401.05.050,40.09,-122.87,2007-08-26,2
4,080401.05.050,40.09,-122.87,2007-09-16,4


In [11]:
# Largest number of LFEs in any (family, time bin) row; used below as the
# upper bound of the size and color scales.
max_count = df['count'].max()

## Visualization

In [24]:
# Interval selection bound to the x encoding of the chart it is attached to.
brush = alt.selection(type='interval', encodings=['x'])

# Circle size and color share the same 0..max_count domain.
count_scale = alt.Scale(domain=[0, max_count])

# Background layer: county outlines read from the local TopoJSON file.
basemap = alt.Chart(
    alt.topo_feature('map/7counties.topojson', 'collection')
).mark_geoshape(fill='#ddd', stroke='#fff', strokeWidth=1)

# Foreground layer: circles at the family locations, restricted to the rows
# that fall inside the brushed time window.
lfe_circles = alt.Chart().mark_circle().encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    size=alt.Size('count:Q', scale=count_scale),
    color=alt.Color('count:Q', scale=count_scale),
).transform_filter(brush.ref())

points = alt.layer(basemap, lfe_circles).project(
    type='albersUsa'
).properties(width=500, height=300)

# Bar chart of LFE counts through time for one family; the brush lives here.
time_axis = alt.X('time:T', axis=alt.Axis(format='%Y/%m/%d', title='Time'))
count_axis = alt.Y('count:Q', axis=alt.Axis(title='Number of LFEs'))
bars = alt.Chart().mark_bar().encode(
    x=time_axis,
    y=count_axis,
).transform_filter(
    'datum.name == "080401.05.050"'
).properties(width=500, height=100, selection=brush)

# Stack map and bar chart; both panels read from the same tidy dataframe.
myChart = alt.vconcat(points, bars, data=df)
myChart.save('docs/index.html')
myChart

Diverse attempts at improving

In [21]:
# Circle size and color share the same 0..max_count domain.
count_scale = alt.Scale(domain=[0, max_count])

# Background layer: county outlines read from the local TopoJSON file.
county_shapes = alt.Chart(
    alt.topo_feature('map/7counties.topojson', 'collection')
).mark_geoshape(fill='#ddd', stroke='#fff', strokeWidth=1)

# Foreground layer: one circle per row of df, with the family name and the
# count shown in a tooltip.
family_circles = alt.Chart(df).mark_circle().encode(
    latitude='latitude:Q',
    longitude='longitude:Q',
    size=alt.Size('count:Q', scale=count_scale),
    color=alt.Color('count:Q', scale=count_scale),
    tooltip=['name', 'count'],
)

# Last expression of the cell, so the layered map is displayed.
alt.layer(county_shapes, family_circles).project(
    type='albersUsa'
).properties(width=500, height=300)