In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm
import regex as re
import plotly.express as px
import requests
from bs4 import BeautifulSoup as BS
from io import StringIO
import statsmodels
%matplotlib inline

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

USGS

https://earthquake.usgs.gov/fdsnws/event/1/[METHOD[?PARAMETERS]]

catalogs -- request available catalogs.
https://earthquake.usgs.gov/fdsnws/event/1/catalogs


contributors -- request available contributors
https://earthquake.usgs.gov/fdsnws/event/1/contributors

count -- to perform a count on a data request. Count uses the same parameters as the query method, and is availablein these formats: plain text (default), geojson, and xml.
https://earthquake.usgs.gov/fdsnws/event/1/count?format=geojson
https://earthquake.usgs.gov/fdsnws/event/1/count?starttime=2014-01-01&endtime=2014-01-02

query -- to submit a data request. See the parameters section for supported url parameters.
https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2014-01-01&endtime=2014-01-02

https://earthquake.usgs.gov/fdsnws/event/1/query?format=xml&starttime=2014-01-01&endtime=2014-01-02&minmagnitude=5

version -- request full service version number
https://earthquake.usgs.gov/fdsnws/event/1/version

Query method parameters should be submitted as key=value pairs using the HTTP GET method and may not be specified more than once; if a parameter is submitted multiple times the result is undefined.

#### Define payload parameters

In [3]:
payload = {'format' :'csv',
          'starttime':'1900-01-01',
          'minlatitude':'34.964513',
          'maxlatitude':'36.723245',
          'minlongitude':'-90.282394',
          'maxlongitude':'-81.647141',
          'orderby':'time'}
#,'limit':'200000'

#### Check number of records with 'count' method

In [4]:
records_count = 'https://earthquake.usgs.gov/fdsnws/event/1/count?'

In [5]:
r = requests.get(url=records_count, params=payload)

In [6]:
print(requests.get(url=records_count, params=payload).text)

9239


### Run query & read in results to dataframe

In [7]:
query = 'https://earthquake.usgs.gov/fdsnws/event/1/query?'

In [8]:
r = requests.get(url=query, params=payload)

In [9]:
print(r)

<Response [200]>


In [10]:
usgs = pd.read_csv(StringIO(r.text))

In [11]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [12]:
usgs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9239 entries, 0 to 9238
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   time             9239 non-null   object 
 1   latitude         9239 non-null   float64
 2   longitude        9239 non-null   float64
 3   depth            9226 non-null   float64
 4   mag              9236 non-null   float64
 5   magType          9228 non-null   object 
 6   nst              6206 non-null   float64
 7   gap              6204 non-null   float64
 8   dmin             6025 non-null   float64
 9   rms              6216 non-null   float64
 10  net              9239 non-null   object 
 11  id               9239 non-null   object 
 12  updated          9239 non-null   object 
 13  place            9239 non-null   object 
 14  type             9239 non-null   object 
 15  horizontalError  6265 non-null   float64
 16  depthError       6264 non-null   float64
 17  magError      

### Cleaning the data

#### Identify columns that can be dropped

In [13]:
usgs=usgs.drop(columns= ['nst','gap', 'dmin', 'rms', 'net', 'updated', 'horizontalError', 'depthError', 'magError', 'magNst', 'status',
       'locationSource', 'magSource'])

#### Change time column from timestamp object to datetime.

Time data initially stored as a timestamp.

2012-03-20T08:55:21.270Z --> usgs.time.dtype --> dtype('0') object

1975-01-08 23:20:34.200000+00:00 --> usgs.time.dtype --> datetime64[ns, UTC]


In [14]:
usgs['time']=pd.to_datetime(usgs['time'])

In [15]:
usgs.time.min()

Timestamp('1903-11-04 18:18:00+0000', tz='UTC')

#### Exclude earthquake events not associated with TN.

In [16]:
usgs.place.str.contains('Tennessee').value_counts()

True     4771
False    4468
Name: place, dtype: int64

In [17]:
usgs = usgs[usgs['place'].str.contains('Tennessee')]

#### Reorder & rename columns for convenience

In [18]:
usgs = usgs.rename(columns={"magType":"mag_type"})

In [19]:
usgs=usgs[['time','depth','mag','mag_type','place','latitude','longitude','type','id']]

#### Check for nan values in key columns
There are 13 records with nan depth, 8 with nan magnitude type and 3 with nan magnitude

In [20]:
usgs = usgs.dropna(subset=['mag','depth'])

In [21]:
usgs['mag_type']=usgs['mag_type'].replace(np.nan,'Unknown',regex=True)

In [22]:
#usgs[usgs['mag_type'].isnull()]

#### Parse the magnitude values into bins

Note: Setting the bin floor at 0.0 caused records with a magnitude of 0.0 to return NaN as the bin label. This in turn caused errors when plotting.  Setting the interval to start at -0.5 solved this.

In [23]:
#Binning magnitude by whole number intervals
mag_labels_whole = ['-0.50-0.99', '1.0-1.99', '2.0-2.99', '3.0-3.99','4.0-4.99', '5.0-5.99', '6.0-6.99']
mag_cut_whole = [-0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
usgs['mag_bins_whole'] = pd.cut(usgs['mag'], bins=mag_cut_whole, labels=mag_labels_whole)#.value_counts()

In [24]:
#Binning magnitude by intervals of .5
mag_labels_half = ['-0.50-0.49', '0.50-0.99', '1.0-1.49', '1.50-1.99','2.0-2.49', '2.5-2.99', '3.0-3.49','3.5-3.99','4.0-4.49', '4.5-4.99', '5.49-5.99','6.49-7.0',]
mag_cut_half = [-0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 6.0, 7.0]
usgs['mag_bins_half'] = pd.cut(usgs['mag'], bins=mag_cut_half, labels=mag_labels_half)#.value_counts()

In [25]:
#Another way to define bins with pd.cut() -- pd.interval_range does not allow bins to have custom labels
#interval_range = pd.interval_range(start=-0.5, freq=0.5, end=5.01, closed='right')
#usgs['mag_bins_2'] = pd.cut(usgs['mag'], bins=interval_range)

In [26]:
usgs.mag_bins_half.dtype

CategoricalDtype(categories=['-0.50-0.49', '0.50-0.99', '1.0-1.49', '1.50-1.99',
                  '2.0-2.49', '2.5-2.99', '3.0-3.49', '3.5-3.99', '4.0-4.49',
                  '4.5-4.99', '5.49-5.99', '6.49-7.0'],
                 ordered=True)

#### Earthquake Depth
- Default is 5km, per USGS documentation. 
- Should any correlation exclude 5km measurement?

In [27]:
usgs.depth.describe()

count    4764.000000
mean        8.604282
std         4.558553
min         0.000000
25%         5.700000
50%         7.680000
75%         9.850000
max        37.300000
Name: depth, dtype: float64

### Initial Analysis

In [29]:
usgs[['depth','mag']].describe()

Unnamed: 0,depth,mag
count,4764.0,4764.0
mean,8.604282,1.651629
std,4.558553,0.513087
min,0.0,0.0
25%,5.7,1.3
50%,7.68,1.6
75%,9.85,1.9
max,37.3,4.7


### What does the distribution of earthquakes by magnitude show?
Fairly normal distribution.

In [33]:
fig = px.histogram(usgs, x="mag", 
                   histfunc='count',
                   nbins=50, 
                   title='Distribution of Earthquake Magnitudes', 
                   hover_data=usgs)
fig.show()

### Any patterns with magnitude scale and earthquakes over the years?
- Abrupt change in magnitude scale in USGS records in 1980. Interesting but not worth the additional research time for the end goal.

In [34]:
fig = px.histogram(usgs, x="time", histfunc='count',
                   color = 'mag_type', 
                   title='Count of Earthquakes & Magnitude Scales Over Time', 
                   hover_data=['mag'])
fig.show()

### Are there patterns in earthquake magnitudes through the years?
- Discuss whether this is a function of more frequent earthquakes or simply better research/technology to identify and record them.

In [35]:
df=usgs.sort_values(by='mag_bins_half', ascending = True)
fig = px.histogram(df, x='time', histfunc='count', color='mag_bins_half', 
                color_discrete_sequence= px.colors.sequential.Plasma_r,
                   hover_data=usgs, title='TN Earthquake Magnitudes')
fig.show()

#### Notes/Scraps

In [46]:
#Example of df.query() in order to query or filter when visualizing with Plotly - reduces number of separate dataframes
#usgs.query('place.str.contains("Tennessee")', engine='python')

#dataframe[dataframe.summary.str.contains('Windows Failed Login', case=False)]
#df.query('column_name == "value"')
#pd_df.query('column_name.str.contains("abc")', engine='python')

#usgs_data[usgs_data['place'].str.match('Tennessee')]

### How has the frequency of earthquakes in TN - as measured by the time between them - changed?  How is it impacted by magnitude?

#### Create a copy of the time column before setting datetime as index.  The duplicate time column is used to calculate timedelta between earthquake events. 
Note: The timedelta calculation will not work if values are not sorted properly.  

In [36]:
#Create new df from copy of original. Sort time (ascending) and mag_bins (descending)
usgs_dt = usgs.copy().sort_values(['mag_bins_half','time'], ascending=(False,True))

In [37]:
#create a copy of time column with which to calculate timedelta
usgs_dt['calc_time']=usgs_dt['time']

In [38]:
#set datetime index
usgs_dt = usgs_dt.set_index('time')

In [40]:
#Groupby magnitude bin values and calculate the timedelta between earthquake events.
usgs_dt['elapsed_time'] = usgs_dt.groupby('mag_bins_half')['calc_time'].diff()

In [41]:
#Split off another dataframe without binning the magnitude values

In [42]:
usgs_dt_mag=usgs.copy().sort_values(['mag','time'], ascending=(False, True))

In [43]:
#Groupby magnitude bin values and calculate the timedelta between earthquake events.
usgs_dt_mag['elapsed_time'] = usgs_dt.groupby('mag')['calc_time'].diff()

In [44]:
usgs_dt.calc_time.max()

Timestamp('2020-11-07 03:10:39.070000+0000', tz='UTC')

### Analyze timedelta values

In [45]:
usgs_dt[['depth','mag','elapsed_time']].describe()

Unnamed: 0,depth,mag,elapsed_time
count,4764.0,4764.0,4754
mean,8.604282,1.651629,36 days 23:45:22.319859
std,4.558553,0.513087,310 days 10:11:42.973755
min,0.0,0.0,0 days 00:00:02.340000
25%,5.7,1.3,1 days 18:17:25.287500
50%,7.68,1.6,5 days 21:10:18.410000
75%,9.85,1.9,15 days 17:11:56.430000
max,37.3,4.7,11583 days 01:45:13.610000


### Earthquakes of smaller magnitudes (excluding those in the lowest bucket) occur far more frequently than greater magnitudes. 

In [48]:
usgs_dt.groupby('mag_bins_half')['elapsed_time'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
mag_bins_half,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-0.50-0.49,49,290 days 01:20:51.748367,561 days 23:22:44.264075,0 days 00:01:01.400000,2 days 20:34:23,36 days 05:32:43.260000,204 days 21:34:01.500000,2767 days 15:34:13.790000
0.50-0.99,284,59 days 07:03:37.267323,143 days 23:59:35.383141,0 days 00:00:21.500000,4 days 21:56:57.082500,16 days 15:49:20.755000,53 days 01:33:56.537500,1469 days 22:01:41.500000
1.0-1.49,1832,9 days 05:28:43.960633,20 days 21:43:45.727243,0 days 00:00:02.340000,1 days 01:11:57.535000,3 days 23:01:27.835000,10 days 00:11:53.907500,308 days 20:08:07.090000
1.50-1.99,1678,10 days 02:11:04.487228,16 days 10:01:08.368124,0 days 00:00:29,1 days 17:22:40.720000,5 days 02:15:34.170000,11 days 15:29:54.337500,217 days 08:04:56.710000
2.0-2.49,661,25 days 18:40:01.594856,44 days 17:54:15.910397,0 days 00:00:07.930000,3 days 13:47:12.530000,10 days 15:47:01.570000,27 days 03:23:04.220000,353 days 14:54:11.420000
2.5-2.99,179,92 days 08:13:58.227709,131 days 23:08:49.275903,0 days 00:00:07.760000,12 days 22:37:57.800000,39 days 18:20:11.070000,117 days 09:23:07.130000,873 days 06:33:15
3.0-3.49,56,376 days 23:36:55.686428,587 days 09:05:12.206881,1 days 09:10:51.440000,81 days 13:09:19.075000,246 days 23:21:25.895000,440 days 07:53:52.805000,4117 days 16:53:22.700000
3.5-3.99,10,2336 days 18:14:47.476000,2689 days 03:56:43.790402,248 days 02:54:14.300000,546 days 20:18:35.227500,1081 days 18:05:22.580000,2903 days 10:25:38.212500,8597 days 00:34:39.500000
4.0-4.49,5,6582 days 05:50:22.682000,4097 days 08:38:59.731341,1973 days 17:07:38.800000,4357 days 18:29:07.400000,4826 days 06:16:52.600000,10170 days 09:33:01,11583 days 01:45:13.610000
4.5-4.99,0,NaT,NaT,NaT,NaT,NaT,NaT,NaT


#### Double check that these numbers are correct.
It appears that earthquakes of very low magnitude are less frequen than earthquakes with slightly higher magnitude. There is a gap in events with magnitude <.5 between 1991 & 1996 and 2001 & 2007
Is this really due to earthquake occurence or due to the data being collected?

In [49]:
#usgs_dt[usgs_dt['mag_bins']=='-0.50-0.49'].elapsed_time.mean()

In [50]:
#usgs_dt[usgs_dt['mag_bins']=='-0.50-0.49'].elapsed_time.mean()

In [51]:
#usgs_dt[usgs_dt['mag_bins']=='-0.50-0.49'].shape

In [52]:
usgs_dt[usgs_dt['mag_bins_half']=='-0.50-0.49']

Unnamed: 0_level_0,depth,mag,mag_type,place,latitude,longitude,type,id,mag_bins_whole,mag_bins_half,calc_time,elapsed_time
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1979-02-03 11:49:15+00:00,1.4,0.5,mlg,"3km ENE of Ridgely, Tennessee",36.27,-89.45,earthquake,nm600934,-0.50-0.99,-0.50-0.49,1979-02-03 11:49:15+00:00,NaT
1981-08-06 23:31:02.400000+00:00,7.5,0.0,md,"7km WNW of Tiptonville, Tennessee",36.41,-89.54,earthquake,nm601508,-0.50-0.99,-0.50-0.49,1981-08-06 23:31:02.400000+00:00,915 days 11:41:47.400000
1981-08-08 02:18:04.500000+00:00,6.8,0.0,md,"12km NW of Tiptonville, Tennessee",36.46,-89.57,earthquake,nm601511,-0.50-0.99,-0.50-0.49,1981-08-08 02:18:04.500000+00:00,1 days 02:47:02.100000
1981-08-14 19:04:33.600000+00:00,7.1,0.0,md,"4km W of Tiptonville, Tennessee",36.38,-89.52,earthquake,nm601513,-0.50-0.99,-0.50-0.49,1981-08-14 19:04:33.600000+00:00,6 days 16:46:29.100000
1981-08-15 09:13:33.100000+00:00,6.6,0.0,md,"9km NW of Tiptonville, Tennessee",36.43,-89.56,earthquake,nm601514,-0.50-0.99,-0.50-0.49,1981-08-15 09:13:33.100000+00:00,0 days 14:08:59.500000
1981-08-15 20:30:47.800000+00:00,2.8,0.0,md,"5km WNW of Tiptonville, Tennessee",36.4,-89.53,earthquake,nm601518,-0.50-0.99,-0.50-0.49,1981-08-15 20:30:47.800000+00:00,0 days 11:17:14.700000
1981-08-18 17:05:10.800000+00:00,5.0,0.0,md,"4km NW of Tiptonville, Tennessee",36.4,-89.51,earthquake,nm601528,-0.50-0.99,-0.50-0.49,1981-08-18 17:05:10.800000+00:00,2 days 20:34:23
1981-08-20 19:56:49.800000+00:00,2.2,0.0,md,"3km NW of Tiptonville, Tennessee",36.4,-89.5,earthquake,nm601531,-0.50-0.99,-0.50-0.49,1981-08-20 19:56:49.800000+00:00,2 days 02:51:39
1981-08-21 01:35:54.100000+00:00,5.5,0.0,md,"7km WSW of Tiptonville, Tennessee",36.36,-89.55,earthquake,nm601532,-0.50-0.99,-0.50-0.49,1981-08-21 01:35:54.100000+00:00,0 days 05:39:04.300000
1981-08-22 00:34:33.900000+00:00,6.4,0.0,md,"11km NW of Tiptonville, Tennessee",36.46,-89.55,earthquake,nm601534,-0.50-0.99,-0.50-0.49,1981-08-22 00:34:33.900000+00:00,0 days 22:58:39.800000


### Any difference in the past 20 years vs dataset as a whole?

In [42]:
usgs_dt.loc['2000':'2020'].groupby('mag_bins')['elapsed_time'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
mag_bins,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-0.50-0.49,24,275 days 17:30:59.100416,523 days 07:05:56.280385,0 days 05:01:07.290000,27 days 00:54:39.547500,52 days 00:56:47.055000,229 days 05:33:31.162500,2219 days 22:50:44.870000
0.50-0.99,294,25 days 18:50:10.409455,41 days 03:59:06.742603,0 days 00:00:39.220000,3 days 13:21:35.560000,10 days 10:24:45.230000,30 days 17:50:44.660000,359 days 03:07:30.190000
1.0-1.49,2144,3 days 13:04:35.818689,4 days 09:57:30.940504,0 days 00:00:03.590000,0 days 15:30:40.925000,2 days 02:26:58.710000,4 days 16:54:20.545000,46 days 09:18:11.350000
1.50-1.99,2229,3 days 10:07:58.608914,4 days 04:59:43.835416,0 days 00:00:00.040000,0 days 16:06:43.850000,1 days 22:35:35.630000,4 days 14:44:55.980000,50 days 07:24:14.790000
2.0-2.49,918,8 days 07:24:25.262854,12 days 17:30:12.048076,0 days 00:00:07.930000,1 days 17:29:19.817500,4 days 04:31:52.235000,9 days 13:42:16.550000,142 days 13:10:49.750000
2.5-2.99,239,31 days 17:02:17.495941,43 days 08:58:39.183558,0 days 00:00:13.260000,6 days 21:03:04.585000,15 days 09:24:05,41 days 09:51:47.965000,298 days 20:28:24.900000
3.0-3.49,38,199 days 02:25:48.251842,279 days 04:32:10.542864,15 days 05:07:39.710000,40 days 02:36:18.440000,107 days 07:46:24.585000,222 days 14:49:26.582500,1539 days 01:00:44.280000
3.5-3.99,8,1026 days 03:50:20.985000,1197 days 02:21:02.641790,83 days 15:34:30.620000,226 days 19:51:50.105000,613 days 12:18:54.455000,1220 days 08:55:16.425000,3507 days 00:42:01.550000
4.0-4.49,3,3361 days 10:38:36.336666,2842 days 00:25:00.930149,79 days 22:32:37.710000,2526 days 09:34:54.910000,4972 days 20:37:12.110000,5002 days 04:41:35.650000,5031 days 12:45:59.190000
4.5-5.0,0,NaT,NaT,NaT,NaT,NaT,NaT,NaT


In [54]:
usgs.mag_bins_half.unique().sort_values()

[-0.50-0.49, 0.50-0.99, 1.0-1.49, 1.50-1.99, 2.0-2.49, 2.5-2.99, 3.0-3.49, 3.5-3.99, 4.0-4.49, 4.5-4.99]
Categories (10, object): [-0.50-0.49 < 0.50-0.99 < 1.0-1.49 < 1.50-1.99 ... 3.0-3.49 < 3.5-3.99 < 4.0-4.49 < 4.5-4.99]

### Isolate only earthquakes with magnitude 2.0 and greater

In [56]:
#create ist of bin labels
mag_bins_threshold = ['2.0-2.49', '2.5-2.99', '3.0-3.49', '3.5-3.99', '4.0-4.49', '4.5-5.0']

In [58]:
usgs_dt[usgs_dt['mag_bins_half'].isin(mag_bins_threshold)]

Unnamed: 0_level_0,depth,mag,mag_type,place,latitude,longitude,type,id,mag_bins_whole,mag_bins_half,calc_time,elapsed_time
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1928-11-03 04:02:50.200000+00:00,5.00,4.50,lg,eastern Tennessee,36.112000,-82.828000,earthquake,ushis794,4.0-4.99,4.0-4.49,1928-11-03 04:02:50.200000+00:00,NaT
1956-09-07 13:35:51.200000+00:00,5.00,4.10,fa,eastern Tennessee,36.445000,-83.787000,earthquake,ushis2101,4.0-4.99,4.0-4.49,1956-09-07 13:35:51.200000+00:00,10170 days 09:33:01
1962-02-02 06:43:30+00:00,4.00,4.23,mw,Tennessee,36.374000,-89.511000,earthquake,ushis2536,4.0-4.99,4.0-4.49,1962-02-02 06:43:30+00:00,1973 days 17:07:38.800000
1974-01-08 01:12:37.400000+00:00,1.00,4.10,mb,Tennessee,36.200000,-89.390000,earthquake,usp00004v5,4.0-4.99,4.0-4.49,1974-01-08 01:12:37.400000+00:00,4357 days 18:29:07.400000
1987-03-27 07:29:30+00:00,19.50,4.20,md,"3km SE of Vonore, Tennessee",35.570000,-84.210000,earthquake,se603077,4.0-4.99,4.0-4.49,1987-03-27 07:29:30+00:00,4826 days 06:16:52.600000
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-10-20 18:50:21.800000+00:00,8.84,2.33,md,"3 km W of Ridgely, Tennessee",36.267500,-89.523333,earthquake,nm60312672,2.0-2.99,2.0-2.49,2020-10-20 18:50:21.800000+00:00,0 days 22:30:43.310000
2020-10-25 17:18:57.880000+00:00,18.54,2.45,md,Tennessee,35.666833,-84.024667,earthquake,se60313102,2.0-2.99,2.0-2.49,2020-10-25 17:18:57.880000+00:00,4 days 22:28:36.080000
2020-10-26 08:35:34.140000+00:00,6.88,2.03,md,"6 km E of Ridgely, Tennessee",36.253167,-89.417500,earthquake,nm60313127,2.0-2.99,2.0-2.49,2020-10-26 08:35:34.140000+00:00,0 days 15:16:36.260000
2020-10-31 15:53:45.740000+00:00,4.69,2.37,md,"6 km NW of Greeneville, Tennessee",36.207667,-82.870000,earthquake,se60313562,2.0-2.99,2.0-2.49,2020-10-31 15:53:45.740000+00:00,5 days 07:18:11.600000


In [64]:
fig = px.box(usgs_dt, x='mag_bins_half',
                  y='elapsed_time',
                   title='Time Between Earthquakes', 
                   hover_data=usgs_dt)
fig.show()

In [65]:
df=usgs_dt[usgs_dt['mag_bins_half'].isin(mag_bins_threshold)]
fig = px.box(df, x='mag_bins_half',
                  y='elapsed_time',
                   title='Time Between Earthquakes Magnitude >= 2.0', 
                   hover_data=usgs_dt)
fig.show()

In [69]:
df=usgs_dt.loc['2000':'2020']#.groupby('mag_bins')#['elapsed_time']
fig = px.box(df, x='mag_bins_half',
                  y='elapsed_time',
                   title='Time Between Earthquakes - 2000-2020', 
                   hover_data=usgs_dt)
fig.show()

### Note
One common need for time series data is resampling at a higher or lower frequency. This can be done using the resample() method, or the much simpler asfreq() method. The primary difference between the two is that resample() is fundamentally a data aggregation, while asfreq() is fundamentally a data selection.
https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html


In [75]:
df = usgs_dt.loc['1980':'2020'].sort_values('mag')
fig = px.histogram(df, x='mag_bins_half', 
                   histfunc='count',
                   #color='mag_type', 
                   nbins=50, 
                   title='Distribution of Earthquake Magnitudes 1980-2020', 
                   hover_data=df)
fig.show()

### Export USGS dataframe to CSV for another nb.

In [81]:
usgs.to_csv('../data/usgs_tn_earthquakes.csv', index = False)

In [80]:
usgs.mag_bins_half.dtype

CategoricalDtype(categories=['-0.50-0.49', '0.50-0.99', '1.0-1.49', '1.50-1.99',
                  '2.0-2.49', '2.5-2.99', '3.0-3.49', '3.5-3.99', '4.0-4.49',
                  '4.5-4.99', '5.49-5.99', '6.49-7.0'],
                 ordered=True)