In [38]:
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline
%matplotlib notebook


In [3]:
# Load the reduced USGS earthquake table; low_memory=False makes pandas read
# the file in one pass instead of chunk-by-chunk dtype inference.
eq_df = pd.read_csv(
    '../oklahoma_earthquakes_largefiles/usgs_eqs_reduced_data.csv',
    low_memory=False,
)

In [4]:
# Display the first five rows of the earthquake frame as a quick check of the load.
eq_df.head()

Unnamed: 0,id,time,mag,magType,cdi,place,status,latitude,longitude,depth,date
0,us2000iz6u,1546166814510,2.2,mb_lg,,"6 km ESE of Cleo Springs, Oklahoma",reviewed,36.3818,-98.3804,6.01,2018-12-30 10:46:54.510
1,us2000iz51,1546159833730,2.0,mb_lg,,"5 km ESE of Cleo Springs, Oklahoma",reviewed,36.3917,-98.3808,6.28,2018-12-30 08:50:33.730
2,us2000iz4w,1546158498840,2.2,ml,,"13 km NNW of Pawnee, Oklahoma",reviewed,36.4443,-96.8762,5.36,2018-12-30 08:28:18.840
3,us2000iyvs,1546073324730,2.5,mb_lg,3.3,"5 km WSW of Arcadia, Oklahoma",reviewed,35.6547,-97.3898,5.0,2018-12-29 08:48:44.730
4,us2000iyvq,1546073222930,2.2,mb_lg,,"6 km W of Arcadia, Oklahoma",reviewed,35.6747,-97.399,7.42,2018-12-29 08:47:02.930


In [8]:
# Convert the `date` column to datetime64 so it can serve as the key of a
# time-based pd.Grouper further down.
# The column is assigned by name with the vectorised pd.to_datetime because:
#   * `eq_df.iloc[:, 10] = ...` writes in place on pandas >= 2.0, which leaves
#     the column as object dtype and breaks Grouper(freq=...);
#   * `.apply(pd.to_datetime)` parses one value at a time and is much slower.
# NOTE(review): on pandas >= 2.0 a column that mixes timestamp layouts (e.g.
# with and without fractional seconds) needs format="ISO8601" — confirm
# against the CSV.
eq_df["date"] = pd.to_datetime(eq_df["date"])

In [54]:
# Load the injection-volume file and convert Report_Date to datetime64.
# The column is assigned by name with the vectorised pd.to_datetime: an
# `iloc[:, 0] = ...` assignment writes in place on pandas >= 2.0 and would
# leave the column as object dtype, which the monthly pd.Grouper cannot use.
injection_df = pd.read_csv('../oklahoma_earthquakes_largefiles/weekly_volume.csv', low_memory=False)
injection_df["Report_Date"] = pd.to_datetime(injection_df["Report_Date"])
injection_df.head()

Unnamed: 0,Report_Date,Volume_BPD
0,2014-12-31,3246.0
1,2014-12-30,3373.0
2,2014-12-29,3508.0
3,2014-12-28,3363.0
4,2014-12-27,3226.0


In [121]:
# Column-wise maxima: the latest Report_Date and the largest single Volume_BPD value.
injection_df.max()

Report_Date    2021-07-17 00:00:00
Volume_BPD                  101740
dtype: object

In [170]:
# Total Volume_BPD per calendar month (month-end bins on Report_Date),
# kept both as a Series and as a single-column DataFrame.
monthly_sum_inj = (
    injection_df
    .groupby(pd.Grouper(key="Report_Date", freq='M'))["Volume_BPD"]
    .sum()
)
monthly_sum_inj_df = monthly_sum_inj.to_frame()
monthly_sum_inj_df.head()

Unnamed: 0_level_0,Volume_BPD
Report_Date,Unnamed: 1_level_1
2012-09-30,16549.0
2012-10-31,81535.0
2012-11-30,74800.0
2012-12-31,241705.0
2013-01-31,460607.0


In [171]:
# Largest monthly total of Volume_BPD across all months.
monthly_sum_inj_df.max()

Volume_BPD    7.599133e+07
dtype: float64

In [112]:
# Keep only events whose magnitude is 3 or greater.
mag_3 = eq_df.loc[eq_df["mag"].ge(3)]
mag_3.head()

Unnamed: 0,id,time,mag,magType,cdi,place,status,latitude,longitude,depth,date
5,us2000iytp,1546056357890,3.0,mb_lg,3.1,"3 km ESE of Hennessey, Oklahoma",reviewed,36.0998,-97.8674,5.0,2018-12-29 04:05:57.890
7,us2000iym1,1546000348980,3.1,mb_lg,2.7,"5 km ESE of Cleo Springs, Oklahoma",reviewed,36.3858,-98.386,5.0,2018-12-28 12:32:28.980
11,us2000iwfw,1545520825020,3.1,ml,,"0 km SE of Wakita, Oklahoma",reviewed,36.879,-97.9152,7.21,2018-12-22 23:20:25.020
17,us2000iud8,1545032568230,3.0,mb_lg,2.0,"16 km SSE of Waynoka, Oklahoma",reviewed,36.4566,-98.7897,5.0,2018-12-17 07:42:48.230
19,us2000itxh,1544909464050,3.0,mb_lg,2.2,"5 km WNW of Lucien, Oklahoma",reviewed,36.2856,-97.5158,4.86,2018-12-15 21:31:04.050


In [176]:
# Number of M >= 3 events per calendar month (month-end bins on `date`),
# kept both as a Series and as a single-column DataFrame.
monthly_count_eq = (
    mag_3
    .groupby(pd.Grouper(key="date", freq='M'))["mag"]
    .count()
)
monthly_count_eq_df = monthly_count_eq.to_frame()
monthly_count_eq_df.head()

Unnamed: 0_level_0,mag
date,Unnamed: 1_level_1
2009-01-31,1
2009-02-28,3
2009-03-31,1
2009-04-30,0
2009-05-31,0


In [178]:
# Dual-axis figure: monthly count of M >= 3 earthquakes (left axis, red)
# against the monthly summed injection volume (right axis, blue).
fig, ax1 = plt.subplots()

color = 'tab:red'
ax1.set_xlabel('Date')
ax1.set_ylabel('Monthly Number of Earthquakes (M >=3)', color=color)
ax1.plot(monthly_count_eq_df.index, monthly_count_eq_df.mag, color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax1.set_ylim(-10, 130)
ax1.set_title("Monthly Earthquake Count vs. Arbuckle Monthly SWD Well Injection Volume")


ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:blue'
ax2.set_ylabel('Monthly Injection Volume (barrels)', color=color)  # we already handled the x-label with ax1
# The y-values must come from the same frame as the x-values; the previous
# `inj_df` name is not defined anywhere in this notebook.
ax2.plot(monthly_sum_inj_df.index, monthly_sum_inj_df.Volume_BPD, color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.set_size_inches(13, 8)
fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()

<IPython.core.display.Javascript object>

In [179]:
# Mean Volume_BPD per calendar month (month-end bins on Report_Date),
# kept both as a Series and as a single-column DataFrame.
mean_monthly_inj = (
    injection_df
    .groupby(pd.Grouper(key="Report_Date", freq='M'))["Volume_BPD"]
    .mean()
)
monthly_mean_inj_df = mean_monthly_inj.to_frame()
monthly_mean_inj_df.head()

Unnamed: 0_level_0,Volume_BPD
Report_Date,Unnamed: 1_level_1
2012-09-30,1654.9
2012-10-31,2630.16129
2012-11-30,2493.333333
2012-12-31,2177.522523
2013-01-31,2971.658065


In [187]:
# Dual-axis figure: monthly M >= 3 earthquake count (left axis, red) against
# the monthly mean of Volume_BPD (right axis, blue).
fig, ax1 = plt.subplots()

# Left axis: earthquake counts.
color = 'tab:red'
ax1.plot(monthly_count_eq_df.index, monthly_count_eq_df["mag"], color=color)
ax1.set_xlabel('Date')
ax1.set_ylabel('Monthly Number of Earthquakes (M >=3)', color=color)
ax1.set_title("Monthly Earthquake Count vs. Arbuckle Monthly SWD Well Injection Volume")
ax1.set_ylim(-20, 130)
ax1.tick_params(axis='y', labelcolor=color)

# Right axis: a twin of ax1, so both series share one x-axis.
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.plot(monthly_mean_inj_df.index, monthly_mean_inj_df["Volume_BPD"], color=color)
ax2.set_ylabel('Monthly Injection Mean Volume per Day (barrels)', color=color)
ax2.set_ylim(0, 15000)
ax2.tick_params(axis='y', labelcolor=color)

fig.set_size_inches(13, 8)
fig.tight_layout()  # keeps the right-hand y-label from being clipped
plt.show()

<IPython.core.display.Javascript object>