In [13]:
import pandas as pd
from math import radians, cos, sin, asin, sqrt
import datetime

In [2]:
# Load the earthquake data: earthquakes located within 20 km of an injection well.
eq_df = pd.read_csv('../oklahoma_earthquakes_largefiles/for_analysis/eq_20km.csv', low_memory=False)
# Convert the date field (the column at position 10) to datetime values, element by element.
# The result is assigned through the column label instead of `eq_df.iloc[:, 10] = ...`:
# a whole-column `iloc` assignment is deprecated in pandas 1.5 and, from pandas 2.0 on,
# writes into the existing column in place, which leaves its dtype as `object`.
# NOTE(review): position 10 is assumed to be the `date` column - confirm against the CSV header.
eq_df[eq_df.columns[10]] = eq_df.iloc[:, 10].apply(pd.to_datetime)

In [27]:
# Drop erroneous negative magnitudes. The strict `> 0` comparison also removes
# rows whose magnitude is exactly 0 or missing (NaN compares as False).
eq_df = eq_df.loc[eq_df["mag"] > 0]

In [30]:
# Order the earthquakes from the largest to the smallest magnitude, then preview the top rows
eq_df = eq_df.sort_values(by="mag", ascending=False)
eq_df.head()

Unnamed: 0,id,time,mag,magType,cdi,place,status,latitude,longitude,depth,date,lat/lon,nearby_wells,num_wells,year_month,year_month_day
2375,us10006jxs,1472904164400,5.8,mww,7.9,"14 km NW of Pawnee, Oklahoma",reviewed,36.4251,-96.9291,5.557,2016-09-03 12:02:44.400,"36.4251,-96.9291","[3511700179, 3511704379, 3511701087, 351032428...",25,2016-09,2016-09-03
16758,usp000jadn,1320551590000,5.7,mww,6.9,"8 km NW of Prague, Oklahoma",reviewed,35.532,-96.765,5.2,2011-11-06 03:53:10.000,"35.532,-96.765","[3508123975, 3508123875, 3508123856, 350812368...",28,2011-11,2011-11-06
3739,us20004zy8,1455383226290,5.1,mww,6.8,"18 km SE of Waynoka, Oklahoma",reviewed,36.4898,-98.709,8.31,2016-02-13 17:07:06.290,"36.4898,-98.709","[3509323212, 3515121599, 3515123533, 351512375...",6,2016-02,2016-02-13
1789,us100075y8,1478483064500,5.0,mww,7.1,"3 km W of Cushing, Oklahoma",reviewed,35.9907,-96.803,4.43,2016-11-07 01:44:24.500,"35.9907,-96.803","[3503728644, 3503728759, 3503728748, 350373620...",62,2016-11,2016-11-07
16729,usp000jajb,1320720417000,4.8,mwr,6.7,"9 km SSE of Sparks, Oklahoma",reviewed,35.531,-96.788,5.0,2011-11-08 02:46:57.000,"35.531,-96.788","[3508123975, 3508123875, 3508123856, 350812368...",25,2011-11,2011-11-08


In [None]:
# Build an independent working dataframe so the original `eq_df` is never altered.
#
# Only earthquakes with magnitude >= 3 are kept; originally no filter was applied, but the
# distance function further down would take >30 minutes to run if it was calculating the
# distances between all 16k earthquakes.
#
# The filter is applied first and the result is copied (rather than copying and then
# filtering) so that `eq_df_test` is a standalone frame: columns can be added to it later
# without pandas raising SettingWithCopyWarning.
eq_df_test = eq_df[eq_df["mag"] >= 3].copy()

In [120]:
# Extract the needed columns as plain Python lists; these are iterated through
# to build the per-earthquake records used for the distance calculations.

id_list = eq_df_test["id"].to_list()
mag_list = eq_df_test["mag"].to_list()
coord_list = eq_df_test["lat/lon"].to_list()
year_month_list = eq_df_test["year_month"].to_list()
year_month_day_list = eq_df_test["year_month_day"].to_list()

In [121]:
# Assemble the list of dictionaries (lod): one record per earthquake, combining the
# parallel column lists. Every earthquake starts out not flagged as a pre/aftershock.

eq_lod = [
    {
        "id": quake_id,
        "mag": magnitude,
        "lat/lon": coords,
        "year_month": ym,
        "year_month_day": ymd,
        "pre/aftershock": False,
    }
    for quake_id, magnitude, coords, ym, ymd in zip(
        id_list, mag_list, coord_list, year_month_list, year_month_day_list
    )
]

# Checking length of earthquake list of dictionaries
len(eq_lod)

In [167]:
# Start with two empty, independent lists of earthquake IDs:
#   shocks - earthquakes classified as pre/aftershocks
#   parent - earthquakes classified as main (parent) shocks
# The classification function appends to both and skips IDs already present, so this
# cell has to be re-run before the classification is run again.

shocks, parent = [], []

In [166]:
# Function that counts the preshocks and aftershocks of a main/parent earthquake.
# Major assumptions: all pre/aftershocks occur within a 2 km radius of the parent earthquake
# (the function below tests dist <= 2), pre/aftershocks can occur indefinitely before or after
# the main shock, and the main shock is the largest-magnitude event within that 2 km radius.

# This is a quick and dirty attempt to decluster the earthquakes. A large issue with performing a linear
# regression is that relatively larger earthquakes have more pre/aftershocks - thus, a month
# with a magnitude M>4 earthquake will have a very large spike of earthquakes, a large share of which
# are pre/aftershocks of the main larger earthquake. Consequently, whereas the triggering variable for the
# main earthquake is injection volume, the aftershocks have an additional triggering variable - the magnitude of
# the main earthquake. Determining the interplay between the injection volume and the magnitude of the
# parent earthquake to predict which subsequent earthquakes are purely pre/aftershocks and which are purely
# the direct result of injection is beyond the scope of this project and would require significantly more time
# and domain knowledge.
 

def _parse_lat_lon(text):
    """Split a ``"lat,lon"`` string into a ``(lat, lon)`` pair of floats (decimal degrees)."""
    parts = text.split(",")
    return float(parts[0]), float(parts[1])


def _haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle (haversine) distance in km between two points in decimal degrees."""
    rlat1, rlon1, rlat2, rlon2 = map(radians, (lat1, lon1, lat2, lon2))
    a = sin((rlat2 - rlat1) / 2) ** 2 + cos(rlat1) * cos(rlat2) * sin((rlon2 - rlon1) / 2) ** 2
    # Clamp so floating-point rounding can never push the asin argument above 1.
    c = 2 * asin(min(1.0, sqrt(a)))
    return c * 6371  # 6371 = radius of the earth in kilometres (use 3956 for miles)


def eq_2km_count(eq_id, lat_lon, radius_km=2):
    """Claim the unclassified earthquakes near one earthquake and return how many were claimed.

    The function reads and mutates notebook-level state:

    * ``eq_lod``  - list of dicts, one per earthquake, with at least the keys
      ``"id"``, ``"lat/lon"`` and ``"pre/aftershock"``;
    * ``parent``  - list of earthquake ids classified as main shocks;
    * ``shocks``  - list of earthquake ids classified as pre/aftershocks;
    * ``nearby_eqs`` - module-level list, reset on every call, holding the ids
      claimed by the most recent call.

    ``eq_id`` is appended to ``parent`` unless it is already in ``parent`` or
    ``shocks``. Every other earthquake in ``eq_lod`` that is in neither list and
    lies within ``radius_km`` of ``lat_lon`` is appended to ``shocks`` and has its
    ``"pre/aftershock"`` flag set to True.

    Classification is first come, first served, so the result depends on the
    order in which earthquakes are passed in.

    NOTE(review): an earthquake that is already classified as a shock still
    claims its own unclassified neighbours, so a cluster can extend beyond
    ``radius_km`` of its parent - confirm this chaining is intended.

    Parameters
    ----------
    eq_id : hashable
        Id of the earthquake being processed (compared against ``item["id"]``).
    lat_lon : str
        Coordinates as ``"lat,lon"`` in decimal degrees.
    radius_km : float, optional
        Search radius in kilometres; defaults to 2, the value previously hard-coded.

    Returns
    -------
    int
        Number of earthquakes newly classified as shocks by this call.
    """
    # Kept as a module-level name because the original cell exposed it that way.
    global nearby_eqs
    nearby_eqs = []

    if eq_id not in parent and eq_id not in shocks:
        parent.append(eq_id)

    # Parse the reference coordinates once instead of once per candidate.
    lat1, lon1 = _parse_lat_lon(lat_lon)

    # Set snapshot of everything already classified: O(1) membership tests instead of
    # scanning the `shocks` and `parent` lists for every candidate.
    classified = set(shocks)
    classified.update(parent)

    for item in eq_lod:
        other_id = item["id"]
        if other_id == eq_id or other_id in classified:
            continue

        lat2, lon2 = _parse_lat_lon(item["lat/lon"])
        if _haversine_km(lat1, lon1, lat2, lon2) <= radius_km:
            nearby_eqs.append(other_id)
            shocks.append(other_id)
            classified.add(other_id)
            # Assignment, not comparison: the original used `==` here, so the flag was never set.
            item["pre/aftershock"] = True

    return len(nearby_eqs)

In [168]:
# Run the classification for every row (in the dataframe's current row order) and
# report how long the calculations took.
begin_time = datetime.datetime.now()
eq_df_test["nearby_eqs"] = eq_df_test.apply(
    lambda quake: eq_2km_count(quake["id"], quake["lat/lon"]),
    axis=1,
)
print(datetime.datetime.now() - begin_time)

0:05:44.269254


In [169]:
# Label each earthquake: "parent" when its id is in the `parent` list, otherwise "shock"
eq_df_test["eq_class"] = eq_df_test["id"].apply(
    lambda quake_id: "shock" if quake_id not in parent else "parent"
)

In [170]:
# Keep only the rows labelled as parent earthquakes and count them
parenteq = eq_df_test.loc[eq_df_test["eq_class"] == "parent"]
len(parenteq)

564

In [173]:
# Export the full working dataframe (parents and shocks alike) with its
# `nearby_eqs` and `eq_class` columns; the row index is not written.
eq_df_test.to_csv(
    '../oklahoma_earthquakes_largefiles/python_exports/eqs_with_class.csv',
    index=False,
)

In [174]:
# Display the classified dataframe, including the new `nearby_eqs` and `eq_class` columns
eq_df_test

Unnamed: 0,id,time,mag,magType,cdi,place,status,latitude,longitude,depth,date,lat/lon,nearby_wells,num_wells,year_month,year_month_day,nearby_eqs,eq_class
2375,us10006jxs,1472904164400,5.8,mww,7.9,"14 km NW of Pawnee, Oklahoma",reviewed,36.4251,-96.9291,5.557,2016-09-03 12:02:44.400,"36.4251,-96.9291","[3511700179, 3511704379, 3511701087, 351032428...",25,2016-09,2016-09-03,10,parent
16758,usp000jadn,1320551590000,5.7,mww,6.9,"8 km NW of Prague, Oklahoma",reviewed,35.5320,-96.7650,5.200,2011-11-06 03:53:10.000,"35.532,-96.765","[3508123975, 3508123875, 3508123856, 350812368...",28,2011-11,2011-11-06,33,parent
3739,us20004zy8,1455383226290,5.1,mww,6.8,"18 km SE of Waynoka, Oklahoma",reviewed,36.4898,-98.7090,8.310,2016-02-13 17:07:06.290,"36.4898,-98.709","[3509323212, 3515121599, 3515123533, 351512375...",6,2016-02,2016-02-13,19,parent
1789,us100075y8,1478483064500,5.0,mww,7.1,"3 km W of Cushing, Oklahoma",reviewed,35.9907,-96.8030,4.430,2016-11-07 01:44:24.500,"35.9907,-96.803","[3503728644, 3503728759, 3503728748, 350373620...",62,2016-11,2016-11-07,29,parent
16729,usp000jajb,1320720417000,4.8,mwr,6.7,"9 km SSE of Sparks, Oklahoma",reviewed,35.5310,-96.7880,5.000,2011-11-08 02:46:57.000,"35.531,-96.788","[3508123975, 3508123875, 3508123856, 350812368...",25,2011-11,2011-11-08,5,parent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6312,us10001sei,1427818987000,3.0,ml,3.1,"12 km ENE of Wakita, Oklahoma",reviewed,36.9210,-97.7934,2.634,2015-03-31 16:23:07.000,"36.921,-97.7934","[3505322791, 3505322786, 3505322764, 350532276...",28,2015-03,2015-03-31,0,shock
6314,us10001scn,1427813214900,3.0,ml,2.7,"5 km ENE of Covington, Oklahoma",reviewed,36.3262,-97.5257,2.554,2015-03-31 14:46:54.900,"36.3262,-97.5257","[3504724485, 3504724404, 3504724338, 350472421...",28,2015-03,2015-03-31,0,shock
6318,us10001s8l,1427773360400,3.0,ml,2.2,"12 km ENE of Nescatunga, Oklahoma",reviewed,36.7840,-98.0176,6.370,2015-03-31 03:42:40.400,"36.784,-98.0176","[3505322795, 3505322791, 3505322796, 350532248...",38,2015-03,2015-03-31,0,shock
6327,us10001s1b,1427725054200,3.0,ml,3.1,"10 km NW of Morrison, Oklahoma",reviewed,36.3569,-97.0974,5.000,2015-03-30 14:17:34.200,"36.3569,-97.0974","[3511700179, 3511701087, 3510324300, 351032428...",30,2015-03,2015-03-30,0,shock
