In [None]:
# Big thanks to Chen Zecharya, whose Random Forest earthquake code on Kaggle served as the base for this ML model.

In [1]:
# imports: standard library first, then pandas and scikit-learn
import warnings
from pathlib import Path

import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## Modeling magnitude with the new dataset, which includes nst and rms values

In [2]:
# Read the extracted CSV into a DataFrame; its first column is used as the index.
test_df = pd.read_csv("./Data_Extraction_transform/DroppedTestwithRMSandNST.csv", index_col=0)

# Convert the "time" column to Unix epoch seconds (float).
# Subtracting the epoch and dividing by a one-second Timedelta does not depend on
# the platform's default integer width or on the datetime resolution pandas picks,
# which `.astype(int) / 10**9` silently assumes to be 64-bit nanoseconds.
event_time = pd.to_datetime(test_df["time"], utc=True)
unix_epoch = pd.Timestamp("1970-01-01", tz="UTC")
test_df["unixTime"] = (event_time - unix_epoch) / pd.Timedelta(seconds=1)

# Keep only the rows whose nst value is at least MIN_NST.
MIN_NST = 7
test_nst_df = test_df.loc[test_df["nst"] >= MIN_NST]
test_nst_df.shape

(11013, 17)

In [3]:
# Inputs and target for the magnitude model: event time and location plus the
# nst and rms columns are the predictors, "mag" is the value to predict.
independent = ["unixTime", "latitude", "longitude", "nst", "rms"]
dependent = ["mag"]

# Both selections use a list of column names, so X and y are DataFrames
# (y has a single column).
X, y = test_nst_df[independent], test_nst_df[dependent]

In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# y_train is a single-column DataFrame. Flatten it to 1-D for fitting instead of
# silencing every warning in the kernel just to hide the column-vector warning.
y_train_1d = y_train.to_numpy().ravel()

# Sweep the forest size from 2 to 50 trees and keep the model with the highest R^2.
# NOTE(review): the forest size is chosen on the same test split that is reported
# below, so the printed score is an optimistic estimate. A separate validation
# split or cross-validation would give an unbiased number.
# Start from -inf: R^2 can be negative, and a baseline of 0 would leave
# best_model as None whenever no candidate scores above zero.
best_score = float("-inf")
best_mse = None
best_model = None

for n_estimators in range(2, 51):
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
    model.fit(X_train, y_train_1d)
    y_pred = model.predict(X_test)

    # Same value as model.score(X_test, y_test), without a second prediction pass.
    score = r2_score(y_test, y_pred)

    if score > best_score:
        best_score = score
        best_mse = mean_squared_error(y_test, y_pred)
        best_model = model

print(f"Best Model: {best_model}")
print(f"Best MSE: {best_mse}")
print(f"Best Model Score: {best_score}")

Best Model: RandomForestRegressor(n_estimators=48, random_state=42)
Best MSE: 0.06924066300105776
Best Model Score: 0.862718288813799


In [5]:
# Predict magnitude for the held-out rows with the selected model and pair each
# prediction with the observed value.
best_y_pred = best_model.predict(X_test)
predictions = best_y_pred.tolist()
test = y_test["mag"].tolist()

prediction_df = pd.DataFrame({"Actual Magnitude": test, "Predicted Magnitude": predictions})

# to_csv does not create missing folders, so make sure the output directory exists.
output_path = Path("./ML_Predictions/mag_predictions.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
prediction_df.to_csv(output_path)

# Kept as the last expression so the notebook actually renders the preview;
# in the middle of the cell the result was discarded.
prediction_df.head()

## Modeling depth with the dataset that includes nst and rms values

In [6]:
# Inputs and target for the depth model: the same predictors as the magnitude
# model, with "depth" as the value to predict.
independent = ["unixTime", "latitude", "longitude", "nst", "rms"]
dependent = ["depth"]

# Both selections use a list of column names, so X and y are DataFrames
# (y has a single column).
X, y = test_nst_df[independent], test_nst_df[dependent]

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# y_train is a single-column DataFrame. Flatten it to 1-D for fitting instead of
# silencing every warning in the kernel just to hide the column-vector warning.
y_train_1d = y_train.to_numpy().ravel()

# Sweep the forest size from 2 to 50 trees and keep the model with the highest R^2.
# NOTE(review): the forest size is chosen on the same test split that is reported
# below, so the printed score is an optimistic estimate. A separate validation
# split or cross-validation would give an unbiased number.
# Start from -inf: R^2 can be negative, and a baseline of 0 would leave
# best_model as None whenever no candidate scores above zero.
best_score = float("-inf")
best_mse = None
best_model = None

for n_estimators in range(2, 51):
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
    model.fit(X_train, y_train_1d)
    y_pred = model.predict(X_test)

    # Same value as model.score(X_test, y_test), without a second prediction pass.
    score = r2_score(y_test, y_pred)

    if score > best_score:
        best_score = score
        best_mse = mean_squared_error(y_test, y_pred)
        best_model = model

print(f"Best Model: {best_model}")
print(f"Best MSE: {best_mse}")
print(f"Best Model Score: {best_score}")

Best Model: RandomForestRegressor(n_estimators=35, random_state=42)
Best MSE: 18.809780898857564
Best Model Score: 0.816501118005512


In [8]:
# Predict depth for the held-out rows with the selected model and pair each
# prediction with the observed value.
best_y_pred = best_model.predict(X_test)
predictions = best_y_pred.tolist()
test = y_test["depth"].tolist()

prediction_df = pd.DataFrame({"Actual Depth": test, "Predicted Depth": predictions})

# to_csv does not create missing folders, so make sure the output directory exists.
output_path = Path("./ML_Predictions/depth_predictions.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
prediction_df.to_csv(output_path)

# Kept as the last expression so the notebook actually renders the preview;
# in the middle of the cell the result was discarded.
prediction_df.head()