In [None]:
# Big thanks to Chen Zecharya, creator of Random Forest code re: Earthquakes on Kaggle for the base ML model.

In [10]:
# import sklearn and random forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import metrics
import pandas as pd

In [31]:
# pull csv into df
origin_df = pd.read_csv("./Data_Extraction_Transform/dropped_cols_month_earthquake.csv", index_col= 0)
origin_df["unixTime"] = pd.to_datetime(origin_df['time']).astype(int) / 10**9
origin_df.head()
origin_df.shape

(13323, 16)

## Conducting modeling for magnitude predictions.

In [68]:
# magnitude test
independent = ["unixTime", "latitude", "longitude"]
dependent = ["mag"]
X = origin_df[independent]
y = origin_df[dependent]

In [69]:
# setting training and test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [70]:
import warnings 
warnings.filterwarnings("ignore")
# training and looking for best accuracy
best_score = 0
best_mse = 0
best_model = None

for i in range(1, 50):
    model = RandomForestRegressor(n_estimators = i+1, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    r2 = r2_score(y_test, y_pred)
    score = model.score(X_test, y_test)

    if best_score < score:
        best_score = score
        best_mse = mse
        best_model = model

print(f"Best Model: {best_model}")
print(f"Best MSE: {best_mse}")
print(f"Best Model Score: {best_score}")

Best Model: RandomForestRegressor(n_estimators=50, random_state=42)
Best MSE: 0.28547713293769805
Best Model Score: 0.8065965685748406


In [78]:
best_y_pred = best_model.predict(X_test)
predictions = best_y_pred.tolist()
test = y_test["mag"].tolist()

prediction_df = pd.DataFrame({"Actual Magnitude": test, "Predicted Magnitude": predictions})
prediction_df.head()
prediction_df.to_csv("./ML_Predictions/mag_predictions.csv")

## Conducting modeling for depth predictions.

In [79]:
# depth test
independent = ["unixTime", "latitude", "longitude"]
dependent = ["depth"]
X = origin_df[independent]
y = origin_df[dependent]

In [80]:
# setting training and test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [81]:
import warnings 
warnings.filterwarnings("ignore")
# training and looking for best accuracy
best_score = 0
best_mse = 0
best_model = None

for i in range(1, 50):
    model = RandomForestRegressor(n_estimators = i+1, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    r2 = r2_score(y_test, y_pred)
    score = model.score(X_test, y_test)

    if best_score < score:
        best_score = score
        best_mse = mse
        best_model = model

print(f"Best Model: {best_model}")
print(f"Best MSE: {best_mse}")
print(f"Best Model Score: {best_score}")

Best Model: RandomForestRegressor(n_estimators=48, random_state=42)
Best MSE: 439.2291820600262
Best Model Score: 0.7375232209698414


In [82]:
best_y_pred = best_model.predict(X_test)
predictions = best_y_pred.tolist()
test = y_test["depth"].tolist()

prediction_df = pd.DataFrame({"Actual Depth": test, "Predicted Depth": predictions})
prediction_df.head()
prediction_df.to_csv("./ML_Predictions/depth_predictions.csv")

## Conducting modeling for tsunami prediction

In [64]:
# tsunami test
predictors = ["mag", "depth", "latitude","longitude"]
pred_values = ["tsunami"]
X = origin_df[predictors]
y = origin_df[pred_values]

In [65]:
# setting training and test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [66]:
import warnings 
warnings.filterwarnings("ignore")
# training and looking for best accuracy
best_score = 0
best_mse = 0
best_model = None

for i in range(1, 50):
    model = RandomForestRegressor(n_estimators = i+1, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    r2 = r2_score(y_test, y_pred)
    score = model.score(X_test, y_test)

    if best_score < score:
        best_score = score
        best_mse = mse
        best_model = model

print(f"Best Model: {best_model}")
print(f"Best MSE: {best_mse}")
print(f"Best Model Score: {best_score}")

Best Model: RandomForestRegressor(n_estimators=6, random_state=42)
Best MSE: 7.296226808421929e-05
Best Model Score: 0.9027047607126466


In [67]:
best_y_pred = best_model.predict(X_test)
predictions = best_y_pred.tolist()

print(predictions)
# pd.DataFrame()

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,