In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Step 1: Load and Inspect the Data
print("Step 1: Loading and inspecting the data...")
df = pd.read_csv('earthquake_1995-2023.csv')
print(df.head())
print(df.info())



Step 1: Loading and inspecting the data...
                                       title  magnitude         date_time  \
0           M 6.5 - 42 km W of Sola, Vanuatu        6.5  16-08-2023 12:47   
1  M 6.5 - 43 km S of IntipucÃ¡, El Salvador        6.5  19-07-2023 00:22   
2  M 6.6 - 25 km ESE of LoncopuÃ©, Argentina        6.6  17-07-2023 03:05   
3      M 7.2 - 98 km S of Sand Point, Alaska        7.2  16-07-2023 06:48   
4                   M 7.3 - Alaska Peninsula        7.3  16-07-2023 06:48   

   cdi  mmi   alert  tsunami  sig net  nst      dmin    gap magType    depth  \
0    7    4   green        0  657  us  114  7.177000   25.0     mww  192.955   
1    8    6  yellow        0  775  us   92  0.679000   40.0     mww   69.727   
2    7    5   green        0  899  us   70  1.634000   28.0     mww  171.371   
3    6    6   green        1  860  us  173  0.907000   36.0     mww   32.571   
4    0    5     NaN        1  820  at   79  0.879451  172.8      Mi   21.000   

   latitude  

In [9]:


# Step 2: Data Preprocessing and Feature Engineering
print("\nStep 2: Preprocessing the data and creating features...")
# Convert 'date_time' to datetime objects
df['date_time'] = pd.to_datetime(df['date_time'], errors='coerce')

# Drop rows where 'date_time' conversion failed
df.dropna(subset=['date_time'], inplace=True)

# Extract time-based features
df['year'] = df['date_time'].dt.year
df['month'] = df['date_time'].dt.month
df['day'] = df['date_time'].dt.day
df['hour'] = df['date_time'].dt.hour
df['minute'] = df['date_time'].dt.minute
df['second'] = df['date_time'].dt.second

# Define features (X) and target (y)
features = ['latitude', 'longitude', 'depth', 'year', 'month', 'day']
target = 'magnitude'

# Drop rows with any missing values in the selected features or target
df.dropna(subset=features + [target], inplace=True)

X = df[features]
y = df[target]




Step 2: Preprocessing the data and creating features...


  df['date_time'] = pd.to_datetime(df['date_time'], errors='coerce')


In [10]:
# Step 3: Split the data into training and testing sets
print("\nStep 3: Splitting the data into training and testing sets...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training set size: {len(X_train)} samples")
print(f"Testing set size: {len(X_test)} samples")



Step 3: Splitting the data into training and testing sets...
Training set size: 474 samples
Testing set size: 119 samples


In [11]:
# Step 4: Train the Random Forest Regressor model
print("\nStep 4: Training the Random Forest Regressor model...")
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)
print("Model training complete.")




Step 4: Training the Random Forest Regressor model...
Model training complete.


In [12]:
# Step 5: Evaluate the model
print("\nStep 5: Evaluating the model...")
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"R-squared (R2) Score: {r2:.4f}")




Step 5: Evaluating the model...
Mean Absolute Error (MAE): 0.3505
Mean Squared Error (MSE): 0.1922
R-squared (R2) Score: -0.0677


In [13]:
# Step 6: Displaying a sample of predictions vs. actual values
print("\nStep 6: Sample of Actual vs. Predicted Magnitudes")
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head())


Step 6: Sample of Actual vs. Predicted Magnitudes
     Actual  Predicted
51      7.0      7.095
127     6.6      6.764
370     6.9      7.123
709     7.1      7.677
303     7.6      7.307
