In [None]:
# Mount Google Drive at /content/drive so the dataset path used below resolves.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Location of the input data file on the mounted Drive; consumed by pd.read_csv below.
file_path = '/content/drive/MyDrive/Minor Project Sem-VI/data.csv'

In [None]:
import pandas as pd

In [None]:
from google.colab import data_table

# Switch on Colab's DataFrame formatter for subsequent DataFrame outputs.
data_table.enable_dataframe_formatter()

# Fields are split on runs of whitespace (regex separator), not on commas.
df = pd.read_csv(file_path, sep=r'\s+')

In [None]:
# Display the frame exactly as loaded, before columns are renamed.
df

In [None]:
# Column labels applied positionally to the 13 columns read from the file.
# NOTE: this cell is not re-runnable on the same `df`: after the two columns are
# dropped below, assigning 13 labels to the remaining 11 columns raises.
new_column_names = [
    "Date(YYYY/MM/DD)", "Time(UTC)",
    "Latitude(deg)", "Longitude(deg)", "Depth(km)",
    "Magnitude(ergs)", "Magnitude_type",
    "No_of_Stations", "Gap", "Close", "RMS", "SRC", "EventID",
]
df.columns = new_column_names

# Join the date and time text into one timestamp per row, then index the frame by it.
date_col, time_col = new_column_names[:2]
ts = pd.to_datetime(df[date_col] + " " + df[time_col])
df = df.drop(columns=[date_col, time_col])
df.index = ts
display(df)

In [None]:
# Confirm the column labels after the rename and the date/time drop.
df.columns

In [None]:
# Per-column dtype and non-null counts.
df.info()

In [None]:
# NOTE(review): same expression as the earlier `df.columns` cell; nothing changed in between.
df.columns

In [None]:
# (rows, columns) of the indexed frame.
df.shape

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Pairwise scatter/histogram grid over the columns of df.
sns.pairplot(data=df)
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Whitegrid theme for the figures drawn from this cell onward.
sns.set_theme(style="whitegrid")

plt.figure(figsize=(12, 7))

# One marker per event; colour encodes the same magnitude column shown on the y-axis.
marker_style = {'s': 100, 'edgecolor': 'black', 'alpha': 0.8}
scatter = sns.scatterplot(
    data=df,
    x='Latitude(deg)',
    y='Magnitude(ergs)',
    hue='Magnitude(ergs)',
    palette='viridis',
    **marker_style,
)

plt.title('Earthquake Magnitude vs Latitude', fontsize=16, fontweight='bold', color='darkblue')
plt.xlabel('Latitude (degrees)', fontsize=13)
plt.ylabel('Magnitude (ergs)', fontsize=13)
plt.grid(True, linestyle='--', alpha=0.6)

# Anchor the legend outside the axes so it does not cover any points.
plt.legend(title='Magnitude', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Re-apply the theme so the cell renders the same when run on its own.
sns.set_theme(style="whitegrid")

plt.figure(figsize=(8,6))

# Longitude against magnitude, coloured by magnitude.
point_style = {'s': 100, 'edgecolor': 'black', 'alpha': 0.85}
scatter = sns.scatterplot(
    data=df,
    x='Longitude(deg)',
    y='Magnitude(ergs)',
    hue='Magnitude(ergs)',
    palette='coolwarm',
    **point_style,
)

plt.title('Earthquake Magnitude vs Longitude', fontsize=16, fontweight='bold', color='darkred')
plt.xlabel('Longitude (degrees)', fontsize=13)
plt.ylabel('Magnitude (ergs)', fontsize=13)

# Dashed grid; legend placed to the right of the axes.
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(title='Magnitude', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()


In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(12, 9))
ax = fig.add_subplot(111, projection='3d')

# Position on x/y, depth on z, magnitude as colour.
lat_values = df['Latitude(deg)']
lon_values = df['Longitude(deg)']
depth_values = df['Depth(km)']
scatter = ax.scatter(
    lat_values,
    lon_values,
    depth_values,
    c=df['Magnitude(ergs)'],
    cmap='plasma',
    s=60,
    edgecolor='black',
    alpha=0.9,
)

# Title and axis labels.
ax.set_title('3D Scatter of Location, Depth & Magnitude', fontsize=16, fontweight='bold', color='darkblue')
ax.set_xlabel('Latitude (deg)', fontsize=12)
ax.set_ylabel('Longitude (deg)', fontsize=12)
ax.set_zlabel('Depth (km)', fontsize=12)

# Colour bar keyed to the magnitude values passed as `c`.
cbar = fig.colorbar(scatter, ax=ax, shrink=0.6, pad=0.1)
cbar.set_label('Magnitude (ergs)', fontsize=12)

plt.tight_layout()
plt.show()


In [None]:
# List the available columns before choosing model features.
df.columns

In [None]:
# NOTE(review): repeats the previous cell's `df.columns` with no change in between.
df.columns

In [None]:
from sklearn.model_selection import train_test_split

# Predictors: location, depth and station count. Target: the magnitude column.
feature_columns = ['Latitude(deg)', 'Longitude(deg)', 'Depth(km)', 'No_of_Stations']
X = df[feature_columns]
y = df['Magnitude(ergs)']

# 80/20 split with a fixed seed so every model below sees the same partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Shapes of the full feature matrix and target vector, one per line.
for dataset in (X, y):
    print(dataset.shape)

In [None]:
# Shapes of the four split parts, in the order train X, train y, test X, test y.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the training split; fit() returns the estimator itself.
regressor = LinearRegression().fit(X_train, y_train)
regressor

In [None]:
from sklearn.metrics import r2_score, mean_squared_error

# Result table: model names are pre-filled, and metric lists are appended to
# in the same order as each model is evaluated.
scores= {"Model name": ["Linear regression", "SVM", "Random Forest"], "mse": [], "R^2": []}

# Score the linear model on the held-out split.
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

scores['R^2'].append(r2)
scores['mse'].append(mse)

print(f"R^2: {r2:.2f}, MSE: {mse:.2f}")

In [None]:
# joblib is first imported much later in the notebook, so on a fresh kernel this
# cell would raise NameError without the import here.
import joblib

# Persist the fitted linear regression model to the working directory.
joblib.dump(regressor, 'earthquake_model_reg.pkl')

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Whitegrid theme for this figure.
sns.set_theme(style="whitegrid")

plt.figure(figsize=(7,6))

# (column, colour, legend label) for each predictor. Every predictor gets its own
# scatter + fitted line against the test target, all drawn on the same axes.
feature_styles = [
    ('Latitude(deg)', 'blue', 'Latitude (deg)'),
    ('Longitude(deg)', 'red', 'Longitude (deg)'),
    ('Depth(km)', 'gold', 'Depth (km)'),
    ('No_of_Stations', 'purple', 'No. of Stations'),
]
for column, colour, legend_label in feature_styles:
    sns.regplot(x=X_test[column], y=y_test, color=colour,
                scatter_kws={'s': 20, 'alpha': 0.7}, label=legend_label)

plt.title('Multiple Linear Regression Model: Predictors vs Magnitude', fontsize=16, fontweight='bold', color='darkgreen')
plt.xlabel('Predictor Variables (varied)', fontsize=13)
plt.ylabel('Magnitude (ergs)', fontsize=13)
plt.legend(title='Features', fontsize=11, title_fontsize=12)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()



In [None]:
from sklearn.svm import SVR

# Fit the RBF-kernel SVR on only the first `subset_size` training rows.
subset_size = 500
X_train_subset = X_train.iloc[:subset_size]
y_train_subset = y_train.iloc[:subset_size]

svm = SVR(kernel='rbf', C=1e3, gamma=0.1)
svm.fit(X_train_subset, y_train_subset)

# Estimator's default score on the full held-out split.
score = svm.score(X_test, y_test)
print("Test score:", score)

In [None]:
# Held-out metrics for the SVR, appended to the shared results dict.
y_pred_svm = svm.predict(X_test)
mse_svm = mean_squared_error(y_test, y_pred_svm)
r2_svm = r2_score(y_test, y_pred_svm)

scores['R^2'].append(r2_svm)
scores['mse'].append(mse_svm)

print(f"SVM R^2: {r2_svm:.2f}, MSE: {mse_svm:.2f}")

In [None]:
from sklearn.ensemble import RandomForestRegressor

# 100-tree forest with a fixed seed; fit() returns the estimator itself.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf

In [None]:
# Held-out predictions of the random forest. `y_pred`, `mse` and `r2` are reused
# by the plotting cells that follow.
y_pred = rf.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

scores['R^2'].append(r2)
scores['mse'].append(mse)

print('Mean Squared Error: ', mse)
print('R^2 Score: ', r2)

In [None]:
sns.set_theme(style="whitegrid")

plt.figure(figsize=(7,5))

# Predictions against actual values; points on the dashed diagonal are exact.
plt.scatter(y_test, y_pred, color='mediumseagreen', edgecolor='black', s=60, alpha=0.7)
actual_low, actual_high = y_test.min(), y_test.max()
plt.plot([actual_low, actual_high], [actual_low, actual_high],
         color='darkred', linestyle='--', linewidth=2, label='Perfect Prediction')

plt.title('Random Forest Regression: Actual vs Predicted', fontsize=16, fontweight='bold', color='navy')
plt.xlabel('Actual Magnitude', fontsize=13)
plt.ylabel('Predicted Magnitude', fontsize=13)
plt.grid(True, linestyle='--', alpha=0.5)
plt.legend()
plt.tight_layout()
plt.show()











In [None]:
# Importances from the fitted forest; the display names below follow the same
# order as the four feature columns the forest was trained on.
importances = rf.feature_importances_
features = ['Latitude', 'Longitude', 'Depth', 'No. of Stations']
sns.set_theme(style="whitegrid")

plt.figure(figsize=(10, 6))
bar_colors = sns.color_palette("viridis", len(features))
bars = plt.barh(features, importances, color=bar_colors, edgecolor='black')

# Write each bar's value just past its right edge, vertically centred.
for bar in bars:
    bar_width = bar.get_width()
    bar_mid_y = bar.get_y() + bar.get_height()/2
    plt.text(bar_width + 0.005, bar_mid_y, f'{bar_width:.3f}', va='center', fontsize=11)

plt.title('Feature Importance (Random Forest)', fontsize=16, fontweight='bold', color='darkblue')
plt.xlabel('Importance Score', fontsize=13)
plt.ylabel('Features', fontsize=13)
plt.tight_layout()
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="whitegrid")

# Compute the random forest residuals explicitly as predicted minus actual.
# sns.residplot(x=actual, y=predicted) would instead fit its own regression of
# predicted on actual and plot the residuals of *that* fit, which is not what
# the y-axis label describes.
# Predictions come from `rf` directly rather than the shared `y_pred` name,
# which later cells reassign to other models' predictions.
rf_test_pred = rf.predict(X_test)
rf_residuals = rf_test_pred - y_test

plt.figure(figsize=(10, 6))
# Scatter of residuals with a LOWESS trend line; fit_reg stays on so the trend is drawn.
sns.regplot(x=y_test, y=rf_residuals, color='darkorange', lowess=True,
            scatter_kws={'alpha': 0.6, 's': 50, 'edgecolor': 'black'})
plt.xlabel('Actual Magnitude', fontsize=13)
plt.ylabel('Residuals (Predicted - Actual)', fontsize=13)
plt.title('Residual Plot of Random Forest Model', fontsize=16, fontweight='bold', color='darkred')
plt.grid(True, linestyle='--', alpha=0.5)
# Zero line: points above it are over-predictions, below it under-predictions.
plt.axhline(0, color='gray', linestyle='--', linewidth=1.5)
plt.tight_layout()
plt.show()


In [None]:
# Compare actual and random forest predictions for the first 20 held-out events.
# The test split is shuffled, so its timestamp index is not in time order. The
# rows are sorted by timestamp before plotting so the line does not double back.
# Predictions come from `rf` directly because `y_pred` is reassigned by later cells.
n_shown = 20
rf_sample = pd.DataFrame(
    {
        'actual': y_test.iloc[:n_shown].to_numpy(),
        'predicted': rf.predict(X_test.iloc[:n_shown]),
    },
    index=y_test.index[:n_shown],
).sort_index()

plt.plot(rf_sample.index, rf_sample['actual'], color='blue', label='Actual Magnitude')
plt.plot(rf_sample.index, rf_sample['predicted'], color='orange', label='Predicted Magnitude')
plt.xlabel('Index')
plt.ylabel('Magnitude')
plt.title('Actual vs. Predicted Line Plot')
plt.legend()
plt.show()

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Single decision tree with a fixed seed, trained on the full training split.
dt_regressor = DecisionTreeRegressor(random_state=42)
dt_regressor.fit(X_train, y_train)

# Held-out metrics.
y_pred_dt = dt_regressor.predict(X_test)
r2_dt = r2_score(y_test, y_pred_dt)
mse_dt = mean_squared_error(y_test, y_pred_dt)

# `scores["Model name"]` was created with only three names. Add this model's name
# with its metrics so all three lists stay the same length and line up.
scores['Model name'].append("Decision Tree")
scores['mse'].append(mse_dt)
scores['R^2'].append(r2_dt)

print("Decision Tree R^2: {:.2f}, MSE: {:.2f}".format(r2_dt, mse_dt))


In [None]:
# joblib is first imported much later in the notebook; import it here so the
# cell works on a fresh kernel run top to bottom.
import joblib

# Persist the fitted decision tree to the working directory.
joblib.dump(dt_regressor, 'earthquake_model_dt.pkl')

In [None]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 5-nearest-neighbour regressor on the raw feature columns.
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)

# Held-out metrics.
y_pred_knn = knn.predict(X_test)
r2_knn = r2_score(y_test, y_pred_knn)
mse_knn = mean_squared_error(y_test, y_pred_knn)

print(f"KNN R^2: {r2_knn:.2f}, MSE: {mse_knn:.2f}")


In [None]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
import matplotlib.pyplot as plt

# Neighbour search over all four training features; query with the first training row.
neigh = NearestNeighbors(n_neighbors=5)
neigh.fit(X_train)
distances, indices = neigh.kneighbors(X_train.iloc[[0]])

# Only latitude/longitude are drawn, although distances use every feature.
train_lat = X_train['Latitude(deg)']
train_lon = X_train['Longitude(deg)']
neighbor_rows = indices[0]

plt.figure(figsize=(8, 6))
plt.scatter(train_lat, train_lon, c='grey', label='Training Data')
plt.scatter(train_lat.iloc[[0]], train_lon.iloc[[0]], c='red', label='Target Data Point')

# Connect the query point to each returned neighbour.
for row in neighbor_rows:
  plt.plot([train_lat.iloc[[0]], train_lat.iloc[[row]]],
           [train_lon.iloc[[0]], train_lon.iloc[[row]]],
           c='blue')

plt.scatter(train_lat.iloc[neighbor_rows], train_lon.iloc[neighbor_rows], c='green', label='Nearest Neighbors')

plt.xlabel('Latitude')
plt.ylabel('Longitude')
plt.title('KNN Graph of Nearest Neighbors')
plt.legend()
plt.show()


In [None]:
# joblib is first imported much later in the notebook; import it here so the
# cell works on a fresh kernel run top to bottom.
import joblib

# Persist the fitted KNN *regressor* (`knn`). The previous version dumped `neigh`,
# the unsupervised NearestNeighbors object built only for the neighbour plot. It
# has no predict(), so a consumer loading this file could not get magnitudes from it.
joblib.dump(knn, 'earthquake_model_knn.pkl')

In [None]:
!pip install xgboost


In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

# Gradient-boosted trees with squared-error objective and a fixed seed.
xgb_params = dict(
    objective='reg:squarederror',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X_train, y_train)

# Held-out metrics. `mse` and `r2` overwrite the values left by earlier cells.
y_pred_xgb = xgb_model.predict(X_test)
r2 = r2_score(y_test, y_pred_xgb)
mse = mean_squared_error(y_test, y_pred_xgb)

print(
    "XGBoost Model Performance:",
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {mse**0.5}",
    f"R2 Score: {r2}",
    sep="\n",
)


In [None]:
# XGBoost predictions against actual values; the dashed diagonal marks exact predictions.
plt.figure(figsize=(4,3))
plt.scatter(y_test, y_pred_xgb, alpha=0.5)
diag_low, diag_high = y_test.min(), y_test.max()
plt.plot([diag_low, diag_high], [diag_low, diag_high], 'k--', lw=2)
plt.title('XGBoost: Actual vs. Predicted Magnitude')
plt.xlabel('Actual Magnitude')
plt.ylabel('Predicted Magnitude')
plt.show()


In [None]:
# joblib is first imported much later in the notebook; import it here so the
# cell works on a fresh kernel run top to bottom.
import joblib

# Persist the fitted XGBoost regressor to the working directory.
joblib.dump(xgb_model, 'earthquake_model_xgb.pkl')

In [None]:
!pip install lightgbm catboost


In [None]:
import lightgbm as lgb
from catboost import CatBoostRegressor


In [None]:
# LightGBM regressor with the same tree count, learning rate and depth as the XGBoost cell.
lgb_params = dict(
    objective='regression',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
lgb_model = lgb.LGBMRegressor(**lgb_params)

# Fit on the training split, then predict the held-out split.
lgb_model.fit(X_train, y_train)
y_pred_lgb = lgb_model.predict(X_test)

# Held-out metrics.
r2_lgb = r2_score(y_test, y_pred_lgb)
mse_lgb = mean_squared_error(y_test, y_pred_lgb)

print(
    "LightGBM Model Performance:",
    f"Mean Squared Error (MSE): {mse_lgb}",
    f"Root Mean Squared Error (RMSE): {mse_lgb**0.5}",
    f"R2 Score: {r2_lgb}",
    sep="\n",
)


In [None]:
import lightgbm as lgb

# Gain-based importance of up to ten features ('split' is the alternative type).
lgb.plot_importance(lgb_model, importance_type='gain', max_num_features=10)
plt.title("LightGBM Feature Importance")
plt.show()

# Predicted against actual values, with the exact-prediction diagonal.
plt.figure(figsize=(4,3))
plt.scatter(y_test, y_pred_lgb, alpha=0.5)
diag_low, diag_high = y_test.min(), y_test.max()
plt.plot([diag_low, diag_high], [diag_low, diag_high], 'k--', lw=2)
plt.title('LightGBM: Actual vs. Predicted Magnitude')
plt.xlabel('Actual Magnitude')
plt.ylabel('Predicted Magnitude')
plt.show()

# Last expression: the tree graph is the cell's displayed output.
lgb.create_tree_digraph(lgb_model)


In [None]:
# joblib is first imported much later in the notebook; import it here so the
# cell works on a fresh kernel run top to bottom.
import joblib

# Persist the fitted LightGBM regressor to the working directory.
joblib.dump(lgb_model, 'earthquake_model_lgb.pkl')

In [None]:
# Baseline CatBoost regressor: 100 iterations, silent training, fixed seed.
cat_params = dict(iterations=100, learning_rate=0.1, depth=5, random_state=42, verbose=0)
cat_model = CatBoostRegressor(**cat_params)

# Fit on the training split, then predict the held-out split.
cat_model.fit(X_train, y_train)
y_pred_cat = cat_model.predict(X_test)

# Held-out metrics.
r2_cat = r2_score(y_test, y_pred_cat)
mse_cat = mean_squared_error(y_test, y_pred_cat)

print(
    "CatBoost Model Performance:",
    f"Mean Squared Error (MSE): {mse_cat}",
    f"Root Mean Squared Error (RMSE): {mse_cat**0.5}",
    f"R2 Score: {r2_cat}",
    sep="\n",
)


In [None]:
from catboost import CatBoostRegressor, Pool, metrics, cv
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Carve a validation set out of the *training* split for early stopping.
# Previously the test split was passed as eval_set, so the overfitting detector
# chose the stopping iteration on the same data the final metrics are reported
# on. That leaks test information and makes the reported scores optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0
)

# Up to 1000 iterations; stop once the validation RMSE has not improved for 50
# iterations. Progress is logged every 100 iterations.
model = CatBoostRegressor(iterations=1000,
                          learning_rate=0.03,
                          depth=6,
                          loss_function='RMSE',
                          eval_metric='RMSE',
                          random_seed=42,
                          od_type='Iter',
                          od_wait=50,
                          verbose=100)

# Fit with the validation set driving early stopping; the test split stays untouched.
model.fit(X_fit, y_fit,
          eval_set=(X_val, y_val),
          plot=True)

# Feature importance
feature_importances = model.get_feature_importance(prettified=True)
print(feature_importances)
plt.figure(figsize=(7,5))
plt.barh(feature_importances['Feature Id'], feature_importances['Importances'], color='skyblue', edgecolor='black')
plt.title("CatBoost Feature Importance", fontsize=16, fontweight='bold')
plt.xlabel("Importance", fontsize=12)
plt.ylabel("Features", fontsize=12)
plt.grid(axis='x', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

# Final evaluation on the test split, which played no part in training or stopping.
# NOTE: `y_pred`, `mse` and `r2` overwrite the values left by earlier model cells.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse}")
print(f"R-squared: {r2}")

# Learning curve: per-iteration RMSE on the fitting data and on the validation set.
evals_result = model.evals_result_
iterations = list(range(len(evals_result['learn']['RMSE'])))
plt.figure(figsize=(7,5))
plt.plot(iterations, evals_result['learn']['RMSE'], label='Train', color='dodgerblue', linewidth=2)
plt.plot(iterations, evals_result['validation']['RMSE'], label='Validation', color='orangered', linewidth=2)
plt.xlabel('Iterations', fontsize=12)
plt.ylabel('RMSE', fontsize=12)
plt.title('CatBoost Learning Curve', fontsize=16, fontweight='bold')
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()
plt.show()

# Predicted against actual on the test split, with the exact-prediction diagonal.
plt.figure(figsize=(7,5))
plt.scatter(y_test, y_pred, alpha=0.6, color='mediumseagreen', edgecolors='black')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--', lw=2)
plt.xlabel("Actual Magnitude (ergs)", fontsize=12)
plt.ylabel("Predicted Magnitude (ergs)", fontsize=12)
plt.title("CatBoost Predicted vs Actual", fontsize=16, fontweight='bold')
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()


In [None]:
# joblib is first imported much later in the notebook; import it here so the
# cell works on a fresh kernel run top to bottom.
import joblib

# Persist the CatBoost model currently bound to `model` to the working directory.
joblib.dump(model, 'earthquake_model_cb.pkl')

In [None]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression


In [None]:
# Define Base Models
base_models = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('knn', KNeighborsRegressor(n_neighbors=5)),
    ('xgb', xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)),
    ('lgb', lgb.LGBMRegressor(objective='regression', n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42)),
    ('cat', CatBoostRegressor(iterations=100, learning_rate=0.1, depth=5, random_state=42, verbose=0))
]


In [None]:
# Final estimator of the stack: a linear model fitted on the base models' predictions.
meta_model = LinearRegression()


In [None]:
# Stacking Regressor
stacking_model = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
    n_jobs=-1
)

stacking_model.fit(X_train, y_train)

# Predict
y_pred_stack = stacking_model.predict(X_test)

# Evaluate
mse_stack = mean_squared_error(y_test, y_pred_stack)
r2_stack = r2_score(y_test, y_pred_stack)

print("Stacking Model Performance:")
print(f"Mean Squared Error (MSE): {mse_stack}")
print(f"Root Mean Squared Error (RMSE): {mse_stack**0.5}")
print(f"R2 Score: {r2_stack}")


In [None]:
# The test split is shuffled, so its timestamp index is not in time order.
# Plotting lines over it directly makes them jump back and forth across the
# x-axis. Pair each prediction with its timestamp and sort by time first.
stack_comparison = pd.DataFrame(
    {'actual': y_test.to_numpy(), 'predicted': y_pred_stack},
    index=y_test.index,
).sort_index()

plt.figure(figsize=(8, 6))
plt.plot(stack_comparison.index, stack_comparison['actual'], label='Actual Magnitude')
plt.plot(stack_comparison.index, stack_comparison['predicted'], label='Stacked Model Prediction')
plt.xlabel('Sample Index')
plt.ylabel('Magnitude')
plt.title('Stacking Model: Actual vs. Predicted Magnitude')
plt.legend()
plt.show()


In [None]:
# Persist the fitted random forest (`rf`) to the working directory.
import joblib
joblib.dump(rf, 'earthquake_model.pkl')

In [None]:
# Persist the fitted stacking ensemble to the working directory.
import joblib
joblib.dump(stacking_model, 'earthquake_model_sm.pkl')

In [None]:
# NOTE(review): identical to the dump in the previous cell; running it just
# overwrites the same file. Candidate for removal.
joblib.dump(stacking_model, 'earthquake_model_sm.pkl')