In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv('earthquakes.csv')

# Display the first few rows of the dataset
print(data.head())

# Data exploration
print(data.info())
print(data.describe())

                      Time  Magnitude                        Place  Latitude  \
0  2023-09-04 05:26:08.839        4.8  11 km SSW of Özdere, Turkey   37.9198   
1  2023-08-29 04:58:15.305        4.8                          NaN   38.9659   
2  2023-08-27 03:09:43.247        4.4  9 km NNW of Émponas, Greece   36.3079   
3  2023-08-24 05:35:24.510        4.8                          NaN   38.2125   
4  2023-08-23 08:19:30.701        4.3               eastern Turkey   39.2741   

   Longitude   Depth  
0    27.0744  10.000  
1    32.9248   7.489  
2    27.8301  69.004  
3    38.1815  10.000  
4    40.3563  10.000  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19248 entries, 0 to 19247
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Time       19248 non-null  object 
 1   Magnitude  17486 non-null  float64
 2   Place      19216 non-null  object 
 3   Latitude   19248 non-null  float64
 4   Longitude  19248 non-null  f

In [3]:
data['Time'] = pd.to_datetime(data['Time'])
data.isnull().sum()  # Check for missing values
data.dropna(inplace=True)  # Drop rows with missing values


In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
numerical_features = ['Magnitude', 'Depth', 'Latitude', 'Longitude']
data[numerical_features] = scaler.fit_transform(data[numerical_features])


In [5]:
# Define categories based on magnitude
def categorize_magnitude(magnitude):
    if magnitude <= 4.0:
        return 0  # Low magnitude
    elif magnitude <= 6.0:
        return 1  # Moderate magnitude
    else:
        return 2  # High magnitude

# Apply the function to create a new column 'Category'
data['Category'] = data['Magnitude'].apply(categorize_magnitude)


In [6]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Convert dataframe to PyTorch tensors
X = torch.tensor(data[numerical_features].values, dtype=torch.float32)
y = torch.tensor(data['Category'].values, dtype=torch.long)

# Create a dataset and data loader
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


In [7]:
import torch.nn as nn
import torch.optim as optim

# Define the neural network
class EarthquakeClassifier(nn.Module):
    def __init__(self):
        super(EarthquakeClassifier, self).__init__()
        self.fc1 = nn.Linear(len(numerical_features), 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 3)  # 3 classes

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Instantiate the model, define the loss function and the optimizer
model = EarthquakeClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [8]:
# Training loop
num_epochs = 20

for epoch in range(num_epochs):
    for inputs, labels in dataloader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')


Epoch 1/20, Loss: 0.0014465557178482413
Epoch 2/20, Loss: 0.00012695624900516123
Epoch 3/20, Loss: 1.186960162158357e-05
Epoch 4/20, Loss: 1.1495151284179883e-06
Epoch 5/20, Loss: 0.001002632430754602
Epoch 6/20, Loss: 4.376564447738929e-06
Epoch 7/20, Loss: 2.6140646696148906e-06
Epoch 8/20, Loss: 6.897076332279539e-07
Epoch 9/20, Loss: 2.6396310204290785e-07
Epoch 10/20, Loss: 1.5497042795686866e-06
Epoch 11/20, Loss: 1.4560415593223297e-06
Epoch 12/20, Loss: 3.150526879380777e-07
Epoch 13/20, Loss: 1.0217934942602369e-07
Epoch 14/20, Loss: 0.004569909535348415
Epoch 15/20, Loss: 4.087164029442647e-07
Epoch 16/20, Loss: 0.0
Epoch 17/20, Loss: 0.0
Epoch 18/20, Loss: 0.0
Epoch 19/20, Loss: 0.0
Epoch 20/20, Loss: 0.0


In [9]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Get predictions
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    predictions = model(X).argmax(dim=1)

# Calculate metrics
accuracy = accuracy_score(y, predictions)
precision, recall, f1, _ = precision_recall_fscore_support(y, predictions, average='weighted')

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')


Accuracy: 0.9999427065429128
Precision: 0.9999457219880226
Recall: 0.9999427065429128
F1 Score: 0.9999434799571723


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Prepare the data
X = data[numerical_features]
y = data['Category']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Predictions and evaluation
y_pred_log_reg = log_reg.predict(X_test)
accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)
report_log_reg = classification_report(y_test, y_pred_log_reg)

print(f'Logistic Regression Accuracy: {accuracy_log_reg}')
print(f'Logistic Regression Report:\n{report_log_reg}')


Logistic Regression Accuracy: 1.0
Logistic Regression Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3489
           1       1.00      1.00      1.00         2

    accuracy                           1.00      3491
   macro avg       1.00      1.00      1.00      3491
weighted avg       1.00      1.00      1.00      3491



In [12]:
from sklearn.neighbors import KNeighborsClassifier

# KNN
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Predictions and evaluation
y_pred_knn = knn.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
report_knn = classification_report(y_test, y_pred_knn)

print(f'KNN Accuracy: {accuracy_knn}')
print(f'KNN Report:\n{report_knn}')


KNN Accuracy: 0.9997135491263248
KNN Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3489
           1       1.00      0.50      0.67         2

    accuracy                           1.00      3491
   macro avg       1.00      0.75      0.83      3491
weighted avg       1.00      1.00      1.00      3491



In [13]:
from sklearn.svm import SVC

# SVM
svm = SVC()
svm.fit(X_train, y_train)

# Predictions and evaluation
y_pred_svm = svm.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
report_svm = classification_report(y_test, y_pred_svm)

print(f'SVM Accuracy: {accuracy_svm}')
print(f'SVM Report:\n{report_svm}')


SVM Accuracy: 1.0
SVM Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3489
           1       1.00      1.00      1.00         2

    accuracy                           1.00      3491
   macro avg       1.00      1.00      1.00      3491
weighted avg       1.00      1.00      1.00      3491

