In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

data = pd.read_csv('earthquakes.csv')

print(data.head())
print("_____________________________________")
print(data.info())
print(data.describe())

                      Time  Magnitude                        Place  Latitude  \
0  2023-09-04 05:26:08.839        4.8  11 km SSW of Özdere, Turkey   37.9198   
1  2023-08-29 04:58:15.305        4.8                          NaN   38.9659   
2  2023-08-27 03:09:43.247        4.4  9 km NNW of Émponas, Greece   36.3079   
3  2023-08-24 05:35:24.510        4.8                          NaN   38.2125   
4  2023-08-23 08:19:30.701        4.3               eastern Turkey   39.2741   

   Longitude   Depth  
0    27.0744  10.000  
1    32.9248   7.489  
2    27.8301  69.004  
3    38.1815  10.000  
4    40.3563  10.000  
_____________________________________
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19248 entries, 0 to 19247
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Time       19248 non-null  object 
 1   Magnitude  17486 non-null  float64
 2   Place      19216 non-null  object 
 3   Latitude   19248 non-null  flo

In [12]:
# Handle missing values if any
data = data.dropna()

features = ['Depth', 'Latitude', 'Longitude']
target = 'Magnitude'

X = data[features]
y = data[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
# Standardize and normalise the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

class EarthquakeNN(nn.Module):
    def __init__(self):
        super(EarthquakeNN, self).__init__()
        self.fc1 = nn.Linear(3, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model, define the loss function and the optimizer
model = EarthquakeNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
train_losses = []

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    train_losses.append(loss.item())
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
model.eval()
with torch.no_grad():
    y_pred_train = model(X_train_tensor)
    y_pred_test = model(X_test_tensor)

# Calculate performance metrics
train_mse = mean_squared_error(y_train_tensor, y_pred_train)
test_mse = mean_squared_error(y_test_tensor, y_pred_test)
train_r2 = r2_score(y_train_tensor, y_pred_train)
test_r2 = r2_score(y_test_tensor, y_pred_test)

print(f'Train MSE: {train_mse:.4f}, Train R2: {train_r2:.4f}')
print(f'Test MSE: {test_mse:.4f}, Test R2: {test_r2:.4f}')


def categorize_magnitude(magnitude):
    if magnitude <= 4.0:
        return 0  # Low magnitude
    elif magnitude <= 6.0:
        return 1  # Moderate magnitude
    else:
        return 2  # High magnitude




Epoch [10/100], Loss: 8.1392
Epoch [20/100], Loss: 5.6618
Epoch [30/100], Loss: 3.5332
Epoch [40/100], Loss: 2.1126
Epoch [50/100], Loss: 1.3082
Epoch [60/100], Loss: 0.8185
Epoch [70/100], Loss: 0.6109
Epoch [80/100], Loss: 0.5470
Epoch [90/100], Loss: 0.5145
Epoch [100/100], Loss: 0.4906
Train MSE: 0.4885, Train R2: 0.0120
Test MSE: 0.4751, Test R2: -0.0055


In [5]:
#provjera performansi na testnom skupu

y_class = data['Magnitude'].apply(categorize_magnitude)

X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(X, y_class, test_size=0.2, random_state=42)

log_reg = LogisticRegression()
log_reg.fit(X_train_cls, y_train_cls)
y_pred_log_reg = log_reg.predict(X_test_cls)
log_reg_acc = accuracy_score(y_test_cls, y_pred_log_reg)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_cls, y_train_cls)
y_pred_knn = knn.predict(X_test_cls)
knn_acc = accuracy_score(y_test_cls, y_pred_knn)

# SVM - podrška vektorskih strojeva
svm = SVC()
svm.fit(X_train_cls, y_train_cls)
y_pred_svm = svm.predict(X_test_cls)
svm_acc = accuracy_score(y_test_cls, y_pred_svm)

print(f'Accuracy of Logistic Regression: {log_reg_acc:.2f}%')
print(f'Accuracy of KNN: {knn_acc:.2f}%')
print(f'Accuracy of SVM: {svm_acc:.2f}%')

Accuracy of Logistic Regression: 0.88%
Accuracy of KNN: 0.88%
Accuracy of SVM: 0.89%
