In [50]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import pandas as pd



In [51]:
data = pd.read_csv("Data KP Gempa Bumi Wilayah Sumatera Selatan Dan Sekitarnya.csv")
data

Unnamed: 0,Day,Month,Years,Time,Latitude,Longitude,Depth,Mag,Place
0,4,10,2022,20:14:43,-4.9967,104.2793,112.997,4.40,Kotabumi
1,1,10,2022,13:50:16,-3.8325,101.8353,60.652,4.90,Bengkulu
2,29,9,2022,12:56:31,-5.4541,102.6805,37.099,4.50,kotabumi
3,19,9,2022,19:16:25,-5.5071,102.8860,38.085,4.20,Pagar Alam
4,13,9,2022,11:18:03,-5.6133,104.1844,96.048,4.90,Kotabumi
...,...,...,...,...,...,...,...,...,...
2807,15,4,1916,12:31:43,-4.3380,102.5970,35.000,6.71,Bengkulu
2808,14,7,1914,3:10:26,-5.3050,102.5790,30.000,6.32,Pagar Alam
2809,25,6,1914,19:07:25,-4.0120,101.6380,35.000,7.55,Bengkulu
2810,3,6,1909,18:40:43,-2.7130,101.1880,35.000,7.58,Sungai Penuh


In [52]:
minutes = []
hours = []
seconds = []

for t in data.Time:
    t  = t.split(":")
    minutes.append(t[1])
    hours.append(t[0])
    seconds.append(t[2])

In [53]:
data['Time'] = pd.to_datetime(data['Time'])

In [54]:
data['Jam'] = data['Time'].dt.hour
data['Menit'] = data['Time'].dt.minute
data['Detik'] = data['Time'].dt.second

In [55]:
data.drop(['Time'], axis=1, inplace=True)
data.head()

Unnamed: 0,Day,Month,Years,Latitude,Longitude,Depth,Mag,Place,Jam,Menit,Detik
0,4,10,2022,-4.9967,104.2793,112.997,4.4,Kotabumi,20,14,43
1,1,10,2022,-3.8325,101.8353,60.652,4.9,Bengkulu,13,50,16
2,29,9,2022,-5.4541,102.6805,37.099,4.5,kotabumi,12,56,31
3,19,9,2022,-5.5071,102.886,38.085,4.2,Pagar Alam,19,16,25
4,13,9,2022,-5.6133,104.1844,96.048,4.9,Kotabumi,11,18,3


In [56]:
data['label'] = pd.cut(data['Mag'], bins=[0, 4, 6, float('inf')], labels=['gempa lemah', 'gempa sedang', 'gempa kuat'])
data

Unnamed: 0,Day,Month,Years,Latitude,Longitude,Depth,Mag,Place,Jam,Menit,Detik,label
0,4,10,2022,-4.9967,104.2793,112.997,4.40,Kotabumi,20,14,43,gempa sedang
1,1,10,2022,-3.8325,101.8353,60.652,4.90,Bengkulu,13,50,16,gempa sedang
2,29,9,2022,-5.4541,102.6805,37.099,4.50,kotabumi,12,56,31,gempa sedang
3,19,9,2022,-5.5071,102.8860,38.085,4.20,Pagar Alam,19,16,25,gempa sedang
4,13,9,2022,-5.6133,104.1844,96.048,4.90,Kotabumi,11,18,3,gempa sedang
...,...,...,...,...,...,...,...,...,...,...,...,...
2807,15,4,1916,-4.3380,102.5970,35.000,6.71,Bengkulu,12,31,43,gempa kuat
2808,14,7,1914,-5.3050,102.5790,30.000,6.32,Pagar Alam,3,10,26,gempa kuat
2809,25,6,1914,-4.0120,101.6380,35.000,7.55,Bengkulu,19,7,25,gempa kuat
2810,3,6,1909,-2.7130,101.1880,35.000,7.58,Sungai Penuh,18,40,43,gempa kuat


In [57]:
label_dummies = pd.get_dummies(data['label'])

In [58]:
dataset = pd.concat([data, label_dummies], axis=1)

In [59]:
X = data[['Detik', 'Menit', 'Jam', 'Day', 'Month', 'Years', 'Latitude', 'Longitude', 'Depth']]
y = data['label']

In [60]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [61]:
# Train and evaluate models
models = [
    ('Random Forest', RandomForestClassifier()),
    ('SVM', SVC()),
    ('KNN', KNeighborsClassifier()),
    ('Gradient Boosting', GradientBoostingClassifier()),
    ('Neural Network', MLPClassifier()),
    ('Decision Tree', DecisionTreeClassifier()),
    ('Logistic Regression', LogisticRegression()),
    ('Naive Bayes', GaussianNB())
]

In [62]:
y_test

402     gempa sedang
2259    gempa sedang
1190    gempa sedang
296     gempa sedang
1099    gempa sedang
            ...     
1945    gempa sedang
926      gempa lemah
1195    gempa sedang
1368    gempa sedang
2183     gempa lemah
Name: label, Length: 563, dtype: category
Categories (3, object): ['gempa lemah' < 'gempa sedang' < 'gempa kuat']

In [63]:
X_test

Unnamed: 0,Detik,Menit,Jam,Day,Month,Years,Latitude,Longitude,Depth
402,41,54,21,7,8,2016,-5.2934,102.5759,54.61
2259,4,44,8,29,7,1994,-4.7910,103.1290,33.00
1190,22,26,22,12,9,2007,-4.3630,101.1800,35.00
296,43,3,23,19,5,2018,-4.2028,102.0625,60.79
1099,57,23,22,23,9,2007,-3.6820,100.8460,35.00
...,...,...,...,...,...,...,...,...,...
1945,6,42,22,6,6,2000,-5.3970,102.0490,33.00
926,54,50,17,23,5,2008,-3.6120,101.3920,146.40
1195,15,54,21,12,9,2007,-4.2160,101.1070,28.10
1368,36,29,11,28,6,2005,-4.9120,102.8850,30.00


In [64]:
y_pred

array(['gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa kuat',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa lemah',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa lemah', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'gempa sedang', 'gempa sedang', 'gempa sedang', 'gempa sedang',
       'ge

In [76]:
from sklearn.metrics import precision_score,f1_score
precision = precision_score(y_test, y_pred, average='macro')
f1 =f1_score(y_test,y_pred,average='macro')


In [77]:
for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name}:")
    print(f"Accuracy: {accuracy:.3f}")
    print(f"precision: {precision:.3f}")
    print(f"f1:{f1:.3f}")

Random Forest:
Accuracy: 0.950
precision: 0.596
f1:0.521
SVM:
Accuracy: 0.945
precision: 0.596
f1:0.521
KNN:
Accuracy: 0.948
precision: 0.596
f1:0.521
Gradient Boosting:
Accuracy: 0.945
precision: 0.596
f1:0.521
Neural Network:
Accuracy: 0.934
precision: 0.596
f1:0.521
Decision Tree:
Accuracy: 0.890
precision: 0.596
f1:0.521
Logistic Regression:
Accuracy: 0.945
precision: 0.596
f1:0.521
Naive Bayes:
Accuracy: 0.927
precision: 0.596
f1:0.521


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
