# Deprem

In [811]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler,LabelEncoder
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import roc_curve, auc,confusion_matrix
from sklearn.metrics import accuracy_score, recall_score, cohen_kappa_score, f1_score, roc_auc_score
from pandas.plotting import table


In [819]:
column_names = ["Station_ID","Location","Longitude","Latitude","Vs30(m/s)","EBD(m)","Predomninant_frequency","H/V_ratio","Building_damage_situation"]

df = pd.read_csv('data.csv', names=column_names)
df = df.drop(0)
df = df.drop(columns=["Station_ID","Location","Longitude","Latitude"])

df.head()

Unnamed: 0,Vs30(m/s),EBD(m),Predomninant_frequency,H/V_ratio,Building_damage_situation
1,720,15,536,34,0
2,546,22,144,34,0
3,315,520,143,59,1
4,521,33,560,39,0
5,238,146,500,66,1


In [812]:
categoric_features = ["Location","Building_damage_situation"]

le = LabelEncoder()
for column in categoric_features:
    df[column] = le.fit_transform(df[column]) 


df.head()

Unnamed: 0,Location,Longitude,Latitude,Vs30(m/s),EBD(m),Predomninant_frequency,H/V_ratio,Building_damage_situation
1,2,3662062,3701184,720,15,536,34,0
2,0,36573771,36930889,546,22,144,34,0
3,1,36648373,3709933,315,520,143,59,1
4,3,36670482,37128525,521,33,560,39,0
5,13,3614766,3658801,238,146,500,66,1


Split data into test,  validation and train

In [813]:
train, test = np.array_split(df.sample(frac=1),  [int(0.7*len(df))])
print(train.count(), test.count())

Location                     30
Longitude                    30
Latitude                     30
Vs30(m/s)                    30
EBD(m)                       30
Predomninant_frequency       30
H/V_ratio                    30
Building_damage_situation    30
dtype: int64 Location                     14
Longitude                    14
Latitude                     14
Vs30(m/s)                    14
EBD(m)                       14
Predomninant_frequency       14
H/V_ratio                    14
Building_damage_situation    14
dtype: int64


  return bound(*args, **kwds)


In [814]:
unwanted_columns = ['Location','Longitude','Latitude']

In [817]:
def scale_dataset(df, oversample):
    
    df = df.drop(columns=unwanted_columns)
    X = df[df.columns[:-1]].values
    y = df[df.columns[-1]].values
    
    X = np.array([[float(str(val).replace(',', '.')) for val in row] for row in X])
    
    if oversample:
        print(df['Building_damage_situation'].value_counts())
        ros = RandomOverSampler()
        X, y = ros.fit_resample(X, y)
        print('0   ', len(X), '\n1   ', len(y))
        
    data = pd.DataFrame(np.hstack((X, np.reshape(y, (-1, 1)))), columns=df.columns)
    df.head()
    return data, X, y





In [818]:
train, X_train, y_train = scale_dataset(train, oversample= False)
test, X_test, y_test = scale_dataset(test, oversample=False)

KeyError: "['Location', 'Longitude', 'Latitude'] not found in axis"

In [None]:
def get_scores(y_true, y_pred, y_pred_prob):
    cm = confusion_matrix(y_true, y_pred)

    accuracy = accuracy_score(y_true, y_pred)
    specificity = recall_score(y_true, y_pred, pos_label=0)
    sensitivity = recall_score(y_true, y_pred)
    f1_score_value = f1_score(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)
    
    roc_auc = roc_auc_score(y_true, y_pred_prob)

    return {
        'Accuracy': accuracy,
        'Specificity': specificity,
        'Sensitivity': sensitivity,
        'F1 Score': f1_score_value,
        'Kappa': kappa,
        'AUC': roc_auc
    }
    

In [None]:
from sklearn.metrics import confusion_matrix

def get_train_result(model, X_test, y_test, is_nn=False):
    y_pred = model.predict(X_test)
    if is_nn:
        y_pred_prob = y_pred[:, 0]
    else:
        y_pred_prob = model.predict_proba(X_test)[:, 1]

    y_pred_binary = (y_pred_prob > 0.5).astype(int)

    plt.figure(figsize=(15, 4))

    # Confusion Matrix
    plt.subplot(1, 3, 2)
    cm = confusion_matrix(y_test, y_pred_binary)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', annot_kws={"size": 16})
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')

    # Metric Scores Table
    plt.subplot(1, 3, 3)
    scores = get_scores(y_test, y_pred_binary, y_pred_prob)
    scores_df = pd.DataFrame(list(scores.items()), columns=['Metric', 'Score'])
    plt.axis('off')  # Sadece tablo görüntülenir, eksenler kapatılır
    plt.table(cellText=scores_df.values, colLabels=scores_df.columns, cellLoc='center', loc='center')
    plt.tight_layout()
    plt.show()


# KNN Model

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score


In [None]:
highest_accuracy = 0.0
best_accuracy_model = None
best_k_value = None

for i in range(1, 10):
    knn_model = KNeighborsClassifier(n_neighbors=i)
    temp_model = knn_model.fit(X_train, y_train)
    
    y_pred = temp_model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    
    ## Cross validation test
    scores = cross_val_score(knn_model,X_test,y_test,cv=5)
    print("cross validate score for k={0} ".format(i),scores.mean())
    
    if accuracy > highest_accuracy:
        highest_accuracy = accuracy
        best_accuracy_model = temp_model
        best_k_value = i
        
print("Model resut with best k={} without cross validation: ".format(best_k_value) , highest_accuracy)


In [None]:
get_train_result(best_accuracy_model, X_test, y_test)

# Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
highest_accuracy_nb = 0.0
best_accuracy_model_nb = None

for var_smoothing in [1e-9, 1e-8, 1e-7]:
    nb_model = GaussianNB(var_smoothing=var_smoothing)
    temp_model_nb = nb_model.fit(X_train, y_train)
    
    y_pred_nb = temp_model_nb.predict(X_test)
    
    accuracy_nb = accuracy_score(y_test, y_pred_nb)
    
    ## Cross validation test
    scores = cross_val_score(nb_model,X_test,y_test,cv=4)
    print("cross validate score for smoothing={0} ".format(var_smoothing),scores.mean())
    
    if accuracy_nb > highest_accuracy_nb:
        highest_accuracy_nb = accuracy_nb
        best_accuracy_model_nb = temp_model_nb
        
print("Model resut with best smoothing={} without cross validation: ".format(var_smoothing) , highest_accuracy_nb)


In [None]:
get_train_result(best_accuracy_model_nb, X_test, y_test)

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
highest_accuracy_lr = 0.0
best_accuracy_model_lr = None

for C in [0.001, 0.01, 0.1, 1, 10, 100]:
    lr_model = LogisticRegression(C=C, random_state=42,solver='liblinear')
    temp_model_lr = lr_model.fit(X_train, y_train)
    
    y_pred_lr = temp_model_lr.predict(X_test)
    
    accuracy_lr = accuracy_score(y_test, y_pred_lr)
     ## Cross validation test
    scores = cross_val_score(lr_model,X_test,y_test,cv=4)
    print("cross validate score for C={0} ".format(C),scores.mean())
    
    
    if accuracy_lr > highest_accuracy_lr:
        highest_accuracy_lr = accuracy_lr
        best_accuracy_model_lr = temp_model_lr
        
print("Model resut with best C={} without cross validation: ".format(C) , highest_accuracy_lr)


In [None]:
get_train_result(best_accuracy_model_lr, X_test, y_test)

# SVM

In [None]:
from sklearn.svm import SVC

In [None]:
highest_accuracy_svm = 0.0
best_accuracy_model_svm = None

for C in [0.001, 0.01, 0.1, 1, 10, 100]:
    svm_model = SVC(C=C, probability=True, random_state=42,kernel='rbf')
    temp_model_svm = svm_model.fit(X_train, y_train)
    
    y_pred_svm = temp_model_svm.predict(X_test)
    
    accuracy_svm = accuracy_score(y_test, y_pred_svm)
    
    ## Cross validation test
    scores = cross_val_score(svm_model,X_test,y_test,cv=4)
    print("cross validate score for C={0} ".format(C),scores.mean())
    
    
    if accuracy_svm > highest_accuracy_svm:
        highest_accuracy_svm = accuracy_svm
        best_accuracy_model_svm = temp_model_svm
        
print("Model resut with best C={} without cross validation: ".format(C) , highest_accuracy_svm)


In [None]:
get_train_result(best_accuracy_model_svm, X_test, y_test)

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier


In [None]:
highest_accuracy_tree = 0.0
best_accuracy_model_tree = None
best_value = 0

for max_depth in [None, 5, 10, 15]:
    tree_model = DecisionTreeClassifier(random_state=42, max_depth=max_depth)
    temp_model_tree = tree_model.fit(X_train, y_train)
    
    y_pred_tree = temp_model_tree.predict(X_test)
    
    accuracy_tree = accuracy_score(y_test, y_pred_tree)
    
    ## Cross validation test
    scores = cross_val_score(tree_model,X_test,y_test,cv=4)
    print("cross validate score for depth={0} ".format(max_depth),scores.mean())
    
    
    if accuracy_tree > highest_accuracy_tree:
        highest_accuracy_tree = accuracy_tree
        best_accuracy_model_tree = temp_model_tree
        best_value = max_depth

print("Model resut with best depth={} without cross validation: ".format(best_value) , highest_accuracy_tree)


In [None]:
get_train_result(best_accuracy_model_tree, X_test, y_test)