In [13]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

In [14]:
# Read the sonar CSV (expected in the notebook's working directory)
# into a DataFrame.
data = pd.read_csv(filepath_or_buffer='sonar.csv')

In [15]:
# Preview the first five rows to sanity-check the columns that were loaded.
data.head(n=5)

Unnamed: 0,attribute_1',attribute_2',attribute_3',attribute_4',attribute_5',attribute_6',attribute_7',attribute_8',attribute_9',attribute_10',...,attribute_52',attribute_53',attribute_54',attribute_55',attribute_56',attribute_57',attribute_58',attribute_59',attribute_60',class'
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,Rock
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,Rock
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,Rock
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,Rock
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,Rock


In [16]:
# Separate the features and the target.
# data.head() shows a leading 'Unnamed: 0' column holding 0, 1, 2, ... --
# apparently the row index that was written out with the CSV (TODO confirm).
# It is a row counter, not a sonar attribute, so it is dropped here; otherwise
# it would be scaled and used by KNN as a feature, and if the file is ordered
# by class it would leak the label. errors='ignore' keeps the cell working
# when the column is absent (e.g. the CSV was loaded with index_col=0).
X = data.iloc[:, :-1].drop(columns=['Unnamed: 0'], errors='ignore')  # Features
y = data.iloc[:, -1]   # Target (last column)

In [17]:
# Map class labels to binary values (Rock -> 0, Mine -> 1).
# The labels are read from the last column of `data` instead of re-mapping
# `y` in place: mapping an already-mapped `y` would look up 0/1 in the
# dictionary and turn every label into NaN, so the original form broke when
# the cell was run twice.
y = data.iloc[:, -1].map({'Rock': 0, 'Mine': 1})

# Series.map() yields NaN for any value missing from the dictionary (e.g. a
# label with different casing or stray whitespace); stop here with a clear
# message rather than passing NaN targets on to the classifier.
assert y.notna().all(), "Unexpected class label(s): expected only 'Rock' or 'Mine'"

In [18]:
# Accumulator for the per-run metric dictionaries (one entry per run).
results = list()

In [19]:
# Run the model 5 times, each on a different random 70/30 train/test split,
# and record the evaluation metrics of every run in `results`.
#
# `results` is reset here so that re-running this cell replaces the previous
# runs instead of appending five more rows to them (which would silently
# skew the averages computed later).
results = []
for i in range(5):
    # Split the dataset (70% training, 30% testing).
    # random_state=i gives a different but reproducible split on every run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=i)

    # Standardize the features. The scaler is fitted on the training split
    # only and then applied to the test split, so no test-set statistics are
    # used during training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Initialize KNN classifier
    knn = KNeighborsClassifier(n_neighbors=5)

    # Train the model
    knn.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = knn.predict(X_test_scaled)
    # Column 1 of predict_proba is the second entry of knn.classes_, i.e.
    # label 1 given the 0/1 encoding of y; it is the score used for ROC AUC.
    y_prob = knn.predict_proba(X_test_scaled)[:, 1]

    # Calculate confusion matrix.
    # labels=[0, 1] pins the matrix to 2x2 even if only one class shows up in
    # y_test/y_pred; without it the matrix can be 1x1 and the four-way
    # unpacking below raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    # Calculate the performance metrics (positive class is label 1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)

    # Calculate FPR (False Positive Rate) and FNR (False Negative Rate)
    fpr = fp / (fp + tn)  # share of actual negatives predicted positive
    fnr = fn / (fn + tp)  # share of actual positives predicted negative

    # Append the results for this run ('Run' is 1-based for readability)
    results.append({
        'Run': i + 1,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'FPR': fpr,
        'FNR': fnr,
        'AUC': auc
    })

In [20]:
# Build a table from the per-run metric dictionaries: one row per run,
# one column per dictionary key.
results_df = pd.DataFrame(data=results)

In [21]:
# Show the metrics of every run as a plain-text table.
print(str(results_df))

   Run  Accuracy  Precision    Recall  F1-Score       FPR       FNR       AUC
0    1  0.825397   0.780488  0.941176  0.853333  0.310345  0.058824  0.906694
1    2  0.730159   0.692308  0.843750  0.760563  0.387097  0.156250  0.886089
2    3  0.841270   0.800000  0.941176  0.864865  0.275862  0.058824  0.956389
3    4  0.809524   0.769231  0.909091  0.833333  0.300000  0.090909  0.899495
4    5  0.793651   0.739130  0.971429  0.839506  0.428571  0.028571  0.928061


In [23]:
# Calculate the average accuracy across all runs
average_accuracy = results_df['Accuracy'].mean()
# The run count in the message is taken from results_df (one row per run)
# instead of being hard-coded, so it stays correct if the number of runs
# changes.
print(f'\nAverage Accuracy over {len(results_df)} runs: {average_accuracy:.4f}')


Average Accuracy over 5 runs: 0.8000
