In [232]:
import numpy as np
import pandas as pd
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix


In [220]:
def Euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows, ignoring the last slot.

    Only positions ``0 .. len(row1) - 2`` are compared; the final element of
    ``row1`` (and whatever sits at the same position in ``row2``) never enters
    the sum.

    Args:
        row1: Indexable sequence; its length decides how many positions are
            compared.
        row2: Indexable sequence with a value at every compared position.

    Returns:
        The square root of the summed squared differences.
    """
    span = len(row1) - 1  # number of leading positions that take part
    squared_total = 0
    for position in range(span):
        gap = row1[position] - row2[position]
        squared_total += gap ** 2
    return sqrt(squared_total)


In [233]:
def Get_Neighbors(train, test_row, num):
    """Return the ``num`` rows of ``train`` that lie closest to ``test_row``.

    Closeness is measured with ``Euclidean_distance(test_row, row)``.

    Args:
        train: Iterable of training rows.
        test_row: Row to measure every training row against.
        num: How many of the closest rows to keep.

    Returns:
        A NumPy array holding the first ``num`` training rows after ordering
        them by ascending distance.
    """
    rows = [row for row in train]
    gaps = np.array([Euclidean_distance(test_row, row) for row in rows])
    # argsort yields the row indices ordered from smallest to largest distance.
    closest_first = np.argsort(gaps)
    ranked_rows = np.array(rows)[closest_first]
    return ranked_rows[:num]


In [222]:
def predict_classification(train, test_row, num):
    """Predict the class of ``test_row`` by majority vote of its nearest rows.

    Args:
        train: Training rows whose last element is the class label.
        test_row: Row to classify.
        num: Number of nearest neighbours that vote.

    Returns:
        The label occurring most often among the neighbours; on a tie, the
        tied label that shows up first in the neighbour order wins.
    """
    nearest_rows = Get_Neighbors(train, test_row, num)
    votes = [row[-1] for row in nearest_rows]  # label sits in the last slot
    return max(votes, key=votes.count)


In [223]:

def Evaluate(y_true, y_pred):
    """Return the accuracy: the fraction of predictions equal to the true label.

    Args:
        y_true: Sequence of true labels.
        y_pred: Sequence of predicted labels, aligned position by position
            with ``y_true``.

    Returns:
        Number of matching positions divided by ``len(y_true)``.

    Raises:
        ValueError: If the two sequences differ in length. Without this check
            surplus predictions would be ignored silently and the score would
            be misleading.
        ZeroDivisionError: If both sequences are empty.
    """
    n_samples = len(y_true)
    if len(y_pred) != n_samples:
        raise ValueError(
            "y_true and y_pred must have the same length, "
            f"got {n_samples} and {len(y_pred)}"
        )
    n_correct = sum(1 for truth, guess in zip(y_true, y_pred) if truth == guess)
    return n_correct / n_samples



In [224]:
def knn_predict(X_train, X_test, y_train, num_neighbors=5):
    """Classify each row of ``X_test`` by majority vote of its nearest training rows.

    Distances are Euclidean and computed over every feature of a row.

    Args:
        X_train: Training feature rows (numeric sequences without a label
            column). Iterated once per test row.
        X_test: Feature rows to classify.
        y_train: Labels aligned position by position with ``X_train``.
        num_neighbors: Number of nearest training rows that vote; must be at
            least 1.

    Returns:
        A list with one predicted label per row of ``X_test``, in order. When
        several labels tie for the most votes, the tied label whose first vote
        comes from the nearest neighbour wins.

    Raises:
        ValueError: If ``num_neighbors`` is smaller than 1, or if there are no
            training rows to vote for a test row.
    """
    if num_neighbors < 1:
        raise ValueError("num_neighbors must be at least 1")

    predictions = []
    for test_row in X_test:
        # The rows here are pure feature vectors, so every position must take
        # part in the distance. Euclidean_distance() skips the final position
        # (it is written for rows with a trailing label) and would silently
        # drop the last feature, hence the inline computation.
        scored = [
            (sqrt(sum((a - b) ** 2 for a, b in zip(test_row, train_row))), label)
            for train_row, label in zip(X_train, y_train)
        ]
        # list.sort is stable: equally distant rows keep their training order.
        scored.sort(key=lambda pair: pair[0])
        neighbor_labels = [label for _, label in scored[:num_neighbors]]
        # Vote over the distance-ordered list rather than a set so that ties
        # are resolved deterministically.
        predictions.append(max(neighbor_labels, key=neighbor_labels.count))
    return predictions


In [225]:

# Absolute, machine-specific location of the CSV; adjust it when running elsewhere.
dataset_path = 'C:\\Users\\Bhumika\\Downloads\\archive (2)\\ROCK_OR_MINE.csv'
dataset = pd.read_csv(dataset_path)


In [226]:
# Positional split of the frame: every column except the last is a feature (X),
# the last column is the target variable (y).
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values


In [230]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Classify the held-out rows with the hand-written KNN and score the result.
num_neighbors = 5
predicted_labels = knn_predict(
    X_train, X_test, y_train, num_neighbors=num_neighbors
)
accuracy = Evaluate(y_test, predicted_labels)

# Report accuracy, predictions next to the true labels, and the confusion matrix.
print(f"Accuracy of the KNN model: {accuracy}")
print(f"Predicted labels: {predicted_labels}")
print(f"True labels: {y_test}")
conf_matrix = confusion_matrix(y_test, predicted_labels)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy of the KNN model: 0.7380952380952381
Predicted labels: ['R', 'R', 'R', 'R', 'M', 'R', 'M', 'M', 'M', 'R', 'M', 'R', 'M', 'M', 'M', 'M', 'R', 'M', 'R', 'R', 'M', 'M', 'M', 'M', 'R', 'M', 'R', 'M', 'M', 'M', 'M', 'R', 'R', 'M', 'R', 'R', 'M', 'R', 'M', 'M', 'R', 'M']
True labels: ['M' 'R' 'R' 'M' 'M' 'R' 'M' 'M' 'R' 'M' 'M' 'R' 'M' 'M' 'M' 'M' 'M' 'M'
 'R' 'R' 'M' 'M' 'M' 'M' 'R' 'R' 'R' 'R' 'M' 'M' 'M' 'R' 'R' 'M' 'M' 'M'
 'M' 'M' 'R' 'M' 'R' 'M']
Confusion Matrix:
[[20  7]
 [ 4 11]]


In [231]:
# Same experiment with a smaller hold-out: 10% of the rows, same random_state.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Classify the held-out rows with the hand-written KNN and score the result.
num_neighbors2 = 5
predicted_labels2 = knn_predict(
    X_train2, X_test2, y_train2, num_neighbors=num_neighbors2
)
accuracy2 = Evaluate(y_test2, predicted_labels2)

# Report accuracy, predictions next to the true labels, and the confusion matrix.
print(f"Accuracy of the KNN model: {accuracy2}")
print(f"Predicted labels: {predicted_labels2}")
print(f"True labels: {y_test2}")
conf_matrix2 = confusion_matrix(y_test2, predicted_labels2)
print("Confusion Matrix:")
print(conf_matrix2)

Accuracy of the KNN model: 0.7619047619047619
Predicted labels: ['R', 'R', 'R', 'R', 'M', 'R', 'M', 'M', 'M', 'R', 'M', 'R', 'M', 'M', 'M', 'M', 'R', 'M', 'R', 'R', 'M']
True labels: ['M' 'R' 'R' 'M' 'M' 'R' 'M' 'M' 'R' 'M' 'M' 'R' 'M' 'M' 'M' 'M' 'M' 'M'
 'R' 'R' 'M']
Confusion Matrix:
[[10  4]
 [ 1  6]]


In [234]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Reference run: scikit-learn's KNN with the same k (5) on the X_train/X_test
# split, to compare against the hand-written implementation.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)
# NOTE: this rebinds `accuracy`, replacing the value computed for the
# hand-written KNN on the same split.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7380952380952381
