In [None]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report
import plotly.express as px


In [None]:
# Read the train and test sets; in both, columns `x` and `y` are the features
# and `cluster` is the label.
train_data, test_data = (
    pd.read_csv(csv_name) for csv_name in ('train_set.csv', 'test_set.csv')
)

X_train, y_train = train_data[['x', 'y']], train_data['cluster']
X_test, y_test = test_data[['x', 'y']], test_data['cluster']


# ***Normal kNN***

In [None]:
# Baseline model: a k-nearest-neighbours classifier with k = 3, fitted on the
# (x, y) training features and their cluster labels.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

In [None]:
# Predict a cluster label for every test point with the fitted baseline model.
y_pred = knn.predict(X_test)

In [None]:
# Score the baseline predictions: support-weighted F1, then the per-class
# report, then the confusion matrix. Each metric is printed right after it
# is computed.
f1 = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", f1)

class_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", class_report)

conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)

F1 Score: 0.8915151515151515

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       0.83      1.00      0.91         5
           2       1.00      1.00      1.00        13
           3       1.00      0.50      0.67         6
           4       0.67      1.00      0.80         4

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.88        30
weighted avg       0.93      0.90      0.89        30


Confusion Matrix:
 [[ 2  0  0  0  0]
 [ 0  5  0  0  0]
 [ 0  0 13  0  0]
 [ 0  1  0  3  2]
 [ 0  0  0  0  4]]


In [None]:
# Put the test coordinates next to the true and predicted labels, then flag
# which predictions were right.
results = X_test[['x', 'y']].assign(true_label=y_test, predicted_label=y_pred)
results['correct'] = results['true_label'].eq(results['predicted_label'])

# Correct predictions are drawn as green circles, mistakes as red crosses.
fig = px.scatter(
    data_frame=results,
    x='x',
    y='y',
    color='correct',
    symbol='correct',
    color_discrete_map={True: 'green', False: 'red'},
    symbol_map={True: 'circle', False: 'x'},
    labels={'correct': 'Prediction Status'},
    title='Correct and Incorrect Predictions',
)

fig.show()



# ***Improved kNN***

In [None]:
class IKNN:
    """Centre-based "improved k-NN" classifier.

    Each test point is compared with the nearest class centres taken from a
    per-class summary table. Every candidate class is scored as
    ``(count + mean_distance) / squared_distance_to_centre`` and the class
    with the highest score is predicted.

    The summary CSV must provide the columns ``center_x``, ``center_y``,
    ``count``, ``mean_distance`` and ``cluster``.
    """

    def __init__(self, class_info_csv, n_neighbors=3):
        """Load the class summary table and index its centres.

        Args:
            class_info_csv: Location of the class summary CSV, handed to
                ``pd.read_csv``.
            n_neighbors: Number of candidate centres examined per test point.
                It is capped at the number of rows in the table so that a
                table with fewer classes than ``n_neighbors`` still works.

        Raises:
            ValueError: If the table has no rows or ``n_neighbors`` is
                smaller than 1.
        """
        self.class_info = pd.read_csv(class_info_csv)
        if self.class_info.empty:
            raise ValueError("class info table is empty; cannot build the neighbour index")
        if n_neighbors < 1:
            raise ValueError("n_neighbors must be at least 1")

        self.n_neighbors = min(n_neighbors, len(self.class_info))
        # Fit on a bare array: fitting on a DataFrame makes scikit-learn warn
        # about missing feature names on every query that is not a DataFrame
        # with the same column names.
        centers = self.class_info[['center_x', 'center_y']].to_numpy(dtype=float)
        self.nbrs = NearestNeighbors(
            n_neighbors=self.n_neighbors, algorithm='ball_tree'
        ).fit(centers)

    def predict(self, test_df):
        """Predict a class for every row of ``test_df``.

        Args:
            test_df: DataFrame with numeric columns ``x`` and ``y``.

        Returns:
            The same ``test_df`` object, modified in place: a
            ``predicted_target`` column holds the predicted ``cluster`` value
            of each row (in the dtype stored in the class summary table).
        """
        labels = self.class_info['cluster'].to_numpy()
        if len(test_df) == 0:
            # Nothing to classify; still add the column so callers can rely on it.
            test_df['predicted_target'] = labels[:0]
            return test_df

        points = test_df[['x', 'y']].to_numpy(dtype=float)
        centers = self.class_info[['center_x', 'center_y']].to_numpy(dtype=float)
        weights = (
            self.class_info['count'] + self.class_info['mean_distance']
        ).to_numpy(dtype=float)

        # One batched query for all test points instead of one query per row.
        _, neighbor_idx = self.nbrs.kneighbors(points)

        # Squared distance from every point to each of its candidate centres.
        near = centers[neighbor_idx]
        dx = points[:, 0, None] - near[:, :, 0]
        dy = points[:, 1, None] - near[:, :, 1]
        distance_sq = dx ** 2 + dy ** 2

        # A point lying exactly on a centre would divide by zero; give that
        # centre an infinite score instead so it always wins.
        on_center = distance_sq == 0
        distance_sq[on_center] = 1.0
        scores = weights[neighbor_idx] / distance_sq
        scores[on_center] = float('inf')

        # argmax keeps the first (i.e. nearest) candidate on ties.
        best = scores.argmax(axis=1)
        best_rows = [row[j] for row, j in zip(neighbor_idx, best)]

        test_df['predicted_target'] = labels[best_rows]
        return test_df

    def evaluate(self, test_df):
        """Print evaluation metrics for predictions stored in ``test_df``.

        Prints support-weighted precision, recall and F1 (two decimals),
        followed by the confusion matrix and the classification report.

        Args:
            test_df: DataFrame holding the true labels in ``cluster`` and the
                predictions in ``predicted_target``.
        """
        y_true = test_df['cluster']
        y_hat = test_df['predicted_target']

        precision = precision_score(y_true, y_hat, average='weighted')
        recall = recall_score(y_true, y_hat, average='weighted')
        f1 = f1_score(y_true, y_hat, average='weighted')

        print(f"Precision: {precision:.2f}")
        print(f"Recall: {recall:.2f}")
        print(f"F1 Score: {f1:.2f}")

        print("Confusion Matrix:")
        print(confusion_matrix(y_true, y_hat))

        print("Classification Report:")
        print(classification_report(y_true, y_hat))

    def visualize_predictions(self, test_df):
        """Show a scatter plot of correct (green) and incorrect (red) predictions.

        Args:
            test_df: DataFrame with columns ``x``, ``y``, ``cluster`` and
                ``predicted_target``. A boolean ``correct`` column is added
                to it in place.
        """
        test_df['correct'] = test_df['predicted_target'] == test_df['cluster']

        fig = px.scatter(test_df, x='x', y='y', color='correct',
                         color_discrete_map={True: 'green', False: 'red'},
                         title='Correct and Incorrect Predictions',
                         labels={'correct': 'Prediction Correctness'})

        fig.show()

# Run the improved classifier end to end: build it from the class summary
# file, predict on the test set, then report metrics and plot the outcome.
class_info_csv, test_csv = 'merged_file.csv', 'test_set.csv'

iknn = IKNN(class_info_csv)
test_df = iknn.predict(pd.read_csv(test_csv))

iknn.evaluate(test_df)
iknn.visualize_predictions(test_df)

Precision: 0.97
Recall: 0.97
F1 Score: 0.97
Confusion Matrix:
[[ 2  0  0  0  0]
 [ 0  5  0  0  0]
 [ 0  0 13  0  0]
 [ 0  1  0  5  0]
 [ 0  0  0  0  4]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       0.83      1.00      0.91         5
           2       1.00      1.00      1.00        13
           3       1.00      0.83      0.91         6
           4       1.00      1.00      1.00         4

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30




X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does not have valid feature names, but NearestNeighbors was fitted with feature names


X does no