Algoritmo de K-Nearest Neighbors

In [7]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from collections import Counter

# Relative path of the CSV file holding the phishing dataset.
file_path = "dataset_phishing.csv"

# Read the whole CSV into a DataFrame; later cells select columns from it.
df = pd.read_csv(filepath_or_buffer=file_path)

In [2]:
# Feature matrix: the two predictor columns, as a plain NumPy array.
X = df.loc[:, ['length_url', 'nb_dots']].to_numpy()
# Binary target: 1 where status equals 'phishing', 0 for every other value
# (i.e. legitimate).
y = df['status'].eq('phishing').astype(int).to_numpy()

In [3]:
# Standardize each feature to zero mean and unit variance so that both
# features weigh comparably in the Euclidean distances used by K-NN.
# NOTE(review): the mean/std are computed over the full dataset, before the
# train/test split, so test rows influence the scaling. Consider fitting the
# statistics on the training split only.
feature_mean = X.mean(axis=0)
feature_std = X.std(axis=0)
# A constant column has zero spread; dividing by 1 instead keeps it at 0
# rather than turning every entry into NaN (0 / 0).
feature_std = np.where(feature_std == 0, 1.0, feature_std)
X = (X - feature_mean) / feature_std

In [4]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions equal in both splits, and the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [8]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    The difference ``x1 - x2`` is squared element-wise, summed over all
    elements, and the square root of that sum is returned.
    """
    offset = x1 - x2
    squared_total = np.sum(offset ** 2)
    return np.sqrt(squared_total)

def knn_predict(X_train, y_train, X_test, k=3):
    """Classify each row of ``X_test`` by majority vote of its k nearest training rows.

    Distances are Euclidean. They are computed with NumPy broadcasting, one
    batch of query rows at a time, rather than with one Python-level call per
    (query, training) pair; the batch size is chosen so that the temporary
    arrays stay bounded in memory regardless of how many queries are passed.

    Parameters
    ----------
    X_train : array-like of shape (n_train, n_features)
        Training samples. A 1-D input is treated as a single feature column.
    y_train : array-like of shape (n_train,)
        Label of each training sample.
    X_test : array-like of shape (n_test, n_features)
        Samples to classify. A 1-D input is treated as a single feature column.
    k : int, default 3
        Number of neighbours that vote. If it exceeds ``n_train`` every
        training sample votes.

    Returns
    -------
    numpy.ndarray of shape (n_test,)
        Predicted label per query row, with the dtype of ``y_train``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, if the training set is empty, or if
        ``X_train`` and ``y_train`` differ in length.

    Notes
    -----
    Equal distances are ordered by training index (stable sort), and a tied
    vote goes to the label seen first when walking the neighbours from
    nearest to farthest, so the output is deterministic.
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    y_train = np.asarray(y_train)

    # Accept 1-D inputs as a single feature column.
    if X_train.ndim == 1:
        X_train = X_train.reshape(-1, 1)
    if X_test.ndim == 1:
        X_test = X_test.reshape(-1, 1)

    if k < 1:
        raise ValueError("k must be a positive integer")
    if X_train.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")
    if X_train.shape[0] != y_train.shape[0]:
        raise ValueError("X_train and y_train must have the same length")

    # Cap the broadcast temporary (batch x n_train x n_features) at roughly
    # 4M float64 values (~32 MB) so large query sets do not exhaust memory.
    max_elements = 2 ** 22
    row_cost = X_train.shape[0] * max(X_train.shape[1], X_test.shape[1], 1)
    batch_size = max(1, max_elements // row_cost)

    predictions = []
    for start in range(0, X_test.shape[0], batch_size):
        queries = X_test[start:start + batch_size]
        # Pairwise differences via broadcasting: (batch, 1, d) - (1, n_train, d).
        diff = queries[:, None, :] - X_train[None, :, :]
        distances = np.sqrt(np.sum(diff ** 2, axis=2))
        # Stable sort: ties in distance resolve to the lowest training index.
        nearest = np.argsort(distances, axis=1, kind="stable")[:, :k]
        for neighbour_labels in y_train[nearest]:
            # most_common keeps first-seen order among equal counts, so a
            # tied vote favours the closest neighbour's label.
            winner = Counter(neighbour_labels).most_common(1)[0][0]
            predictions.append(winner)

    return np.array(predictions, dtype=y_train.dtype)

In [9]:
# Number of neighbours that vote on each prediction.
k = 5
# Label every held-out sample with the K-NN classifier defined above.
y_pred = knn_predict(X_train, y_train, X_test, k)

In [10]:
# Accuracy: fraction of held-out samples whose predicted label equals the
# true label.
accuracy = (y_pred == y_test).mean()
print(f"Exactitud del modelo K-NN con k={k}: {accuracy:.4f}")


Exactitud del modelo K-NN con k=5: 0.6339


In [13]:
# Split the held-out samples by true class so each gets its own colour.
phishing = X_test[y_test == 1]
legitimate = X_test[y_test == 0]

# Grid over the feature space with a one-unit margin on every side.
# K-NN is evaluated once per grid node, so the cost grows with
# GRID_RESOLUTION ** 2. The recorded run with 200 nodes per axis
# (40,000 queries) was interrupted before finishing, presumably because of
# that cost, hence the coarser default; raise it for a smoother boundary.
GRID_RESOLUTION = 100
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, GRID_RESOLUTION),
    np.linspace(y_min, y_max, GRID_RESOLUTION),
)

# Classify every grid node, then restore the 2-D layout contourf expects.
grid_points = np.column_stack([xx.ravel(), yy.ravel()])
Z = knn_predict(X_train, y_train, grid_points, k=k).reshape(xx.shape)

# Draw the predicted regions and overlay the held-out samples.
fig, ax = plt.subplots(figsize=(10, 6))
ax.contourf(xx, yy, Z, alpha=0.8, cmap='coolwarm')
ax.scatter(phishing[:, 0], phishing[:, 1], color='red', label='Phishing (1)')
ax.scatter(legitimate[:, 0], legitimate[:, 1], color='blue', label='Legitimate (0)')
ax.set_title(f'Frontera de Decisión - K-NN (k={k})')
ax.set_xlabel('length_url (normalizado)')
ax.set_ylabel('nb_dots (normalizado)')
ax.legend()
plt.show()

KeyboardInterrupt: 