
# Parte 1

In [26]:
from imblearn.over_sampling import SMOTE
from collections import Counter
import random
import numpy as np
import math

## Métodos de validación

In [15]:
def hold_out(dataset, test_size=0.2):
    """Randomly split ``dataset`` into a training part and a test part.

    The input is copied with a slice before shuffling, so the caller's
    sequence is left in its original order.

    Args:
        dataset: Sliceable, mutable sequence of samples (e.g. a list).
        test_size: Fraction of the samples reserved for the test part.

    Returns:
        A ``(train_set, test_set)`` tuple; the training part holds the first
        ``int(len(dataset) * (1 - test_size))`` shuffled samples and the test
        part holds the rest.
    """
    shuffled = dataset[:]
    random.shuffle(shuffled)
    cut = int(len(shuffled) * (1 - test_size))
    return shuffled[:cut], shuffled[cut:]

def k_fold_split(dataset, k=10):
    """Shuffle ``dataset`` and partition it into ``k`` folds.

    Every sample ends up in exactly one fold. When ``len(dataset)`` is not a
    multiple of ``k`` the leftover samples are spread one per fold over the
    first ``len(dataset) % k`` folds, so fold sizes differ by at most one.
    (Plain integer division would silently discard those leftover samples.)

    Args:
        dataset: Sliceable, mutable sequence of samples (e.g. a list). It is
            copied before shuffling, so the caller's sequence is not modified.
        k: Number of folds.

    Returns:
        A list with ``k`` folds, each a slice of the shuffled copy.

    Raises:
        ZeroDivisionError: If ``k`` is 0.
    """
    shuffled = dataset[:]
    random.shuffle(shuffled)
    base_size, remainder = divmod(len(shuffled), k)

    folds = []
    start = 0
    for fold_index in range(k):
        # The first `remainder` folds absorb one extra sample each.
        end = start + base_size + (1 if fold_index < remainder else 0)
        folds.append(shuffled[start:end])
        start = end
    return folds

## Clasificador euclidiano y 1NN

In [16]:
# Funciones previamente definidas
def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two coordinate sequences.

    Coordinates are paired with ``zip``, so if the sequences differ in length
    the extra coordinates of the longer one are ignored.
    """
    squared_diffs = [(a - b) ** 2 for a, b in zip(point1, point2)]
    return math.sqrt(sum(squared_diffs))

def calculate_centroids(train_set):
    """Compute the per-class mean point of a labelled training set.

    Args:
        train_set: Iterable of ``(point, label)`` pairs, where ``point`` is a
            sequence of numeric coordinates.

    Returns:
        A dict mapping each label to its centroid (a list holding the mean of
        every coordinate), in order of first appearance of the label.
    """
    grouped = {}
    for features, cls in train_set:
        grouped.setdefault(cls, []).append(features)

    return {
        cls: [sum(column) / len(members) for column in zip(*members)]
        for cls, members in grouped.items()
    }

def euclidean_classifier(train_set, test_point):
    """Classify ``test_point`` by its nearest class centroid.

    Centroids are recomputed from ``train_set`` on every call. Ties are
    resolved in favour of the centroid encountered first.

    Returns:
        The label of the closest centroid, or ``None`` when no centroid is
        strictly closer than infinity (e.g. an empty training set).
    """
    best_label, best_distance = None, float('inf')
    for candidate, center in calculate_centroids(train_set).items():
        candidate_distance = euclidean_distance(test_point, center)
        if candidate_distance < best_distance:
            best_label, best_distance = candidate, candidate_distance
    return best_label

def one_nn_classifier(train_set, test_point):
    """Classify ``test_point`` with the label of its single nearest neighbour.

    Args:
        train_set: Iterable of ``(point, label)`` pairs.
        test_point: Sequence of numeric coordinates to classify.

    Returns:
        The label of the closest training point (the first one found wins a
        tie), or ``None`` when no training point is strictly closer than
        infinity (e.g. an empty training set).
    """
    closest_label, closest_distance = None, float('inf')
    for sample, sample_label in train_set:
        sample_distance = euclidean_distance(test_point, sample)
        if sample_distance < closest_distance:
            closest_label, closest_distance = sample_label, sample_distance
    return closest_label

## Carga del dataset

In [19]:
def load_glass_dataset(filepath):
    """Load a comma-separated glass data file into ``(features, label)`` pairs.

    Each non-blank line is split on commas. The first field (the sample ID)
    is dropped, the last field is parsed as the integer class label, and every
    field in between is parsed as a float feature. Blank or whitespace-only
    lines (such as a trailing empty line) are skipped instead of crashing the
    integer conversion.

    Args:
        filepath: Path of the data file to read.

    Returns:
        A list of ``(features, label)`` tuples in file order.

    Raises:
        ValueError: If a non-blank line contains a field that cannot be
            parsed as a number.
    """
    dataset = []
    with open(filepath, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Nothing to parse on an empty line.
                continue
            values = stripped.split(',')
            features = list(map(float, values[1:-1]))  # Drop the ID and the label
            label = int(values[-1])  # Class label
            dataset.append((features, label))
    return dataset

## Accuracy

In [17]:
def accuracy_score(true_labels, predicted_labels):
    """Return the fraction of predictions that match the true labels.

    Args:
        true_labels: Iterable of ground-truth labels.
        predicted_labels: Iterable of predicted labels, in the same order.

    Returns:
        The accuracy as a float in ``[0, 1]``. An empty evaluation set yields
        ``0.0`` rather than a division by zero.

    Raises:
        ValueError: If the two inputs do not contain the same number of
            labels. Pairing them with ``zip`` would otherwise drop the extra
            entries silently and report a misleading accuracy.
    """
    # Materialise both inputs so lengths can be compared even for iterators.
    true_labels = list(true_labels)
    predicted_labels = list(predicted_labels)
    if len(true_labels) != len(predicted_labels):
        raise ValueError(
            f"Length mismatch: {len(true_labels)} true labels vs "
            f"{len(predicted_labels)} predicted labels"
        )
    if not true_labels:
        return 0.0
    correct_predictions = sum(
        1 for true, pred in zip(true_labels, predicted_labels) if true == pred
    )
    return correct_predictions / len(true_labels)


## Ejecución

In [21]:
# Load the Glass dataset from a local comma-separated file
filepath = "glass.data"
dataset = load_glass_dataset(filepath)

# Hold-Out Evaluation
print("\nHold-Out Evaluation:")
# Split the dataset into training (80%) and test (20%) parts.
# NOTE: hold_out shuffles with the `random` module and no seed is set in the
# visible code, so the split (and the accuracies below) vary between runs.
train_set, test_set = hold_out(dataset, test_size=0.2)

# Separate features and labels, the form SMOTE's fit_resample expects
X_train = [point for point, _ in train_set]
y_train = [label for _, label in train_set]

# Show the class distribution before oversampling
counter = Counter(y_train)
print("Before SMOTE (Hold-Out):", counter)

# Apply SMOTE to the training part only; the test part is left untouched
smt = SMOTE(random_state=42)
X_train_sm, y_train_sm = smt.fit_resample(X_train, y_train)

# Show the class distribution after oversampling
counter = Counter(y_train_sm)
print("After SMOTE (Hold-Out):", counter)

# Rebuild the oversampled training set as (point, label) pairs,
# the format the classifiers above consume
train_set_sm = list(zip(X_train_sm, y_train_sm))

# Separate features and labels of the test set
X_test = [point for point, _ in test_set]
y_test = [label for _, label in test_set]

# Classification before SMOTE (original training set)
predictions_1nn = [one_nn_classifier(train_set, point) for point in X_test]
predictions_euclidean = [euclidean_classifier(train_set, point) for point in X_test]
acc_1nn_before = accuracy_score(y_test, predictions_1nn)
acc_euclidean_before = accuracy_score(y_test, predictions_euclidean)
print(f"1-NN Accuracy Before SMOTE: {acc_1nn_before * 100:.2f}%")
print(f"Euclidean Classifier Accuracy Before SMOTE: {acc_euclidean_before * 100:.2f}%")

# Classification after SMOTE (oversampled training set, same test set)
predictions_1nn_sm = [one_nn_classifier(train_set_sm, point) for point in X_test]
predictions_euclidean_sm = [euclidean_classifier(train_set_sm, point) for point in X_test]
acc_1nn_sm = accuracy_score(y_test, predictions_1nn_sm)
acc_euclidean_sm = accuracy_score(y_test, predictions_euclidean_sm)
print(f"1-NN Accuracy After SMOTE: {acc_1nn_sm * 100:.2f}%")
print(f"Euclidean Classifier Accuracy After SMOTE: {acc_euclidean_sm * 100:.2f}%")

# 10-Fold Cross Validation Evaluation
# Reuses the `dataset` loaded for the hold-out evaluation.
print("\n10-Fold Cross Validation Evaluation:")
k = 10
folds = k_fold_split(dataset, k)

# Per-fold accuracies. NOTE: acc_1nn_before and acc_euclidean_before are
# rebound here from the hold-out floats to lists.
acc_1nn_before = []
acc_euclidean_before = []
acc_1nn_after = []
acc_euclidean_after = []

for i in range(k):
    # Hold out fold i for testing; train on the concatenation of the others
    test_set = folds[i]
    train_set = [point for j, fold in enumerate(folds) if j != i for point in fold]

    # Separate features and labels, the form SMOTE's fit_resample expects
    X_train = [point for point, _ in train_set]
    y_train = [label for _, label in train_set]

    # Oversample with SMOTE, using only this iteration's training folds
    smt = SMOTE(random_state=42)
    X_train_sm, y_train_sm = smt.fit_resample(X_train, y_train)
    train_set_sm = list(zip(X_train_sm, y_train_sm))

    # Separate features and labels of the test fold
    X_test = [point for point, _ in test_set]
    y_test = [label for _, label in test_set]

    # Classification before SMOTE (original training folds)
    predictions_1nn = [one_nn_classifier(train_set, point) for point in X_test]
    predictions_euclidean = [euclidean_classifier(train_set, point) for point in X_test]
    acc_1nn_before.append(accuracy_score(y_test, predictions_1nn))
    acc_euclidean_before.append(accuracy_score(y_test, predictions_euclidean))

    # Classification after SMOTE (oversampled training folds, same test fold)
    predictions_1nn_sm = [one_nn_classifier(train_set_sm, point) for point in X_test]
    predictions_euclidean_sm = [euclidean_classifier(train_set_sm, point) for point in X_test]
    acc_1nn_after.append(accuracy_score(y_test, predictions_1nn_sm))
    acc_euclidean_after.append(accuracy_score(y_test, predictions_euclidean_sm))

# Average the per-fold results (each list holds exactly k accuracies)
print(f"1-NN Accuracy Before SMOTE (10-Fold): {sum(acc_1nn_before) / k * 100:.2f}%")
print(f"Euclidean Classifier Accuracy Before SMOTE (10-Fold): {sum(acc_euclidean_before) / k * 100:.2f}%")
print(f"1-NN Accuracy After SMOTE (10-Fold): {sum(acc_1nn_after) / k * 100:.2f}%")
print(f"Euclidean Classifier Accuracy After SMOTE (10-Fold): {sum(acc_euclidean_after) / k * 100:.2f}%")



Hold-Out Evaluation:
Before SMOTE (Hold-Out): Counter({2: 66, 1: 51, 7: 21, 3: 14, 5: 12, 6: 7})
After SMOTE (Hold-Out): Counter({7: 66, 5: 66, 2: 66, 1: 66, 3: 66, 6: 66})
1-NN Accuracy Before SMOTE: 86.05%
Euclidean Classifier Accuracy Before SMOTE: 41.86%
1-NN Accuracy After SMOTE: 83.72%
Euclidean Classifier Accuracy After SMOTE: 34.88%

10-Fold Cross Validation Evaluation:
1-NN Accuracy Before SMOTE (10-Fold): 73.81%
Euclidean Classifier Accuracy Before SMOTE (10-Fold): 40.48%
1-NN Accuracy After SMOTE (10-Fold): 71.43%
Euclidean Classifier Accuracy After SMOTE (10-Fold): 40.48%


# Parte 2

## Perceptron simple

In [24]:
class Perceptron:
    """Single-unit perceptron with a step activation for 0/1 labels.

    ``weights[0]`` holds the bias term and ``weights[1:]`` hold one weight per
    input feature; all of them start at zero.
    """

    def __init__(self, input_size, learning_rate=0.01, epochs=100):
        """Store the hyper-parameters and zero-initialise the weight vector.

        Args:
            input_size: Number of features in each input sample.
            learning_rate: Step size applied to every weight correction.
            epochs: Number of full passes over the training data.
        """
        self.input_size = input_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        # One extra slot at index 0 for the bias.
        self.weights = np.zeros(input_size + 1)

    def activation(self, x):
        """Step function: 1 for non-negative ``x``, otherwise 0."""
        if x >= 0:
            return 1
        return 0

    def predict(self, inputs):
        """Return the 0/1 output of the perceptron for one sample."""
        features = np.array(inputs)
        bias, feature_weights = self.weights[0], self.weights[1:]
        return self.activation(np.dot(features, feature_weights) + bias)

    def train(self, training_inputs, labels):
        """Fit the weights with the perceptron learning rule.

        For every sample, in order and for ``epochs`` passes, the weights are
        moved by ``learning_rate * (label - prediction)`` times the input
        (and by that same factor alone for the bias).
        """
        for _ in range(self.epochs):
            for sample, target in zip(training_inputs, labels):
                step = self.learning_rate * (target - self.predict(sample))
                self.weights[1:] += step * np.array(sample)
                self.weights[0] += step

    def evaluate(self, test_inputs, test_labels):
        """Return the fraction of ``test_inputs`` predicted as ``test_labels``."""
        predicted = np.array([self.predict(sample) for sample in test_inputs])
        hits = predicted == np.array(test_labels)
        return np.mean(hits)

## Cargar dataset

In [22]:
def load_iris_dataset(filepath):
    """Load the Setosa and Virginica rows of a comma-separated Iris file.

    Every line is split on commas; all fields but the last are parsed as
    float features and the last field is the class name. Rows labelled
    ``Iris-setosa`` become class 0, rows labelled ``Iris-virginica`` become
    class 1, and rows with any other class name are skipped.

    Args:
        filepath: Path of the data file to read.

    Returns:
        A list of ``(features, label)`` tuples in file order.
    """
    class_ids = {'Iris-setosa': 0, 'Iris-virginica': 1}
    samples = []
    with open(filepath, 'r') as handle:
        for raw_line in handle:
            *raw_features, species = raw_line.strip().split(',')
            # Features are parsed before the class filter is applied.
            features = [float(value) for value in raw_features]
            if species not in class_ids:
                continue  # Not one of the two classes of interest
            samples.append((features, class_ids[species]))
    return samples

## Ejecución

In [27]:

# Load the Iris dataset (Setosa vs Virginica only).
# NOTE: `filepath`, `dataset`, `train_set`, `test_set`, `X_*` and `y_*` rebind
# the names already used by the Part 1 cells.
filepath = "iris.data"
dataset = load_iris_dataset(filepath)

# Split the dataset into training and test parts (70/30)
train_set, test_set = hold_out(dataset, test_size=0.3)

# Separate features and labels for training and testing
X_train = [point for point, _ in train_set]
y_train = [label for _, label in train_set]
X_test = [point for point, _ in test_set]
y_test = [label for _, label in test_set]

# Create a perceptron; input_size=4 presumably matches the four Iris
# features per row (as seen in the printed inputs)
perceptron = Perceptron(input_size=4, learning_rate=0.1, epochs=100)

# Train the perceptron
perceptron.train(X_train, y_train)

# Evaluate the perceptron on the held-out part
accuracy = perceptron.evaluate(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Print the individual prediction for every test input
for test in X_test:
    print(f"Input: {test}, Prediction: {perceptron.predict(test)}")

Accuracy: 100.00%
Input: [4.6, 3.2, 1.4, 0.2], Prediction: 0
Input: [5.2, 3.5, 1.5, 0.2], Prediction: 0
Input: [6.7, 3.1, 5.6, 2.4], Prediction: 1
Input: [7.7, 3.8, 6.7, 2.2], Prediction: 1
Input: [5.2, 4.1, 1.5, 0.1], Prediction: 0
Input: [4.9, 3.1, 1.5, 0.1], Prediction: 0
Input: [7.2, 3.2, 6.0, 1.8], Prediction: 1
Input: [4.5, 2.3, 1.3, 0.3], Prediction: 0
Input: [4.8, 3.1, 1.6, 0.2], Prediction: 0
Input: [6.9, 3.1, 5.1, 2.3], Prediction: 1
Input: [6.3, 2.5, 5.0, 1.9], Prediction: 1
Input: [5.1, 3.8, 1.9, 0.4], Prediction: 0
Input: [5.7, 4.4, 1.5, 0.4], Prediction: 0
Input: [6.4, 2.8, 5.6, 2.2], Prediction: 1
Input: [4.8, 3.0, 1.4, 0.1], Prediction: 0
Input: [6.4, 2.7, 5.3, 1.9], Prediction: 1
Input: [6.5, 3.0, 5.8, 2.2], Prediction: 1
Input: [6.3, 3.3, 6.0, 2.5], Prediction: 1
Input: [7.9, 3.8, 6.4, 2.0], Prediction: 1
Input: [6.7, 3.3, 5.7, 2.5], Prediction: 1
Input: [5.8, 2.8, 5.1, 2.4], Prediction: 1
Input: [5.8, 2.7, 5.1, 1.9], Prediction: 1
Input: [6.3, 2.9, 5.6, 1.8], Predict