In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be opened
# by path from later cells.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import random

class OneR:
    """One Rule (1R) classifier for categorical data.

    Instances are dictionaries mapping column names to values. Training picks
    the single feature whose "feature value -> majority class" rules classify
    the training data best.
    """

    def __init__(self):
        # ``(feature, rules)`` tuple produced by ``train``; ``None`` until trained.
        self.model = None
        # Fraction of correct predictions from the latest ``test`` call.
        self.accuracy = None

    def train(self, data, class_column):
        """Fit the model and return it as a ``(feature, rules)`` tuple.

        Args:
            data: Non-empty sequence of dict instances. The feature names are
                taken from the keys of the first instance.
            class_column: Key that holds the class label in every instance.

        Returns:
            ``(best_feature, rules)`` where ``rules`` maps each value of the
            chosen feature to its most frequent class. When the data has no
            feature columns at all the result is ``(None, None)``.

        Raises:
            ValueError: If ``data`` is empty or ``class_column`` is not a key
                of the first instance.
        """
        if not data:
            raise ValueError("No hay datos de entrenamiento")
        if class_column not in data[0]:
            # Fail with an explicit message instead of the opaque
            # "list.remove(x): x not in list" error.
            raise ValueError(
                f"La columna de clase '{class_column}' no existe en los datos"
            )

        features = [name for name in data[0] if name != class_column]
        total = len(data)
        best_feature = None
        best_accuracy = 0
        best_rules = None

        for feature in features:
            # counts[feature_value][class_value] -> number of occurrences.
            counts = {}
            for instance in data:
                tally = counts.setdefault(instance[feature], {})
                label = instance[class_column]
                tally[label] = tally.get(label, 0) + 1

            # Majority class per feature value; on a tie the class seen first
            # wins because ``max`` returns the first maximal key.
            rules = {value: max(tally, key=tally.get) for value, tally in counts.items()}

            # Every instance carrying the majority class of its feature value
            # is classified correctly, so no second pass over the data is needed.
            correct = sum(max(tally.values()) for tally in counts.values())
            accuracy = correct / total

            # Strict comparison keeps the earliest feature when accuracies tie.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_feature = feature
                best_rules = rules

        self.model = (best_feature, best_rules)
        return self.model

    def predict(self, instance):
        """Return the predicted class for ``instance``.

        Returns ``None`` when the model holds no rules or when the instance's
        value for the selected feature was never seen during training.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.model is None:
            raise ValueError("El modelo no ha sido entrenedo todavía")
        feature, rules = self.model
        # Check for a rule-less model before touching the instance: in that
        # case ``feature`` is ``None`` and indexing with it would fail.
        if rules is None:
            return None
        return rules.get(instance[feature])

    def test(self, test_data, class_column):
        """Measure accuracy on ``test_data`` and store it in ``self.accuracy``.

        Instances for which ``predict`` returns ``None`` count as errors.

        Returns:
            Fraction of correctly classified instances, or ``0`` when
            ``test_data`` is empty.
        """
        if not test_data:
            self.accuracy = 0
            return self.accuracy

        hits = 0
        seen = 0
        for instance in test_data:
            if self.predict(instance) == instance[class_column]:
                hits += 1
            seen += 1

        self.accuracy = hits / seen if seen > 0 else 0
        return self.accuracy

class DataHandler:
    """Load a comma-separated text file and produce random train/test splits."""

    def __init__(self, filename):
        self.filename = filename
        # List of dict instances (header -> value); ``None`` until loaded.
        self.data = None
        # Column names taken from the first line of the file; ``None`` until loaded.
        self.headers = None

    def load_data(self):
        """Read ``self.filename`` into ``self.headers`` and ``self.data``.

        The first line supplies the column names and every following
        non-blank line becomes one instance. Values are split on commas and
        stripped of surrounding whitespace. Blank lines (for example a
        trailing empty line) are skipped, and values beyond the number of
        headers are ignored.

        Raises:
            FileNotFoundError: If the file does not exist (raised by ``open``).
            ValueError: If the file is empty or a row has fewer values than
                there are headers.
        """
        with open(self.filename, 'r') as file:
            lines = file.readlines()

        if not lines:
            raise ValueError("Archivo vacío")

        headers = [value.strip() for value in lines[0].strip().split(',')]
        rows = []
        # Data starts on the second line of the file, hence ``start=2``.
        for line_number, line in enumerate(lines[1:], start=2):
            content = line.strip()
            if not content:
                continue
            values = [value.strip() for value in content.split(',')]
            if len(values) < len(headers):
                # Report a short row as ValueError so callers that already
                # handle bad input do not crash with IndexError.
                raise ValueError(
                    f"Línea {line_number}: se esperaban {len(headers)} valores "
                    f"y se encontraron {len(values)}"
                )
            rows.append(dict(zip(headers, values)))

        # Publish the result only after the whole file parsed successfully.
        self.headers = headers
        self.data = rows

    def split_data(self, train_percentage):
        """Shuffle the loaded data and split it into train and test parts.

        Args:
            train_percentage: Fraction of the instances that goes to the
                training part; the size is truncated to an integer.

        Returns:
            ``(train, test)`` lists taken from a shuffled copy of the data.

        Raises:
            ValueError: If no data is loaded or the training part would be
                empty.
        """
        if not self.data:
            raise ValueError("No hay datos para dividir. Asegúrese de cargar los datos primero.")
        train_size = int(len(self.data) * train_percentage)
        if train_size == 0:
            raise ValueError("El conjunto de entrenamiento está vacío. Aumente el porcentaje de entrenamiento.")
        # ``random.sample`` over the full length gives a shuffled copy and
        # leaves ``self.data`` in its original order.
        shuffled_data = random.sample(self.data, len(self.data))
        return shuffled_data[:train_size], shuffled_data[train_size:]

def main():
    """Interactively train and evaluate OneR models over random splits.

    Prompts for a data file, a number of iterations and a training
    percentage. Each iteration draws a fresh random train/test split, trains
    a ``OneR`` model on it and prints the model with its test accuracy.
    Finally it reports the model whose accuracy is closest to the average,
    together with the average itself.
    """
    filename = input("Ingrese el nombre del archivo: ")
    try:
        num_iterations = int(input("Ingrese el número de iteraciones: "))
        train_percentage = float(input("Ingrese el porcentaje de entrenamiento (0-100 %): ")) / 100
    except ValueError:
        # Non-numeric input: report it instead of crashing with a traceback.
        print("El número de iteraciones y el porcentaje de entrenamiento deben ser valores numéricos.")
        return

    if train_percentage <= 0 or train_percentage >= 1:
        print("El porcentaje de entrenamiento debe ser mayor que 0 % y menor que 100 %.")
        return

    data_handler = DataHandler(filename)
    try:
        data_handler.load_data()
    except FileNotFoundError:
        print(f"El archivo '{filename}' no se encontró.")
        return
    except ValueError as e:
        print(f"Error al cargar los datos: {str(e)}")
        return

    # Name of the column that holds the class label in the input file.
    class_column = "Class"
    models = []
    accuracies = []

    for i in range(num_iterations):
        try:
            train_data, test_data = data_handler.split_data(train_percentage)
        except ValueError as e:
            print(f"Error en la iteración {i+1}: {str(e)}")
            break

        one_r = OneR()
        try:
            model = one_r.train(train_data, class_column)
            # ``test`` returns a fraction; keep accuracies as percentages.
            accuracy = (one_r.test(test_data, class_column)) * 100
        except ValueError as e:
            print(f"Error en la iteración {i+1}: {str(e)}")
            # Same exit status as ``exit(1)`` without depending on the
            # interactive helper injected by the ``site`` module.
            raise SystemExit(1)

        models.append(model)
        accuracies.append(accuracy)

        print(f"\nModelo {i+1}")
        print(f"Característica: {model[0]}")
        print(f"Reglas: {model[1]}")
        print(f"Exactitud del modelo: {accuracy:.2f}%\n")

    if not accuracies:
        print("No se pudo completar ninguna iteración exitosamente.")
        return

    avg_accuracy = (sum(accuracies) / len(accuracies))
    # Index of the run whose accuracy is nearest to the mean (first on ties).
    closest_model_index = min(range(len(accuracies)), key=lambda i: abs(accuracies[i] - avg_accuracy))

    print(f"Modelo con exactitud más cercana al promedio (Modelo {closest_model_index + 1}):")
    print(f"Característica: {models[closest_model_index][0]}")
    print(f"Reglas: {models[closest_model_index][1]}")
    # Report the selected model's own accuracy, not the value left over from
    # the last loop iteration.
    print(f"Exactitud del Modelo: {accuracies[closest_model_index]:.2f}%\n")
    print(f"EXACTITUD PROMEDIO: {avg_accuracy:.2f}%")


# Start the interactive workflow only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Ingrese el nombre del archivo: /content/drive/MyDrive/golf-dataset-categorical.csv
Ingrese el número de iteraciones: 10
Ingrese el porcentaje de entrenamiento (0-100 %): 10

Modelo 1
Característica: Outlook
Reglas: {'Rainy': 'Yes'}
Exactitud del modelo: 15.38%


Modelo 2
Característica: Outlook
Reglas: {'Overcast': 'Yes'}
Exactitud del modelo: 23.08%


Modelo 3
Característica: Outlook
Reglas: {'Sunny': 'No'}
Exactitud del modelo: 15.38%


Modelo 4
Característica: Outlook
Reglas: {'Overcast': 'Yes'}
Exactitud del modelo: 23.08%


Modelo 5
Característica: Outlook
Reglas: {'Rainy': 'Yes'}
Exactitud del modelo: 15.38%


Modelo 6
Característica: Outlook
Reglas: {'Sunny': 'Yes'}
Exactitud del modelo: 7.69%


Modelo 7
Característica: Outlook
Reglas: {'Rainy': 'Yes'}
Exactitud del modelo: 15.38%


Modelo 8
Característica: Outlook
Reglas: {'Rainy': 'Yes'}
Exactitud del modelo: 15.38%


Modelo 9
Característica: Outlook
Reglas: {'Overcast': 'Yes'}
Exactitud del modelo: 23.08%


Modelo 10
Caracter