In [4]:
import csv
import math
import random
import matplotlib.pyplot as plt

# Function to load data from CSV file and split into train and test sets
def load_and_split_dataset(filename, split_ratio):
    """Load a labelled CSV file and randomly split it into train and test sets.

    The first line of the file is treated as a header and discarded. In every
    remaining row all columns except the last are converted to ``float``; the
    last column is kept unchanged as the class label string. Blank lines are
    skipped, and a completely empty file yields two empty lists.

    Args:
        filename: Path of the CSV file to read.
        split_ratio: Fraction of the rows assigned to the training set. The
            resulting row count is truncated with ``int()``.

    Returns:
        A ``(train_data, test_data)`` tuple of lists of rows, where each row
        is ``[feature_1, ..., feature_n, label]``.

    Raises:
        ValueError: If a feature cell cannot be parsed as a float.
    """
    dataset = []
    # The csv module expects files opened with newline='' so that it can
    # handle line endings (and newlines inside quoted fields) itself.
    with open(filename, 'r', newline='') as file:
        csv_reader = csv.reader(file)
        next(csv_reader, None)  # Skip header; tolerate an empty file
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; row[-1] would fail.
                continue
            dataset.append([float(x) for x in row[:-1]] + [row[-1]])
    train_size = int(len(dataset) * split_ratio)
    random.shuffle(dataset)
    return dataset[:train_size], dataset[train_size:]

# Function to calculate the Euclidean distance between two points
def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two coordinate sequences.

    Iteration is driven by ``point1``: coordinates of ``point2`` beyond the
    length of ``point1`` are ignored, and a shorter ``point2`` raises
    ``IndexError``.
    """
    squared_sum = 0.0
    for axis, coordinate in enumerate(point1):
        delta = coordinate - point2[axis]
        squared_sum = squared_sum + delta ** 2
    return math.sqrt(squared_sum)

# Function to get k nearest neighbors
def get_neighbors(train_data, test_instance, k):
    """Return the ``k`` training rows closest to ``test_instance``.

    The last element of every row (training and test alike) is excluded from
    the distance computation. The result is a list of
    ``(train_row, distance)`` tuples ordered by increasing distance; rows at
    equal distance keep their order from ``train_data``.
    """
    scored = [
        (candidate, euclidean_distance(test_instance[:-1], candidate[:-1]))
        for candidate in train_data
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    return ranked[:k]

# Function to make predictions
def predict_classification(train_data, test_instance, k):
    """Predict the label of ``test_instance`` by majority vote of its neighbors.

    Prints the feature part of the test instance and each of the neighbors
    returned by ``get_neighbors`` with its distance, then returns the label
    (last element of a row) with the most votes. On a tie, the label that was
    encountered first among the neighbors wins.
    """
    nearest = get_neighbors(train_data, test_instance, k)
    print("Objective Point:", test_instance[:-1])

    tally = {}
    for rank, (sample, dist) in enumerate(nearest, start=1):
        print(f"Vecino {rank}:", sample[:], "Distance:", dist)
        label = sample[-1]
        tally[label] = tally.get(label, 0) + 1

    # Stable descending sort: among equal counts the first-seen label leads.
    ranked_labels = sorted(tally.items(), key=lambda item: item[1], reverse=True)
    winner, _ = ranked_labels[0]
    return winner

# Load and split dataset
filename = './files/Vinos.csv'
split_ratio = 0.7
train_data, test_data = load_and_split_dataset(filename, split_ratio)

# Define the value of k
k = 4

# Per-instance accuracies (each entry is 0.0 or 100.0) and the predicted
# labels. Both are created once, before the loop, so they accumulate results
# for the whole test set instead of being reset on every iteration.
accuracies = []
predictions = []

# Make predictions for each test instance
for test_instance in test_data:
    prediction = predict_classification(train_data, test_instance, k)
    predictions.append(prediction)

    # The last element of a row is its true label; a single instance is
    # either fully right (100.0) or fully wrong (0.0).
    correct = 1 if test_instance[-1] == prediction else 0
    accuracy = correct * 100.0
    accuracies.append(accuracy)

    # Display the results for the current test instance
    print("Predicted Class based on neighbors:", prediction)
    print("Accuracy for this test instance:", accuracy)
    print("\n")

# Calculate the overall accuracy as the mean of the per-instance values.
# An empty test split would otherwise raise ZeroDivisionError here.
if accuracies:
    overall_accuracy = sum(accuracies) / len(accuracies)
    print("Overall Accuracy:", overall_accuracy)
else:
    overall_accuracy = None
    print("Overall Accuracy: undefined (no test instances)")


Objective Point: [11.5, 0.41, 0.52, 3.0, 0.08, 29.0, 55.0, 1.0001, 3.26, 0.88, 11.0]
Vecino 1: [10.7, 0.52, 0.38, 2.6, 0.066, 29.0, 56.0, 0.99577, 3.15, 0.79, 12.1, 'Calidad_7'] Distance: 1.7498899247952713
Vecino 2: [10.4, 0.64, 0.24, 2.8, 0.105, 29.0, 53.0, 0.9998, 3.24, 0.67, 9.9, 'Calidad_5'] Distance: 2.576125984885056
Vecino 3: [9.6, 0.5, 0.36, 2.8, 0.116, 26.0, 55.0, 0.99722, 3.18, 0.68, 10.9, 'Calidad_5'] Distance: 3.5695103718017127
Vecino 4: [7.8, 0.64, 0.0, 1.9, 0.072, 27.0, 55.0, 0.9962, 3.31, 0.63, 11.0, 'Calidad_5'] Distance: 4.391853732764789
Predicted Class based on neighbors: Calidad_5
Accuracy for this test instance: 100.0


Objective Point: [6.7, 0.54, 0.13, 2.0, 0.076, 15.0, 36.0, 0.9973, 3.61, 0.64, 9.8]
Vecino 1: [6.7, 0.54, 0.13, 2.0, 0.076, 15.0, 36.0, 0.9973, 3.61, 0.64, 9.8, 'Calidad_5'] Distance: 0.0
Vecino 2: [7.1, 0.46, 0.14, 2.8, 0.076, 15.0, 37.0, 0.99624, 3.36, 0.49, 10.7, 'Calidad_5'] Distance: 1.6436243864094973
Vecino 3: [7.1, 0.46, 0.14, 2.8, 0.076, 