In [27]:
import numpy as np
import sys
sys.path.append("/Users/utilizador/Documents/GitHub/si/src")
sys.path.append("/Users/utilizador/Documents/GitHub/si")
from si.statistics.euclidean_distance import euclidean_distance
from Exercises.Ex7_RMSE import RMSE


class KNNRegressor:
    """
    K-Nearest Neighbors Regressor class for regression problems.

    A prediction is the arithmetic mean of the target values of the k training
    samples that are closest to the query sample.

    Parameters:
    -----------
    k: int
        The number of nearest examples to consider. Must be a positive integer.
        When k exceeds the number of training samples, all of them are used.

    distance: callable, optional
        A function called as ``distance(sample, training_sample)`` for every
        training sample; it must return a single number per call.
        When omitted (or None), ``euclidean_distance`` is used.

    Attributes:
    -----------
    dataset: tuple (X, y) or None
        Stores the training dataset as NumPy arrays, where X is the feature
        matrix and y is the target vector. None until ``_fit`` is called.
    """

    def __init__(self, k, distance=None):
        # A non-positive k would either yield NaN (k == 0) or silently pick
        # "all but the last |k|" neighbours (k < 0) through slicing.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.distance = distance if distance is not None else euclidean_distance
        self.dataset = None

    def _fit(self, dataset):
        """
        Store the training dataset.

        Parameters:
        -----------
        dataset: tuple (X, y)
            The training dataset, where X is the feature matrix and y is the target vector.
            Any array-like is accepted; both parts are converted to NumPy arrays.

        Returns:
        --------
        self: KNNRegressor
            The fitted regressor.

        Raises:
        -------
        ValueError
            If X and y have different numbers of samples, or if the dataset is empty.
        """
        X, y = dataset
        # Convert once here so that prediction can rely on row iteration (X)
        # and integer-array indexing (y) regardless of the input container.
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        if len(X) == 0:
            raise ValueError("The training dataset is empty")
        self.dataset = (X, y)
        return self

    def _predict(self, dataset):
        """
        Predict values for the test dataset.

        Parameters:
        -----------
        dataset: numpy array
            The test dataset feature matrix (X).

        Returns:
        --------
        predictions: numpy array
            The predicted values (y_pred) for the test dataset, one per test sample.

        Raises:
        -------
        ValueError
            If the regressor has not been fitted yet.
        """
        if self.dataset is None:
            raise ValueError("The regressor has not been fitted; call _fit first")

        X_train, y_train = self.dataset
        y_train = np.asarray(y_train)
        X_test = np.asarray(dataset)
        predictions = []

        for x in X_test:
            # Distance between the query sample and every training sample.
            distances = np.array([self.distance(x, x_train) for x_train in X_train])

            # Indexes of the k nearest neighbours (smallest distances first).
            nearest_indexes = np.argsort(distances)[:self.k]

            # Target values of those neighbours.
            nearest_values = y_train[nearest_indexes]

            # The prediction is the mean of the neighbours' targets.
            predictions.append(np.mean(nearest_values))

        return np.array(predictions)

    def _score(self, dataset):
        """
        Calculate the RMSE between predictions and actual values.

        Parameters:
        -----------
        dataset: tuple (X, y)
            The test dataset, where X is the feature matrix and y is the target vector.

        Returns:
        --------
        error: float
            The RMSE between predictions and actual values.
        """
        X_test, y_test = dataset
        y_pred = self._predict(X_test)
        return RMSE(y_test, y_pred)

In [17]:
from si.io.csv_file import read_csv
import pandas as pd

# Directory holding the CPU dataset. The trailing slash is required because the
# file name is appended by plain string concatenation.
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere as is.
Path = "/Users/utilizador/Documents/GitHub/si/datasets/cpu/"

# Load the comma-separated file into a DataFrame without using any column as the index.
data = pd.read_csv(
    filepath_or_buffer=Path + "cpu.csv",
    sep=",",
    index_col=False,
)



In [18]:
# Print the first five rows as a quick sanity check of the loaded columns.
print(data.head(n=5))

   syct  mmin   mmax  cach  chmin  chmax  perf
0   125   256   6000   256     16    128   198
1    29  8000  32000    32      8     32   269
2    29  8000  32000    32      8     32   220
3    29  8000  32000    32      8     32   172
4    29  8000  16000    32      8     16   132


In [25]:
from sklearn.model_selection import train_test_split

# Every column except the last one is a feature; the last column is the target.
X, y = data.to_numpy()[:, :-1], data.to_numpy()[:, -1]

# Hold out 40% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [30]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [37]:
# Build the regressor and fit it on the training split (_fit returns the regressor itself).
knn = KNNRegressor(k=2, distance=euclidean_distance)._fit((X_train, y_train))

# Predicted targets for the held-out samples.
predictions = knn._predict(X_test)

# RMSE on the held-out split; _score runs its own prediction pass internally.
score = knn._score((X_test, y_test))

# Report the results.
print("Predições:", predictions)
print("RMSE:", score)

Predições: [177.5  17.5  33.  165.   17.  323.5  66.   26.  573.  171.5  89.  123.
  22.5  46.   41.5  33.   74.   29.  134.   73.   48.5  94.  108.5  16.5
  14.5  40.   21.5  25.5  36.   61.  323.5  28.5  27.5  19.   25.5  39.
  42.5 428.   34.   20.5 401.   33.  152.5 428.  412.5  22.  325.   25.5
  23.   40.   32.5  25.   45.   26.  178.   89.   25.   10.  144.  323.5
 108.5  49.  244.5  73.   57.5  39.   28.5 177.5  61.  160.   33.5  63.
 102.  152.5  66.   19.   61.   40.   22.   63.  106.5  33.   19.   32.5]
RMSE: 137.13019534803587
