# Modelo día 0

In [2]:
# Clase del knn 

import numpy as np
import pandas as pd
from itertools import compress

class Knn:
    """Classifier implementing an argmax among reference value corresponding to neighbours within a given distance matrix
    
    Atr:
        weights (array): Array with the weights of the explicative variables.
        k (int): Number of neighbours setted.
        dist_matrix (array): Distance matrix of the training sample
        data_train (array): Training sample
        neighbours (array): Array of dimension (n_samples, k) with neighbours indexes of each taining sample.
        preds (array): Array of dimension (n_samples, 2) with pressure predicted and SQR associated of each taining sample.
        target (array):Array of dimension n_samples with the pressure setting of each training sample.
        ref (array): Array of dimension n_samples with the SQI of each training sample.
    """
    def __init__(self, k=11, weights=None, dist_matrix=None):
        """K nearest neighbours for pressure settings that maximize the SQI value

        Args:
            k (int): Number of neighbours. Defaults to 11.
            weights (list, optional): Weigths for input variables. Defaults to None.
            dist_matrix (ndarray, optional): Pre defined matrix distance between training samples. Defaults to None.
        """
        self.weights = weights
        self.k = k
        self.dist_matrix = dist_matrix
        self.data_train = None
        self.neighbours = None
        self.preds = None
        self.range_vars = None
        self.target = None
        self.ref = None

    def fit(self, data_train, target, ref):
        """Fit the model using data_train as training data and target(SQI) as target values for ref setting pressure.

        Args:
            data_train (ndarray): [description]
            target (1d-array): [description]
            ref (1d-array): [description]
        """
        if self.weights is None:
            self.weights = np.ones(data_train.shape[1]) * (1 / data_train.shape[1])

        max_vars = data_train.max(axis=0)
        min_vars = data_train.min(axis=0)
        self.data_train = data_train
        self.target = target
        self.ref = ref
        self.range_vars = max_vars - min_vars
        self.dist_matrix = self.__dist(self.data_train)
        self.neighbours, self.preds = self.__knn(self.dist_matrix)

    def __knn(self, dist_matrix):
        """Internal method that returns neighbours and predictions given a distance matrix

        Args:
            dist_matrix (ndarray): Distance matrix 

        Returns:
            tuple: neighbours, preds for each sample
        """
        neighbours = dist_matrix.argsort()[:, 1:self.k + 1]
        max_ref_neighbours = self.ref[neighbours].argmax(axis=1)
        # se tiene que poder hacer mejor
        preds_index = neighbours.reshape(-1, )[
            [i * neighbours.shape[1] for i in range(neighbours.shape[0])] + max_ref_neighbours]
        preds = np.hstack((self.target[preds_index].reshape(-1, 1), self.ref[preds_index].reshape(-1, 1)))
        return neighbours, preds

    def __dist(self, data_points):
        """Internal method that compute the distance matrix

        Args:
            data_points (ndarray): Samples to calculate the distance between them and the training samples.

        Returns:
                ndarray: dist matrix
        """
        def f(vec):
            """Auxiliary function to vectorize distance calculus

            Args:
                vec (array): Vector of samples
            
            Returns:
                function: funciton to compute the calculus of distance matrix
            """
            return np.sum((np.abs(vec - self.data_train) / self.range_vars) * self.weights, axis=1)

        dist = np.apply_along_axis(f, 1, data_points)

        return dist

    def predict(self, data_pred, neighbours_index=False, dist_matrix=False):
        """	 Predict the class labels for the provided data.

        Args:
            data_pred (ndarray): Samples for prediccion
            neighbours_index (bool, optional): Indicates whether the indexes of the neighbours are turned (True) or not (False). Defaults to False.
            dist_matrix (bool, optional): Indicates whether the distance matrix is turned (True) or not (False). Defaults to False.

        Returns:
            list: list of predictions
        """
        # One-dimensional vector reshape
        if data_pred.ndim == 1:
            data_pred = np.reshape(data_pred, (1, data_pred.shape[0]))

        dist_preds = self.__dist(data_pred)
        neighbours, preds = self.__knn(dist_preds)
        mask = [True, neighbours_index, dist_matrix]
        predicted = [preds, neighbours, dist_preds]
        try:
            predictions = list(*compress(predicted, mask))
        except:
            predictions = list(compress(predicted, mask))
        return predictions

In [17]:
"""
Se carga el dataset con los datos necesarios (sesiones) para entrenar el Knn: presiones, altura, peso, sqr, posicion y sexo.

Codificamos la posición y sexo como numéricos, de la siguiente manera

posicion Lateral = 0
posicion Supine = 1

sexo Male = 1
sexo Female = 0

"""

perfiles = pd.read_parquet('../data/processed/perfiles_sqr_knn.parquet')

In [12]:
perfiles

Unnamed: 0,presiones,altura,peso,sqr,posicion,sexo
22217,122232,178.0,75.0,83.008680,0,1
22268,023321,162.0,75.0,70.773244,0,0
22361,122232,180.0,70.0,70.144818,0,1
22362,023111,160.0,50.0,87.152947,0,0
22387,023321,162.0,75.0,81.126354,0,0
...,...,...,...,...,...,...
84990,122222,167.0,58.0,71.508019,1,1
84994,022211,157.0,57.0,56.875936,0,0
84997,022211,157.0,57.0,71.011566,0,0
85001,023321,170.0,80.0,99.304167,0,1


In [4]:
# Definición del Knn
knn = Knn(k=11, weights=np.ones(4)*0.25)

# Ajuste del Knn
knn.fit(perfiles[['altura', 'peso', 'posicion', 'sexo']].values, perfiles["presiones"].values, perfiles["sqr"].values)

In [5]:
# Atributo neighbours: Contiene los vecinos más cercanos en cada muestra de train
knn.neighbours

array([[   53, 14840,    49, ..., 14220, 14205,    30],
       [14059,    42,    44, ..., 13900,    70,    72],
       [ 4230, 17124, 14217, ...,   165,  8600, 11763],
       ...,
       [13129, 14595, 14596, ..., 10092, 14158, 14159],
       [18227, 21217, 18094, ..., 12033, 14161,  9822],
       [21081, 21030, 20842, ..., 13829, 13828, 13827]])

In [20]:
# Ejemplo de predicción sobre un individuo, devolviendo los índices de los vecinos más cercanos en train
pred, neigbours = knn.predict(np.array([178.0, 75.0, 0, 1]), True, False)

In [21]:
# Valores del SQR de los vecinos más cercanos al individuo predicho, mediante el atributo ref
knn.ref[neigbours]

array([[65.98134492, 93.20951211, 65.99530992, 87.20077405, 78.59440703,
        82.92540377, 77.95684388, 77.40713213, 89.09445439, 88.82032828,
        91.59429859]])

In [22]:
# Predicción asociada al individuo predicho, configuración recomendada y SQR asociado
pred

array([['012332', 93.20951211038317]], dtype=object)