In [1]:
import pandas as pd
import numpy as np
import numpy.linalg as la
from scipy.stats import kurtosis, skew
from scipy.spatial.distance import mahalanobis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
X = np.array(
    [
    [0.8, 0.8],
    [0.8, 1.2],
    [3.8, 2.8],
    [4.2, 3.2],
    [1.0, 1.0],
    [1.2, 1.2],
    [4.2, 2.8],
    [4.4, 2.8],
    [3.5, 1.0],
    [4.0, 1.0],
    [3.8, 0.5],
    [4.0, 0.7]
    ])
y = np.array([1, 1, 2, 2, 1, 1, 2, 2, 3, 3, 3, 3])

In [3]:
knn_model = KNeighborsClassifier(3)
knn_model.fit(X, y)

KNeighborsClassifier(n_neighbors=3)

In [4]:
print(knn_model.predict_proba([[3.0, 2.0]]))
print(knn_model.predict([[3.0, 2.0]]))

[[0.         0.33333333 0.66666667]]
[3]


In [5]:
print(knn_model.predict_proba([X[1]]))
print(knn_model.predict([X[1]]))

[[1. 0. 0.]]
[1]


## Punto 5

In [6]:
df = pd.read_excel('../datos/ELE.xlsx')
df.tail()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24
495,1,24,2,30,5,3,4,4,3,20,...,1,0,0,1,0,0,1,0,0,1
496,2,36,2,90,2,2,3,1,4,29,...,1,0,0,0,1,1,0,0,0,0
497,4,24,4,16,1,4,3,3,2,40,...,1,0,0,1,0,0,1,0,0,1
498,2,18,2,13,1,5,4,2,1,32,...,1,0,0,0,0,0,1,0,1,0
499,3,6,4,13,2,5,1,4,3,28,...,1,1,0,1,0,0,1,0,0,1


In [7]:
def describeVariable(series):
    print(f'Descripción de variable {series.name}:')
    uniques = len(series.unique())
    isDiscrete = True if uniques < 10 else False
    print(f'Cantidad de valores únicos: {uniques}')
    if isDiscrete:
        print('Conteo de valores unicos:')
        print(series.value_counts().to_dict())
    print(f'Media: {series.mean()}')
    print(f'Curtosis: {kurtosis(series)}')
    print(f'Asimetría: {skew(series)}')
    # plt.hist(series)
    # plt.show()
    print('\n')
    

In [8]:
for i in range(1, 25):
    describeVariable(df[f'X{i}'])

Descripción de variable X1:
Cantidad de valores únicos: 4
Conteo de valores unicos:
{4: 197, 2: 144, 1: 128, 3: 31}
Media: 2.594
Curtosis: -1.6401317919876475
Asimetría: 0.0010773085525939531


Descripción de variable X2:
Cantidad de valores únicos: 30
Media: 20.308
Curtosis: 0.7267794208488452
Asimetría: 1.1104165505133297


Descripción de variable X3:
Cantidad de valores únicos: 5
Conteo de valores unicos:
{2: 268, 4: 149, 3: 46, 0: 21, 1: 16}
Media: 2.572
Curtosis: -0.5093885068386337
Asimetría: -0.051635253124258984


Descripción de variable X4:
Cantidad de valores únicos: 102
Media: 32.378
Curtosis: 3.564834228170157
Asimetría: 1.8600898470708302


Descripción de variable X5:
Cantidad de valores únicos: 5
Conteo de valores unicos:
{1: 301, 5: 88, 2: 49, 3: 33, 4: 29}
Media: 2.108
Curtosis: -0.7089901947387096
Asimetría: 0.9975010259755569


Descripción de variable X6:
Cantidad de valores únicos: 5
Conteo de valores unicos:
{3: 174, 5: 129, 4: 84, 2: 79, 1: 34}
Media: 3.39
Curtosis

#### Vector de medias

Usando la fórmula $\bar{X} = \frac{1}{m}X^T1 $, donde $1$ es un vector de unos de longitud $m$

In [9]:
X = df.to_numpy()
m, n = X.shape
X_bar = 1 / m * X.T @ np.ones((m, 1))
print(X_bar.T)

[[2.5940e+00 2.0308e+01 2.5720e+00 3.2378e+01 2.1080e+00 3.3900e+00
  2.6920e+00 2.8260e+00 2.3640e+00 3.5528e+01 2.6940e+00 1.3940e+00
  1.1440e+00 1.4160e+00 1.0440e+00 2.0800e-01 1.0200e-01 9.0800e-01
  4.0000e-02 1.7200e-01 7.2400e-01 2.2000e-02 1.9200e-01 6.2200e-01]]


Usando el método `mean()` del objeto DataFrame

In [10]:
df.mean()

X1      2.594
X2     20.308
X3      2.572
X4     32.378
X5      2.108
X6      3.390
X7      2.692
X8      2.826
X9      2.364
X10    35.528
X11     2.694
X12     1.394
X13     1.144
X14     1.416
X15     1.044
X16     0.208
X17     0.102
X18     0.908
X19     0.040
X20     0.172
X21     0.724
X22     0.022
X23     0.192
X24     0.622
dtype: float64

#### Matriz de varianzas y covarianzas

Usando la fórmula $S = \frac{1}{m}X^TPX $

In [11]:
P = np.identity(m) - 1 / m * (np.ones((m, 1)) @ np.ones((m, 1)).T)
S = 1 / m * (X.T @ P @ X)
pd.DataFrame(S, columns=df.columns, index=df.columns)

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24
X1,1.541164,-1.436952,0.208232,-2.996532,0.341848,0.13434,-0.003048,-0.068644,-0.118216,0.136368,...,0.001864,-0.015552,0.011412,0.048648,-0.01376,-0.028168,0.075944,-0.013068,0.003952,0.032532
X2,-1.436952,146.453136,-1.152176,208.933576,0.432736,0.23788,-0.213136,0.607592,3.219888,-4.362624,...,-0.419552,-0.426064,0.488584,-0.071664,0.11168,-0.248976,-0.388992,-0.042776,-0.497136,-0.065576
X3,0.208232,-1.152176,1.156816,-2.240216,0.074224,0.12092,-0.033824,0.085528,-0.074208,2.063984,...,-0.001168,0.025024,0.017656,0.002624,0.00712,-0.040384,0.043872,-0.002584,0.006176,-0.017784
X4,-2.996532,208.933576,-2.240216,796.119116,0.735176,-2.53342,-1.247576,0.669772,9.732408,7.298416,...,-0.436632,-0.502624,2.029444,-0.303224,0.66488,0.184984,-2.355672,-0.002316,-1.030576,-2.109116
X5,0.341848,0.432736,0.074224,0.735176,2.472336,0.26588,0.005264,0.124792,-0.077312,1.254976,...,0.021248,-0.006464,0.022984,0.023936,0.00568,0.029424,0.001808,0.005624,0.017264,0.016824
X6,0.13434,0.23788,0.12092,-2.53342,0.26588,1.4779,0.02612,0.30386,0.10804,3.70408,...,-0.00716,0.00488,0.01222,0.00988,-0.0076,-0.02908,-0.00236,-0.04858,0.01512,0.05142
X7,-0.003048,-0.213136,-0.033824,-1.247576,0.005264,0.02612,0.505136,0.004408,-0.043888,0.022624,...,0.011552,0.006064,0.009416,-0.008336,-0.00168,-0.021024,0.016992,-0.001224,0.003136,0.013576
X8,-0.068644,0.607592,0.085528,0.669772,0.124792,0.30386,0.004408,1.207724,0.171336,4.119872,...,-0.014344,0.002192,0.021748,-0.002008,0.00296,0.059928,-0.146024,-0.000172,-0.004592,-0.009772
X9,-0.118216,3.219888,-0.074208,9.732408,-0.077312,0.10804,-0.043888,0.171336,1.131504,1.213808,...,-0.028016,-0.009712,0.072872,0.035488,0.00144,-0.008608,-0.155536,0.003992,-0.081888,-0.030408
X10,0.136368,-4.362624,2.063984,7.298416,1.254976,3.70408,0.022624,4.119872,1.213808,131.325216,...,0.006768,0.390176,0.128144,0.006576,-0.00112,-0.878816,-0.286272,0.076384,0.388624,-1.094416


Usando la fórmula $\frac{1}{m}\tilde{X}^T\tilde{X}$, donde $ \tilde{X} $ es la matriz de datos centrados

In [12]:
X_centered = X - np.tile(X_bar.T, (500, 1))
S = 1 / m * (X_centered.T @ X_centered)
pd.DataFrame(S, columns=df.columns, index=df.columns)

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24
X1,1.541164,-1.436952,0.208232,-2.996532,0.341848,0.13434,-0.003048,-0.068644,-0.118216,0.136368,...,0.001864,-0.015552,0.011412,0.048648,-0.01376,-0.028168,0.075944,-0.013068,0.003952,0.032532
X2,-1.436952,146.453136,-1.152176,208.933576,0.432736,0.23788,-0.213136,0.607592,3.219888,-4.362624,...,-0.419552,-0.426064,0.488584,-0.071664,0.11168,-0.248976,-0.388992,-0.042776,-0.497136,-0.065576
X3,0.208232,-1.152176,1.156816,-2.240216,0.074224,0.12092,-0.033824,0.085528,-0.074208,2.063984,...,-0.001168,0.025024,0.017656,0.002624,0.00712,-0.040384,0.043872,-0.002584,0.006176,-0.017784
X4,-2.996532,208.933576,-2.240216,796.119116,0.735176,-2.53342,-1.247576,0.669772,9.732408,7.298416,...,-0.436632,-0.502624,2.029444,-0.303224,0.66488,0.184984,-2.355672,-0.002316,-1.030576,-2.109116
X5,0.341848,0.432736,0.074224,0.735176,2.472336,0.26588,0.005264,0.124792,-0.077312,1.254976,...,0.021248,-0.006464,0.022984,0.023936,0.00568,0.029424,0.001808,0.005624,0.017264,0.016824
X6,0.13434,0.23788,0.12092,-2.53342,0.26588,1.4779,0.02612,0.30386,0.10804,3.70408,...,-0.00716,0.00488,0.01222,0.00988,-0.0076,-0.02908,-0.00236,-0.04858,0.01512,0.05142
X7,-0.003048,-0.213136,-0.033824,-1.247576,0.005264,0.02612,0.505136,0.004408,-0.043888,0.022624,...,0.011552,0.006064,0.009416,-0.008336,-0.00168,-0.021024,0.016992,-0.001224,0.003136,0.013576
X8,-0.068644,0.607592,0.085528,0.669772,0.124792,0.30386,0.004408,1.207724,0.171336,4.119872,...,-0.014344,0.002192,0.021748,-0.002008,0.00296,0.059928,-0.146024,-0.000172,-0.004592,-0.009772
X9,-0.118216,3.219888,-0.074208,9.732408,-0.077312,0.10804,-0.043888,0.171336,1.131504,1.213808,...,-0.028016,-0.009712,0.072872,0.035488,0.00144,-0.008608,-0.155536,0.003992,-0.081888,-0.030408
X10,0.136368,-4.362624,2.063984,7.298416,1.254976,3.70408,0.022624,4.119872,1.213808,131.325216,...,0.006768,0.390176,0.128144,0.006576,-0.00112,-0.878816,-0.286272,0.076384,0.388624,-1.094416


#### Matriz de varianzas corregida

Usando la fórmula $\frac{1}{m-1}\tilde{X}^T\tilde{X}$, donde $ \tilde{X} $ es la matriz de datos centrados

In [13]:
X_centered = X - np.tile(X_bar.T, (500, 1))
S_corrected = 1 / (m - 1) * (X_centered.T @ X_centered)
pd.DataFrame(S_corrected, columns=df.columns, index=df.columns)

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24
X1,1.544253,-1.439832,0.208649,-3.002537,0.342533,0.134609,-0.003054,-0.068782,-0.118453,0.136641,...,0.001868,-0.015583,0.011435,0.048745,-0.013788,-0.028224,0.076096,-0.013094,0.00396,0.032597
X2,-1.439832,146.746629,-1.154485,209.352281,0.433603,0.238357,-0.213563,0.60881,3.226341,-4.371367,...,-0.420393,-0.426918,0.489563,-0.071808,0.111904,-0.249475,-0.389772,-0.042862,-0.498132,-0.065707
X3,0.208649,-1.154485,1.159134,-2.244705,0.074373,0.121162,-0.033892,0.085699,-0.074357,2.06812,...,-0.00117,0.025074,0.017691,0.002629,0.007134,-0.040465,0.04396,-0.002589,0.006188,-0.01782
X4,-3.002537,209.352281,-2.244705,797.714545,0.736649,-2.538497,-1.250076,0.671114,9.751912,7.313042,...,-0.437507,-0.503631,2.033511,-0.303832,0.666212,0.185355,-2.360393,-0.002321,-1.032641,-2.113343
X5,0.342533,0.433603,0.074373,0.736649,2.477291,0.266413,0.005275,0.125042,-0.077467,1.257491,...,0.021291,-0.006477,0.02303,0.023984,0.005691,0.029483,0.001812,0.005635,0.017299,0.016858
X6,0.134609,0.238357,0.121162,-2.538497,0.266413,1.480862,0.026172,0.304469,0.108257,3.711503,...,-0.007174,0.00489,0.012244,0.0099,-0.007615,-0.029138,-0.002365,-0.048677,0.01515,0.051523
X7,-0.003054,-0.213563,-0.033892,-1.250076,0.005275,0.026172,0.506148,0.004417,-0.043976,0.022669,...,0.011575,0.006076,0.009435,-0.008353,-0.001683,-0.021066,0.017026,-0.001226,0.003142,0.013603
X8,-0.068782,0.60881,0.085699,0.671114,0.125042,0.304469,0.004417,1.210144,0.171679,4.128128,...,-0.014373,0.002196,0.021792,-0.002012,0.002966,0.060048,-0.146317,-0.000172,-0.004601,-0.009792
X9,-0.118453,3.226341,-0.074357,9.751912,-0.077467,0.108257,-0.043976,0.171679,1.133772,1.21624,...,-0.028072,-0.009731,0.073018,0.035559,0.001443,-0.008625,-0.155848,0.004,-0.082052,-0.030469
X10,0.136641,-4.371367,2.06812,7.313042,1.257491,3.711503,0.022669,4.128128,1.21624,131.588393,...,0.006782,0.390958,0.128401,0.006589,-0.001122,-0.880577,-0.286846,0.076537,0.389403,-1.096609


Usando el método `cov()` del objeto DataFrame

In [14]:
df.cov()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X15,X16,X17,X18,X19,X20,X21,X22,X23,X24
X1,1.544253,-1.439832,0.208649,-3.002537,0.342533,0.134609,-0.003054,-0.068782,-0.118453,0.136641,...,0.001868,-0.015583,0.011435,0.048745,-0.013788,-0.028224,0.076096,-0.013094,0.00396,0.032597
X2,-1.439832,146.746629,-1.154485,209.352281,0.433603,0.238357,-0.213563,0.60881,3.226341,-4.371367,...,-0.420393,-0.426918,0.489563,-0.071808,0.111904,-0.249475,-0.389772,-0.042862,-0.498132,-0.065707
X3,0.208649,-1.154485,1.159134,-2.244705,0.074373,0.121162,-0.033892,0.085699,-0.074357,2.06812,...,-0.00117,0.025074,0.017691,0.002629,0.007134,-0.040465,0.04396,-0.002589,0.006188,-0.01782
X4,-3.002537,209.352281,-2.244705,797.714545,0.736649,-2.538497,-1.250076,0.671114,9.751912,7.313042,...,-0.437507,-0.503631,2.033511,-0.303832,0.666212,0.185355,-2.360393,-0.002321,-1.032641,-2.113343
X5,0.342533,0.433603,0.074373,0.736649,2.477291,0.266413,0.005275,0.125042,-0.077467,1.257491,...,0.021291,-0.006477,0.02303,0.023984,0.005691,0.029483,0.001812,0.005635,0.017299,0.016858
X6,0.134609,0.238357,0.121162,-2.538497,0.266413,1.480862,0.026172,0.304469,0.108257,3.711503,...,-0.007174,0.00489,0.012244,0.0099,-0.007615,-0.029138,-0.002365,-0.048677,0.01515,0.051523
X7,-0.003054,-0.213563,-0.033892,-1.250076,0.005275,0.026172,0.506148,0.004417,-0.043976,0.022669,...,0.011575,0.006076,0.009435,-0.008353,-0.001683,-0.021066,0.017026,-0.001226,0.003142,0.013603
X8,-0.068782,0.60881,0.085699,0.671114,0.125042,0.304469,0.004417,1.210144,0.171679,4.128128,...,-0.014373,0.002196,0.021792,-0.002012,0.002966,0.060048,-0.146317,-0.000172,-0.004601,-0.009792
X9,-0.118453,3.226341,-0.074357,9.751912,-0.077467,0.108257,-0.043976,0.171679,1.133772,1.21624,...,-0.028072,-0.009731,0.073018,0.035559,0.001443,-0.008625,-0.155848,0.004,-0.082052,-0.030469
X10,0.136641,-4.371367,2.06812,7.313042,1.257491,3.711503,0.022669,4.128128,1.21624,131.588393,...,0.006782,0.390958,0.128401,0.006589,-0.001122,-0.880577,-0.286846,0.076537,0.389403,-1.096609


#### Varianza total

In [15]:
np.trace(S)

1085.7257240000006

#### Varianza total

In [16]:
np.trace(S) / df.shape[1]

45.23857183333336

#### Varianza generalizada

In [17]:
la.det(S)

9.968903572843005e-08

#### Desviación típica generalizada

In [18]:
np.power(la.det(S), 1 / n)

0.5108306824876402

#### Distancia de Mahalanobis

La distancia de Mhalanobis entre un registro y su vector de medias se define como:

$$
d_i = \left[ \left( X_i - \bar{X} \right)^T S^{-1} \left( X_i -  \bar{X} \right) \right]^{1/2}
$$

In [19]:
def mahalanobis_custom(row, means, sigma):
    return np.sqrt(((row - means.T) @ la.inv(sigma) @ (row - means.T).T)[0][0])
d = []
for row in X:
    d.append(mahalanobis_custom(row, X_bar, S))
pd.DataFrame(d).head()

Unnamed: 0,0
0,4.792281
1,3.738912
2,4.483663
3,6.508128
4,5.040302


Usando la función `mahalanobis()` de la librería `scipy.spatial.distance`

In [20]:
d = []
for row in X:
    d.append(mahalanobis(row, X_bar, la.inv(S)))
distances = pd.Series(d)
distances.head()

0    4.792281
1    3.738912
2    4.483663
3    6.508128
4    5.040302
dtype: float64

In [21]:
distances.max()

10.067522799715276

In [22]:
distances.sort_values(0)

127     2.370383
248     2.438412
253     2.446164
185     2.550212
230     2.558265
         ...    
187     8.341820
429     8.465488
140     8.631787
438     9.566976
236    10.067523
Length: 500, dtype: float64

## Punto 7

In [23]:
df = pd.read_excel('../datos/ELE2.xlsx')
df

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X16,X17,X18,X19,X20,X21,X22,X23,X24,Y
0,1,6,4,12,5,5,3,4,1,67,...,0,0,1,0,0,1,0,0,1,1
1,2,48,2,60,1,3,2,2,1,22,...,0,0,1,0,0,1,0,0,1,2
2,4,12,4,21,1,4,3,3,1,49,...,0,0,1,0,0,1,0,1,0,1
3,1,42,2,79,1,4,3,4,2,45,...,0,0,0,0,0,0,0,0,1,1
4,1,24,3,49,1,3,3,4,4,53,...,1,0,1,0,0,0,0,0,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,1,24,2,30,5,3,4,4,3,20,...,0,0,1,0,0,1,0,0,1,2
496,2,36,2,90,2,2,3,1,4,29,...,0,0,0,1,1,0,0,0,0,2
497,4,24,4,16,1,4,3,3,2,40,...,0,0,1,0,0,1,0,0,1,1
498,2,18,2,13,1,5,4,2,1,32,...,0,0,0,0,0,1,0,1,0,1


In [24]:
# TODO: Realizar eliminación de variables
# La variable endógena debe estar entre 0 y 1
df.Y = df.Y -1
X = df.drop('Y', axis=1)
Y = df.Y 
df_train, df_test = train_test_split(df, train_size=0.7, random_state=43)
X_train = df_train.drop('Y', axis=1)
Y_train = df_train.Y
X_test = df_test.drop('Y', axis=1)
Y_test = df_test.Y


In [25]:
logit = LogisticRegression(penalty='l2', max_iter=1000, C=0.5)
logit_fit = logit.fit(X_train, Y_train)

In [26]:
beta = logit_fit.coef_[0]
beta

array([-0.67464989,  0.02969474, -0.39810411,  0.00356309, -0.08497784,
       -0.24835858, -0.25574377,  0.01481197,  0.04916498,  0.00286656,
       -0.36291549,  0.21393477,  0.36736144, -0.10077942, -0.44364889,
        0.42401538, -0.62751024,  0.60569357,  0.20480007,  0.03064353,
        0.32174457, -0.29814577, -0.53342945, -0.30460399])

In [27]:
df_train['Predicciones'] = logit_fit.predict(X_train)
df_train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['Predicciones'] = logit_fit.predict(X_train)


Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X17,X18,X19,X20,X21,X22,X23,X24,Y,Predicciones
257,1,12,1,21,1,3,1,1,4,29,...,0,1,0,0,0,0,0,1,1,1
203,1,12,2,9,1,4,4,4,2,21,...,0,1,0,1,0,0,0,1,1,0
22,1,10,4,22,1,2,3,3,1,48,...,0,1,0,1,0,0,1,0,0,0
397,1,36,4,23,1,3,4,2,2,46,...,0,1,0,0,1,0,0,1,0,0
429,1,18,4,12,1,1,2,4,4,55,...,0,1,0,0,0,1,0,0,1,1


$$
Precision = 1 -\frac{\sum_i^m \left| Y^{(i)} - \hat{Y}^{(i)} \right|}{m}
$$

In [28]:
precision_train = 1 - (np.abs(df_train.Y - df_train.Predicciones)).sum() / len(Y_train)
print(f'Precision train: {precision_train}')

Precision train: 0.7828571428571429


In [29]:
df_test['Predicciones'] = logit_fit.predict(X_test)
df_test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test['Predicciones'] = logit_fit.predict(X_test)


Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X17,X18,X19,X20,X21,X22,X23,X24,Y,Predicciones
17,1,30,0,81,5,2,3,3,3,25,...,0,1,0,0,1,0,0,1,0,1
127,2,12,2,6,1,3,3,2,3,30,...,0,1,0,0,1,0,0,1,1,0
363,4,6,2,18,1,3,4,2,2,21,...,0,1,0,1,0,0,0,1,0,0
82,4,18,2,16,2,3,2,4,2,24,...,0,1,0,1,0,0,1,0,0,0
60,2,9,2,14,1,3,4,1,1,27,...,0,1,0,0,1,0,0,1,0,0


In [30]:
precision_test = 1 - (np.abs(df_test.Y - df_test.Predicciones)).sum() / len(Y_test)
print(f'Precision test: {precision_test}')

Precision test: 0.7733333333333333
