# Métodos de validación en Machine Learning
---
## Escuela Superior de Cómputo - Instituto Politécnico Nacional
> Daniel Armas Ramírez

> Machine Learning and Artificial Intelligence - PhD Consuelo Varinia García Mendoza

### Paso 1: Importar herramientas

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold
from sklearn.utils import resample

### Paso 2: Leer CSV

In [4]:
df = pd.read_csv('./metodosDeValidacion.csv')
df.head()

Unnamed: 0,x,y
0,1,2
1,2,4
2,3,6
3,4,8
4,5,10


In [5]:
X = df.drop(['y'], axis = 1).values
y = df['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4, train_size = 0.6, shuffle = False)

In [7]:
X_train

array([[ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12]])

In [8]:
y_train

0      2
1      4
2      6
3      8
4     10
5     12
6     14
7     16
8     18
9     20
10    22
11    24
Name: y, dtype: int64

## Métodos de validación
1. Validación Cruzada

In [19]:
def cross_val(X_train, y_train, k = 3):
	print('\n ----------------------\n')
	print('\nValidación cruzada\n')
	kf = KFold(n_splits=k)
	print('\tX\t\n')
	for train, test in kf.split(X_train):
		print(X_train[train], X_train[test])
	print ('\n y \n')
	for train, test in kf.split(X_train):
		print(y_train[train], y_train[test])

In [20]:
cross_val(X_train, y_train, 3)


 ----------------------


Validación cruzada

	X	

[[ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[1]
 [2]
 [3]
 [4]]
[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 9]
 [10]
 [11]
 [12]] [[5]
 [6]
 [7]
 [8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]] [[ 9]
 [10]
 [11]
 [12]]

 y 

4     10
5     12
6     14
7     16
8     18
9     20
10    22
11    24
Name: y, dtype: int64 0    2
1    4
2    6
3    8
Name: y, dtype: int64
0      2
1      4
2      6
3      8
8     18
9     20
10    22
11    24
Name: y, dtype: int64 4    10
5    12
6    14
7    16
Name: y, dtype: int64
0     2
1     4
2     6
3     8
4    10
5    12
6    14
7    16
Name: y, dtype: int64 8     18
9     20
10    22
11    24
Name: y, dtype: int64


2. Leave One Out

In [25]:
def deja_uno_afuera(X_train,y_train):
	print('\n ----------------------\n')
	print('\n LeaveOneOut')
	loo = LeaveOneOut()
	print(loo.get_n_splits(X_train))
	print('\n X \n')
	for i, (train_index, test_index) in enumerate(loo.split(X_train)):
		print('\n',i,X_train[train_index], X_train[test_index])
	print('\n y \n')
	for i, (train_index, test_index) in enumerate(loo.split(X_train)):
		print('\n',i,y_train[train_index], y_train[test_index])

In [26]:
deja_uno_afuera(X_train, y_train)


 ----------------------


 LeaveOneOut
12

 X 


 0 [[ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[1]]

 1 [[ 1]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[2]]

 2 [[ 1]
 [ 2]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[3]]

 3 [[ 1]
 [ 2]
 [ 3]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[4]]

 4 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[5]]

 5 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[6]]

 6 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]] [[7]]

 7 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 9]
 [10]
 [11]
 [12]] [[8]]

 8 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [10]
 [11]
 [12]] [[9]]

 9 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [11]
 [12]] [[10]]

 10 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [12]] [[11]]

 11 [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]] [[12]]

 y 


 0 1      4
2  

3. Bootstrap

In [29]:
def bootstrap(X_train, y_train, nc, m):
	print('\n ----------------------\n')
	print('\n Bootstrap')
	print('\n X \n')
	for i in range(nc):
		train = resample(X_train, n_samples = m)
		test = np.array([x for x in X_train if x not in train])
		print('train',train,'test',test)
	print('\n y')
	for i in range(nc):
		train = resample(y_train, n_samples = m)
		test = np.array([x for x in y_train if x not in train])
		print('train',train,'test',test)
	print('\n')

In [30]:
bootstrap(X_train, y_train, 2, 5)


 ----------------------


 Bootstrap

 X 

train [[8]
 [6]
 [9]
 [6]
 [1]] test [[ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 7]
 [10]
 [11]
 [12]]
train [[12]
 [11]
 [12]
 [12]
 [ 7]] test [[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 8]
 [ 9]
 [10]]

 y
train 4    10
9    20
6    14
8    18
1     4
Name: y, dtype: int64 test [ 2 10 12 14 16 18 20 22 24]
train 11    24
2      6
4     10
8     18
9     20
Name: y, dtype: int64 test [ 6 10 12 14 16 18 20 22 24]


