# Maestría en Ciencia de Datos e Inteligencia Artificial
#### 8. Machine Learning and Deep Learning
#### Docente: Msc. Renzo Claure Aracena.

### Validación Cruzada
Validación sobre Máquinas de Vectores de Soporte (SVC) para los Datos de Flores

In [None]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
# Load the Iris data from the UCI repository. Column names are supplied
# explicitly through `names`, so pandas reads every line of the file as data.
nombres = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Class']
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
flores = pd.read_csv(url, names=nombres)
# Preview the first rows to confirm the columns were parsed as expected.
flores.head()

In [None]:
# Count how many rows fall under each distinct value of the 'Class' column.
flores['Class'].value_counts()

In [None]:
# Scatter plot of petal length against petal width, coloured by 'Class'.
sns.set_style('whitegrid')
grid = sns.FacetGrid(flores, hue='Class', height=4)
grid.map(plt.scatter, 'Petal_Length', 'Petal_Width')
grid.add_legend()
plt.show()

In [None]:
from sklearn.svm import SVC

# The label is the 'Class' column; the features are every other column.
y = flores['Class']
X = flores.drop(columns=['Class'])

# Hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit an RBF-kernel support vector classifier on the training portion only.
modelo_svc = SVC(kernel='rbf')
clas_flores = modelo_svc.fit(X_train, y_train)

In [None]:
# Score the fitted classifier on the training split and on the held-out split.
score_entrenamiento = clas_flores.score(X_train, y_train)
score_comprobacion = clas_flores.score(X_test, y_test)
print('Score SVC Entrenamiento: {:.3f}'.format(score_entrenamiento))
print('Score SVC Comprobacion: {:.3f}'.format(score_comprobacion))

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# Rebuild features and labels from the full dataset, then evaluate a fresh
# (unfitted) RBF SVC with 3-fold cross-validation.
y = flores['Class']
X = flores.drop(columns=['Class'])
clas_flores = SVC(kernel='rbf')
cv_scores = cross_val_score(estimator=clas_flores, X=X, y=y, cv=3)

# Report the individual fold scores followed by their average.
print('Scores para CrossValidation, 3 Folds: ', cv_scores)
promedio_cv = cv_scores.mean()
print('Score Promedio para CrossValidation, 3 Folds: {:.3f}'.format(promedio_cv))

In [None]:
from sklearn.model_selection import validation_curve

# Values of `gamma` to evaluate for a default SVC, each with 3-fold CV.
param_range = [0.001, 0.01, 1, 10, 1000]
train_scores, test_scores = validation_curve(
    estimator=SVC(),
    X=X,
    y=y,
    param_name='gamma',
    param_range=param_range,
    cv=3,
)

In [None]:
# Training scores from validation_curve; the plotting cell averages them over
# axis=1 and plots the result against param_range.
print(train_scores)

In [None]:
# Cross-validation scores from validation_curve; the plotting cell averages
# them over axis=1 and plots the result against param_range.
print(test_scores)

In [None]:
# Cross-validation curve: mean score versus gamma with a band of +/- one
# standard deviation, drawn for both the training and the validation scores.
plt.figure()

# Aggregate over axis=1 so there is one mean/std per entry in param_range.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

plt.title('Validation Curve with SVM')
# Raw string: '\g' is not a valid Python escape sequence, and the backslash
# has to reach matplotlib unchanged so that '$\gamma$' renders as a symbol.
plt.xlabel(r'$\gamma$ (gamma)')
plt.ylabel('Score')
plt.ylim(0.0, 1.1)
lw = 2

# Logarithmic x axis because the gamma values span several orders of magnitude.
plt.semilogx(param_range, train_scores_mean, label='Training score',
             color='darkorange', lw=lw)

plt.fill_between(param_range, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color='darkorange', lw=lw)

plt.semilogx(param_range, test_scores_mean, label='Cross-validation score',
             color='navy', lw=lw)

plt.fill_between(param_range, test_scores_mean - test_scores_std,
                 test_scores_mean + test_scores_std, alpha=0.2,
                 color='navy', lw=lw)

plt.legend(loc='best')
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Two-step pipeline: standardise the features, then classify with an SVC.
Input = [
    ('scale', StandardScaler()),
    ('model', SVC(C=1)),
]
pipe = Pipeline(steps=Input)

# Cross-validate the pipeline as a single estimator so that scaling and
# classification are evaluated together with 3 folds.
scores = cross_val_score(estimator=pipe, X=X, y=y, cv=3)

In [None]:
# Display the scores returned by cross_val_score for the scaled pipeline.
scores

In [None]:
# Average of the pipeline's cross-validation scores.
scores.mean()

### Realice el análisis para la muestra de Vino Rojo con Árboles de Decisión y Cross Validation

In [None]:
# Red-wine quality data from the UCI repository; the file is parsed with ';'
# as the field separator.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
base = pd.read_csv(url, sep=';')

In [None]:
# Preview the first three rows of the wine DataFrame.
base.head(3)