# Machine Learning

<img src="https://miro.medium.com/max/1400/1*cG6U1qstYDijh9bPL42e-Q.jpeg">

<img src="https://cdn.educba.com/academy/wp-content/uploads/2019/08/Categories-of-Machine-Learning.jpg">


<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/0/05/Scikit_learn_logo_small.svg/1200px-Scikit_learn_logo_small.svg.png">

https://scikit-learn.org/stable/tutorial/machine_learning_map/index.html

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

In [2]:
X, y = load_iris(return_X_y=True)

In [3]:
X[0], y[0]

(array([5.1, 3.5, 1.4, 0.2]), 0)

In [4]:
clf = SVC()
clf.fit(X, y)
clf.predict([[5, 3, 2, 0.6]])

array([0])

## Train test split

In [5]:
len(X)

150

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [7]:
len(X_train)

100

In [8]:
clf = SVC()
clf.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

## Evaluando nuestro modelo

In [9]:
from sklearn.metrics import accuracy_score
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

1.0

## Preprocesamiento de los datos

In [10]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

In [11]:
X_train[0]

array([-0.13835603, -0.26550845,  0.22229072,  0.10894943])

## Uniendo todos los pasos: Pipeline

In [12]:
from sklearn.pipeline import Pipeline
pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())])
pipe.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svc',
                 SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='rbf', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [13]:
y_pred = pipe.predict(X_test)
y_pred

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2])

# Qué parámetros usar?



In [14]:
from sklearn import svm, datasets

In [23]:
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
svc = svm.SVC(C=0.0001, kernel='poly')
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
accuracy_score(y_test, y_pred)

0.3

Resultados malos, no? 
### Ejercicio: 
Usar el siguiente diccionario para probar todas las combinaciones de parametros en el modelo y ver cual o cuales tienen mejores resultados

In [24]:
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}

In [None]:
# Ejercicio aqui

In [27]:
from sklearn.model_selection import GridSearchCV
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svc = svm.SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(X_train, y_train)
clf.best_estimator_

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [28]:
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

1.0