In [1]:
import pandas as pd
from sklearn.datasets import load_diabetes, load_wine
from sklearn.model_selection import train_test_split as tts

In [2]:
diabetes=pd.DataFrame(load_diabetes().data, columns=load_diabetes().feature_names)
diabetes['target']=load_diabetes().target

diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [3]:
wine=pd.DataFrame(load_wine().data, columns=load_wine().feature_names)
wine['target']=load_wine().target

wine.target.unique()

array([0, 1, 2])

In [4]:
X_train_d, X_test_d, y_train_d, y_test_d = tts(diabetes.drop('target', axis=1), diabetes.target)

In [5]:
X_train_v, X_test_v, y_train_v, y_test_v = tts(wine.drop('target', axis=1), wine.target)

# Gradiente Descendente Estocástico SGD

![](data/grad1.png)

![](data/grad2.jpg)

Es una derivación numérica. Lo estocástico viene de la elección de la muestra, en vez de coger todos los datos solo coge uno. Reduce el costo computacional. Sirve como regresor o como clasificador.

##### SGDR

In [8]:
from sklearn.linear_model import SGDRegressor as SGDR

In [52]:
sgdr=SGDR(max_iter=8000)

sgdr.fit(X_train_d, y_train_d)

train_score=sgdr.score(X_train_d, y_train_d) #R2
test_score=sgdr.score(X_test_d, y_test_d)

print ('train R2:',train_score, '-- test R2:', test_score)

train R2: 0.49476166546094774 -- test R2: 0.5304516759533083


In [10]:
sgdr

SGDRegressor(alpha=0.0001, average=False, early_stopping=False, epsilon=0.1,
             eta0=0.01, fit_intercept=True, l1_ratio=0.15,
             learning_rate='invscaling', loss='squared_loss', max_iter=1000,
             n_iter_no_change=5, penalty='l2', power_t=0.25, random_state=None,
             shuffle=True, tol=0.001, validation_fraction=0.1, verbose=0,
             warm_start=False)

##### SGDC

In [16]:
from sklearn.linear_model import SGDClassifier as SGDC

In [39]:
sgdc=SGDC(max_iter=8000)

sgdc.fit(X_train_v, y_train_v)

train_score=sgdc.score(X_train_v, y_train_v) # Accuracy
test_score=sgdc.score(X_test_v, y_test_v)

print ('train Acc:',train_score, '-- test Acc:', test_score)

train Acc: 0.6766917293233082 -- test Acc: 0.6666666666666666


In [18]:
sgdc

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=8000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

# Naive Bayes

$$P(clase|data)= \frac{P(data|clase)·P(clase)}{P(data)}$$




+ P(data|clase) es $N(\mu, \sigma)$ (datos normalizados)

+ P(clase) es el a priori 

+ P(data) no se calcula

+ P(clase|data) el a posteriori, lo que se busca

Lo de Naive viene de la suposición de independencia condicional entre predictores, lo cuál no suele ser verdad.


$$\hat{y}=argmax [P(y) \prod_{i=1}^{n} P(x_i|y)]$$

### GNB (Gaussian Naive Bayes)

In [54]:
from sklearn.naive_bayes import GaussianNB as GNB

gnb=GNB()

gnb.fit(X_train_v, y_train_v)

train_score=gnb.score(X_train_v, y_train_v) # Accuracy
test_score=gnb.score(X_test_v, y_test_v)

print ('train Acc:',train_score, '-- test Acc:', test_score)

train Acc: 0.9924812030075187 -- test Acc: 0.9777777777777777


In [55]:
gnb

GaussianNB(priors=None, var_smoothing=1e-09)

### MNB (Multinomial Naive Bayes)

In [56]:
from sklearn.naive_bayes import MultinomialNB as MNB

mnb=MNB()

mnb.fit(X_train_v, y_train_v)

train_score=mnb.score(X_train_v, y_train_v) # Accuracy
test_score=mnb.score(X_test_v, y_test_v)

print ('train Acc:',train_score, '-- test Acc:', test_score)

train Acc: 0.8872180451127819 -- test Acc: 0.8444444444444444


In [57]:
mnb

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

### CNB (Complement Naive Bayes)

In [58]:
from sklearn.naive_bayes import ComplementNB as CNB

cnb=CNB()

cnb.fit(X_train_v, y_train_v)

train_score=cnb.score(X_train_v, y_train_v) # Accuracy
test_score=cnb.score(X_test_v, y_test_v)

print ('train Acc:',train_score, '-- test Acc:', test_score)

train Acc: 0.6616541353383458 -- test Acc: 0.5777777777777777


In [59]:
cnb

ComplementNB(alpha=1.0, class_prior=None, fit_prior=True, norm=False)

### BNB (Bernoulli Naive Bayes)

In [60]:
from sklearn.naive_bayes import BernoulliNB as BNB

bnb=BNB()

bnb.fit(X_train_v, y_train_v)

train_score=bnb.score(X_train_v, y_train_v) # Accuracy
test_score=bnb.score(X_test_v, y_test_v)

print ('train Acc:',train_score, '-- test Acc:', test_score)

train Acc: 0.44360902255639095 -- test Acc: 0.26666666666666666


In [61]:
bnb

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

# K-Nearest Neighbors

![](data/knn.png)

Este modelo se basa en al idea de vecindario, según sea lo que rodea al nuevo dato, así se clasificará.

Básicamente se trata de calcular las distancias (euclídea o L1 normalmente) entre los datos, para asignar el valor (etiqueta) al nuevo dato según sean sus vecinos.

Se suele tomar $k$ como un número impar para poder tomar una decisión en caso de empate.

# Ensemble Models


### Bagging  -  Boosting


![](data/boost.png)

### Stacking


![](data/stacking.png)