# 3.1 - Más Modelos

In [None]:
import pandas as pd

from sklearn.datasets import load_diabetes, load_wine
from sklearn.model_selection import train_test_split as tts

In [None]:
# Load the diabetes regression dataset once and reuse the returned bunch for
# the feature matrix, the column names and the target.
diabetes_raw=load_diabetes()

diabetes=pd.DataFrame(diabetes_raw.data, columns=diabetes_raw.feature_names)
diabetes['target']=diabetes_raw.target

diabetes.head()

In [None]:
# Load the wine classification dataset once and reuse the returned bunch for
# the feature matrix, the column names and the target.
wine_raw=load_wine()

wine=pd.DataFrame(wine_raw.data, columns=wine_raw.feature_names)
wine['target']=wine_raw.target

wine.head()

In [None]:
# Train/test split for the diabetes regression task. A fixed seed keeps every
# score reported below reproducible after a kernel restart.
X_train_d, X_test_d, y_train_d, y_test_d = tts(diabetes.drop('target', axis=1), diabetes.target, random_state=42)

In [None]:
# Train/test split for the wine classification task. The seed makes the split
# reproducible; stratifying keeps the class proportions equal in both folds.
X_train_v, X_test_v, y_train_v, y_test_v = tts(wine.drop('target', axis=1), wine.target, random_state=42, stratify=wine.target)

# Gradiente Descendente Estocástico SGD

![grad1](images/grad1.png)

![grad2](images/grad2.jpg)

Es una derivación numérica. Lo estocástico viene de la elección de la muestra, en vez de coger todos los datos solo coge uno. Reduce el costo computacional. Sirve como regresor o como clasificador.

##### SGDR

In [None]:
from sklearn.linear_model import SGDRegressor as SGDR

In [None]:
# Linear regressor trained with stochastic gradient descent on the diabetes split.
sgdr = SGDR(max_iter=10000).fit(X_train_d, y_train_d)

# Score on both folds (labelled R2 in the summary tuple below).
train_score = sgdr.score(X_train_d, y_train_d)
test_score = sgdr.score(X_test_d, y_test_d)

('train R2: ', train_score, '---  test R2: ', test_score)

In [None]:
#help(sgdr)

##### SGDC

In [None]:
from sklearn.linear_model import SGDClassifier as SGDC

In [None]:
# SGD-based linear models are sensitive to feature scale, so the classifier is
# wrapped in a pipeline that standardises the inputs first. The scaler is fit
# on the training fold only, so no test information leaks into it.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

sgdc=make_pipeline(StandardScaler(), SGDC(max_iter=10000))

sgdc.fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score=sgdc.score(X_train_v, y_train_v)
test_score=sgdc.score(X_test_v, y_test_v)

'train Acc: ', train_score, '---  test Acc: ', test_score

In [None]:
#help(sgdc)

# Naive Bayes

$$P(clase|data)= \frac{P(data|clase) \cdot P(clase)}{P(data)}$$

+ P(data|clase) es $N(\mu, \sigma)$ (datos normalizados)

+ P(clase) es el a priori 

+ P(data) no se calcula

+ P(clase|data) el a posteriori, lo que se busca

Lo de Naive viene de la suposición de independencia condicional entre predictores, lo cual no suele ser verdad.


$$\hat{y}=\arg\max_{y} \left[P(y) \prod_{i=1}^{n} P(x_i|y)\right]$$

### GNB (Gaussian Naive Bayes)

In [None]:
from sklearn.naive_bayes import GaussianNB as GNB

# Gaussian Naive Bayes fitted on the wine training fold.
gnb = GNB().fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = gnb.score(X_train_v, y_train_v)
test_score = gnb.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(gnb)

### MNB (Multinomial Naive Bayes)

In [None]:
from sklearn.naive_bayes import MultinomialNB as MNB

# Multinomial Naive Bayes fitted on the wine training fold.
mnb = MNB().fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = mnb.score(X_train_v, y_train_v)
test_score = mnb.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(mnb)

### CNB (Complement Naive Bayes)

In [None]:
from sklearn.naive_bayes import ComplementNB as CNB

# Complement Naive Bayes fitted on the wine training fold.
cnb = CNB().fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = cnb.score(X_train_v, y_train_v)
test_score = cnb.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(cnb)

### BNB (Bernoulli Naive Bayes)

In [None]:
from sklearn.naive_bayes import BernoulliNB as BNB

# Bernoulli Naive Bayes on the wine training fold; `binarize=3` is the
# threshold passed to the estimator for turning features into 0/1 values.
bnb = BNB(binarize=3).fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = bnb.score(X_train_v, y_train_v)
test_score = bnb.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(bnb)

# K-Nearest Neighbors

![knn](images/knn.png)

Este modelo se basa en la idea de vecindario: según sea lo que rodea al nuevo dato, así se clasificará.

Básicamente se trata de calcular las distancias (euclídea o L1 normalmente) entre los datos, para asignar el valor (etiqueta) al nuevo dato según sean sus vecinos.

Se suele tomar $k$ como un número impar para poder tomar una decisión en caso de empate.

#### KNNR

In [None]:
from sklearn.neighbors import KNeighborsRegressor as KNNR

In [None]:
# k-nearest-neighbours regressor (k=7) on the diabetes split.
knnr = KNNR(n_neighbors=7).fit(X_train_d, y_train_d)

# Score on both folds (labelled R2 in the summary tuple below).
train_score = knnr.score(X_train_d, y_train_d)
test_score = knnr.score(X_test_d, y_test_d)

('train R2: ', train_score, '---  test R2: ', test_score)

In [None]:
#help(knnr)

#### KNNC

In [None]:
from sklearn.neighbors import KNeighborsClassifier as KNNC

In [None]:
# k-nearest-neighbours classifier (k=13) on the wine split.
knnc = KNNC(n_neighbors=13).fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = knnc.score(X_train_v, y_train_v)
test_score = knnc.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(knnc)

# Ensemble Models


### Bagging  -  Boosting


![boost](images/boost.png)

#### Gradient Boost Regressor

In [None]:
from sklearn.ensemble import GradientBoostingRegressor as GBR

In [None]:
# Gradient-boosting regressor (100 estimators, learning rate 0.01) on the diabetes split.
gbr = GBR(n_estimators=100, learning_rate=0.01).fit(X_train_d, y_train_d)

# Score on both folds (labelled R2 in the summary tuple below).
train_score = gbr.score(X_train_d, y_train_d)
test_score = gbr.score(X_test_d, y_test_d)

('train R2: ', train_score, '---  test R2: ', test_score)

In [None]:
#help(gbr)

#### Gradient Boost Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier as GBC

In [None]:
# Gradient-boosting classifier (100 estimators, learning rate 0.1, depth 3) on the wine split.
gbc = GBC(n_estimators=100, learning_rate=0.1, max_depth=3).fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = gbc.score(X_train_v, y_train_v)
test_score = gbc.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(gbc)

## Extreme Gradient Boosting

In [None]:
!pip install xgboost

#### XGBR

In [None]:
from xgboost import XGBRegressor as XGBR

# XGBoost regressor with default hyper-parameters on the diabetes split.
xgbr = XGBR().fit(X_train_d, y_train_d)

# Score on both folds (labelled R2 in the summary tuple below).
train_score = xgbr.score(X_train_d, y_train_d)
test_score = xgbr.score(X_test_d, y_test_d)

('train R2: ', train_score, '---  test R2: ', test_score)

In [None]:
#help(xgbr)

#### XGBC

In [None]:
from xgboost import XGBClassifier as XGBC

# XGBoost classifier (10 boosting rounds) on the wine split.
xgbc = XGBC(use_label_encoder=False, n_estimators=10).fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = xgbc.score(X_train_v, y_train_v)
test_score = xgbc.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(xgbc)

### Catboost

##### CTR

In [None]:
#!pip install catboost

In [None]:
from catboost import CatBoostRegressor as CTR

# CatBoost regressor on the diabetes split; verbose=0 is passed to silence training output.
ctr = CTR(verbose=0).fit(X_train_d, y_train_d)

# Score on both folds (labelled R2 in the summary tuple below).
train_score = ctr.score(X_train_d, y_train_d)
test_score = ctr.score(X_test_d, y_test_d)

('train R2: ', train_score, '---  test R2: ', test_score)

In [None]:
ctr  # its hyper-parameters are documented at https://catboost.ai/

##### CTC

In [None]:
from catboost import CatBoostClassifier as CTC

# CatBoost classifier on the wine split; verbose=0 is passed to silence training output.
ctc = CTC(verbose=0).fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = ctc.score(X_train_v, y_train_v)
test_score = ctc.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

## LightGBM

In [None]:
!pip install lightgbm

#### LGBR

In [None]:
from lightgbm import LGBMRegressor as LGBMR

# LightGBM regressor with 1000 estimators on the diabetes split.
lgbmr = LGBMR(n_estimators=1000).fit(X_train_d, y_train_d)

# Score on both folds (labelled R2 in the summary tuple below).
train_score = lgbmr.score(X_train_d, y_train_d)
test_score = lgbmr.score(X_test_d, y_test_d)

('train R2: ', train_score, '---  test R2: ', test_score)

In [None]:
#help(lgbmr)

#### LGBC

In [None]:
from lightgbm import LGBMClassifier as LGBMC

# LightGBM classifier with default hyper-parameters on the wine split.
lgbmc = LGBMC().fit(X_train_v, y_train_v)

# Score on both folds (labelled accuracy in the summary tuple below).
train_score = lgbmc.score(X_train_v, y_train_v)
test_score = lgbmc.score(X_test_v, y_test_v)

('train Acc: ', train_score, '---  test Acc: ', test_score)

In [None]:
#help(lgbmc)

# PyCaret

https://pycaret.org/tutorial/

https://colab.research.google.com/drive/1GqQ3XAIzg4krBbnOpKyeRqT0qBQhdwYL#scrollTo=lUvE187JEQm3

In [None]:
#!pip install pycaret

In [None]:
from pycaret.datasets import get_data

# Fetch PyCaret's bundled 'credit' example dataset.
dataset=get_data('credit')

In [None]:
# (rows, columns) of the full credit dataset.
dataset.shape

In [None]:
# Sample 95% of the rows for modelling and keep the remaining 5% as unseen data.
# The complement must be taken with the ORIGINAL index labels, i.e. before
# reset_index(). Dropping after the reset would remove rows 0..n-1 by position
# instead of the sampled rows, leaving data_test overlapping the training data.
sampled=dataset.sample(frac=0.95, random_state=786)

data_test=dataset.drop(sampled.index).reset_index(drop=True)
data=sampled.reset_index(drop=True)

data.shape, data_test.shape

In [None]:
from pycaret.classification import *

In [None]:
# Initialise the PyCaret classification experiment on `data`, predicting the
# 'default' column; session_id fixes the experiment's random seed.
credito=setup(data=data, target='default', session_id=123)

In [None]:
# Let PyCaret train and rank its candidate classifiers (see the PyCaret docs
# for the exact model list and metrics).
compare_models()

In [None]:
# 'dt' is PyCaret's model id for a decision tree.
dt=create_model('dt')

In [None]:
# 'knn' is PyCaret's model id for k-nearest neighbours.
knn=create_model('knn')

In [None]:
# 'rf' is PyCaret's model id for a random forest; this model is used in the cells below.
rf=create_model('rf')

In [None]:
# ROC / AUC plot for the random forest.
plot_model(rf, plot='auc')

In [None]:
# Precision-recall plot for the random forest.
plot_model(rf, plot='pr')

In [None]:
# Feature-importance plot for the random forest.
plot_model(rf, plot='feature')

In [None]:
# Confusion-matrix plot for the random forest.
plot_model(rf, plot='confusion_matrix')

In [None]:
# Score the random forest with no explicit data, i.e. on PyCaret's internal
# hold-out set; the trailing ';' suppresses the returned frame's display.
predict_model(rf);

In [None]:
# Finalise the random forest (PyCaret refits it on the complete experiment
# data, hold-out included) and print the resulting estimator.
final_rf=finalize_model(rf)

print(final_rf)

In [None]:
# Evaluate the finalised model on the 5% of rows kept out of the experiment.
# Scoring it with no `data` argument would reuse the internal hold-out, which
# the finalised model has already been trained on.
predict_model(final_rf, data=data_test);

# H2o


https://www.h2o.ai/

In [None]:
#!pip install h2o

In [None]:
import h2o
from h2o.automl import H2OAutoML

In [None]:
# Start (or connect to) the H2O backend required by the cells below.
h2o.init()

In [None]:
# Base URL from which the Higgs sample CSVs are imported.
url='https://s3.amazonaws.com/erin-data/higgs/'


# Parse the data into H2O frames
train=h2o.import_file(url + 'higgs_train_10k.csv')
test=h2o.import_file(url + 'higgs_test_5k.csv')


# Predictors: every column except the response column.
X=train.columns
y='response'
X.remove(y)

In [None]:
# factor para clasificacion binaria

# Convert the response to a factor so the task is treated as binary classification

train[y]=train[y].asfactor()
test[y]=test[y].asfactor()

In [None]:
# 20 modelos

# AutoML run capped at 20 models, with a fixed seed

aml=H2OAutoML(max_models=20, seed=1)
aml.train(x=X, y=y, training_frame=train)

In [None]:
# AutoML leaderboard; rows=lb.nrows shows every row instead of only the first few.
lb=aml.leaderboard

lb.head(rows=lb.nrows)

In [None]:
# Display the leader model of the AutoML run.
aml.leader

In [None]:
# Predictions of the leader model on the test frame.
aml.leader.predict(test)

In [None]:
# Display the test frame, for comparison against the predictions above.
test

# Stacking


![stacking](images/stacking.png)

In [None]:
# Load the diamonds dataset from the repository's data folder.
df=pd.read_csv('../data/diamonds.csv')

df.head()

In [None]:
# Keep only the numeric columns (through the public select_dtypes API rather
# than the private _get_numeric_data) and discard the CSV's leftover index
# column. errors='ignore' makes the cell safe to re-run after the column has
# already been removed.
df=df.select_dtypes(include='number').drop(columns='Unnamed: 0', errors='ignore')

df.head()

In [None]:
# Features: every remaining column except the price; target: the price.
# NOTE: this rebinds the X and y names used earlier in the H2O section.
X=df.drop('price', axis=1)

y=df.price

In [None]:
# First split: half of the rows train the base models, the other half is held
# back for the meta-model and the final test. Seeded for reproducibility.
X_train, X_temp, y_train, y_temp = tts(X, y,  test_size=0.5, random_state=42)

In [None]:
# Second split: the held-back half is divided into a meta-training fold and a
# final test fold. Seeded for reproducibility.
X_meta, X_test, y_meta, y_test = tts(X_temp, y_temp,  test_size=0.5, random_state=42)

In [None]:
# Level-0 predictions: each base model is refit on X_train and then predicts
# the meta fold; one column per model becomes a feature for the meta-model.
n_df = pd.DataFrame({
    'xgbr': xgbr.fit(X_train, y_train).predict(X_meta),
    'lgbmr': lgbmr.fit(X_train, y_train).predict(X_meta),
    'ctr': ctr.fit(X_train, y_train).predict(X_meta),
    'gbr': gbr.fit(X_train, y_train).predict(X_meta),
    # further base models can be added here
})

n_df.head()

In [None]:
from sklearn.linear_model import LinearRegression as LinReg

In [None]:
# Meta-model: a linear regression over the base models' predictions.
linreg=LinReg()

linreg.fit(n_df.values, y_meta)   # features: base-model predictions; target: original y_meta (row counts must match!)

In [None]:
# Fase de testeo, solo predict

n_df['xgbr']=xgbr.predict(X_test)
n_df['lgbmr']=lgbmr.predict(X_test)
n_df['ctr']=ctr.predict(X_test)
n_df['gbr']=gbr.predict(X_test)

In [None]:
# prediccion final

linreg.predict(n_df.values)

In [None]:
# Score of the meta-model on the test fold's base-model predictions.
linreg.score(n_df.values, y_test)

In [None]:
# Root-mean-squared error of the stacked predictions on the test fold.
y_pred=linreg.predict(n_df.values)

from sklearn.metrics import mean_squared_error as mse

# mean_squared_error expects (y_true, y_pred). Its `squared=False` flag was
# deprecated in scikit-learn 1.4 and removed in later releases, so the square
# root is taken explicitly; this works on every version.
mse(y_test, y_pred) ** 0.5

#### mlxtend

In [None]:
#!pip install mlxtend

In [None]:
from mlxtend.regressor import StackingRegressor

import warnings
# NOTE(review): this silences ALL warnings for the rest of the session,
# including deprecation notices that may matter.
warnings.simplefilter('ignore')

In [None]:
# Same stacking scheme via mlxtend: four base regressors plus the linear meta-regressor.
metamodelo=StackingRegressor(regressors=[xgbr, lgbmr, ctr, gbr], meta_regressor=linreg)

In [None]:
# Fit the whole stack on the base-model training fold.
metamodelo.fit(X_train, y_train)

In [None]:
# NOTE(review): X_test's columns are renamed in place to generic f0..f5 before
# scoring the stack, presumably because a base model inside the stack expects
# those feature names - confirm. The next cell restores the real names.
X_test.columns=['f0', 'f1', 'f2', 'f3', 'f4', 'f5']

metamodelo.score(X_test, y_test)  # R2

In [None]:
# Restore the real column names, then print each standalone base model's
# score on the test fold for comparison with the stacked model.
X_test.columns = ['carat', 'depth', 'table', 'x', 'y', 'z']

for base_model in (xgbr, lgbmr, ctr, gbr):
    print(base_model.score(X_test, y_test))