# Guardar y deployar un modelo predictivo

Para este ejemplo utilizaremos un árbol de decisión

In [1]:
import os
import pandas as pd
from xgboost.sklearn import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

  from pandas import MultiIndex, Int64Index


In [2]:
df = pd.read_csv(os.path.join('../Datasets/diabetes.csv'))
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age','Glucose','BloodPressure','DiabetesPedigreeFunction']
X = df[feature_cols]
Y = df['Outcome']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1) # 70% training, 30% test

## Optimización de parámetros

In [4]:
#https://github.com/conda-forge/hyperopt-feedstock
from hyperopt import fmin, tpe, hp, STATUS_OK,Trials

In [5]:
space ={
    'n_estimators':hp.quniform('n_estimators',100,1000,1),  
    'learning_rate':hp.quniform('learning_rate',0.025,0.5,0.025),
    'max_depth':hp.quniform('max_depth',1,13,1),
    'subsample': hp.quniform('subsample',0.5,1,0.05),
    'colsample_bytree':hp.quniform('colsample_bytree',0.5,1,0.05),
    'nthread':6,
    'silent':1
}

In [6]:
def objective(params):
    params['n_estimators'] = int(params['n_estimators'])
    params['max_depth'] = int(params['max_depth'])  
    classifier = XGBClassifier(**params)
    classifier.fit(X_train,Y_train)   
    accuracy = accuracy_score(Y_test, classifier.predict(X_test))
    return {'loss': 1-accuracy, 'status': STATUS_OK}

In [7]:
trials=Trials()
best=fmin(objective,space,algo=tpe.suggest,trials=trials,max_evals=20)

  0%|                                                                                                                   | 0/20 [00:00<?, ?trial/s, best loss=?]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


  0%|                                                                                                                   | 0/20 [00:00<?, ?trial/s, best loss=?]

                                                                                                                                                               



  0%|                                                                                                                   | 0/20 [00:00<?, ?trial/s, best loss=?]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



  5%|████▍                                                                                   | 1/20 [00:00<00:09,  2.02trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


  5%|████▍                                                                                   | 1/20 [00:00<00:09,  2.02trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



  5%|████▍                                                                                   | 1/20 [00:00<00:09,  2.02trial/s, best loss: 0.23376623376623373]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 10%|████████▊                                                                               | 2/20 [00:00<00:07,  2.46trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 10%|████████▊                                                                               | 2/20 [00:00<00:07,  2.46trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



 10%|████████▊                                                                               | 2/20 [00:00<00:07,  2.46trial/s, best loss: 0.23376623376623373]

 15%|█████████████▏                                                                          | 3/20 [00:01<00:05,  3.38trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 15%|█████████████▏                                                                          | 3/20 [00:01<00:05,  3.38trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



 15%|█████████████▏                                                                          | 3/20 [00:01<00:05,  3.38trial/s, best loss: 0.23376623376623373]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 20%|█████████████████▌                                                                      | 4/20 [00:01<00:04,  3.57trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 20%|█████████████████▌                                                                      | 4/20 [00:01<00:04,  3.57trial/s, best loss: 0.23376623376623373]

                                                                                                                                                               



 20%|█████████████████▌                                                                      | 4/20 [00:01<00:04,  3.57trial/s, best loss: 0.23376623376623373]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 25%|██████████████████████                                                                  | 5/20 [00:01<00:04,  3.66trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 25%|██████████████████████                                                                  | 5/20 [00:01<00:04,  3.66trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 25%|██████████████████████                                                                  | 5/20 [00:01<00:04,  3.66trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 30%|██████████████████████████▍                                                             | 6/20 [00:01<00:04,  3.48trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 30%|██████████████████████████▍                                                             | 6/20 [00:01<00:04,  3.48trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 30%|██████████████████████████▍                                                             | 6/20 [00:01<00:04,  3.48trial/s, best loss: 0.22077922077922074]

 35%|██████████████████████████████▊                                                         | 7/20 [00:02<00:03,  3.84trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 35%|██████████████████████████████▊                                                         | 7/20 [00:02<00:03,  3.84trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 35%|██████████████████████████████▊                                                         | 7/20 [00:02<00:03,  3.84trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 40%|███████████████████████████████████▏                                                    | 8/20 [00:02<00:03,  3.64trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 40%|███████████████████████████████████▏                                                    | 8/20 [00:02<00:03,  3.64trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 40%|███████████████████████████████████▏                                                    | 8/20 [00:02<00:03,  3.64trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 45%|███████████████████████████████████████▌                                                | 9/20 [00:02<00:03,  3.33trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 45%|███████████████████████████████████████▌                                                | 9/20 [00:02<00:03,  3.33trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 45%|███████████████████████████████████████▌                                                | 9/20 [00:02<00:03,  3.33trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 50%|███████████████████████████████████████████▌                                           | 10/20 [00:02<00:03,  3.33trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 50%|███████████████████████████████████████████▌                                           | 10/20 [00:02<00:03,  3.33trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 55%|███████████████████████████████████████████████▊                                       | 11/20 [00:03<00:02,  3.84trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 55%|███████████████████████████████████████████████▊                                       | 11/20 [00:03<00:02,  3.84trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 55%|███████████████████████████████████████████████▊                                       | 11/20 [00:03<00:02,  3.84trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 60%|████████████████████████████████████████████████████▏                                  | 12/20 [00:03<00:02,  3.27trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 60%|████████████████████████████████████████████████████▏                                  | 12/20 [00:03<00:02,  3.27trial/s, best loss: 0.22077922077922074]

                                                                                                                                                               



 60%|████████████████████████████████████████████████████▏                                  | 12/20 [00:03<00:02,  3.27trial/s, best loss: 0.22077922077922074]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 65%|████████████████████████████████████████████████████████▌                              | 13/20 [00:03<00:02,  3.29trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 65%|████████████████████████████████████████████████████████▌                              | 13/20 [00:03<00:02,  3.29trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 65%|████████████████████████████████████████████████████████▌                              | 13/20 [00:03<00:02,  3.29trial/s, best loss: 0.21212121212121215]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 70%|████████████████████████████████████████████████████████████▉                          | 14/20 [00:04<00:02,  2.95trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 70%|████████████████████████████████████████████████████████████▉                          | 14/20 [00:04<00:02,  2.95trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 70%|████████████████████████████████████████████████████████████▉                          | 14/20 [00:04<00:02,  2.95trial/s, best loss: 0.21212121212121215]

 75%|█████████████████████████████████████████████████████████████████▎                     | 15/20 [00:04<00:01,  3.62trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 75%|█████████████████████████████████████████████████████████████████▎                     | 15/20 [00:04<00:01,  3.62trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 75%|█████████████████████████████████████████████████████████████████▎                     | 15/20 [00:04<00:01,  3.62trial/s, best loss: 0.21212121212121215]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 80%|█████████████████████████████████████████████████████████████████████▌                 | 16/20 [00:04<00:01,  4.00trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 80%|█████████████████████████████████████████████████████████████████████▌                 | 16/20 [00:04<00:01,  4.00trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 80%|█████████████████████████████████████████████████████████████████████▌                 | 16/20 [00:04<00:01,  4.00trial/s, best loss: 0.21212121212121215]

 85%|█████████████████████████████████████████████████████████████████████████▉             | 17/20 [00:04<00:00,  4.84trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 85%|█████████████████████████████████████████████████████████████████████████▉             | 17/20 [00:04<00:00,  4.84trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 85%|█████████████████████████████████████████████████████████████████████████▉             | 17/20 [00:04<00:00,  4.84trial/s, best loss: 0.21212121212121215]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 90%|██████████████████████████████████████████████████████████████████████████████▎        | 18/20 [00:05<00:00,  3.89trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 90%|██████████████████████████████████████████████████████████████████████████████▎        | 18/20 [00:05<00:00,  3.89trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 90%|██████████████████████████████████████████████████████████████████████████████▎        | 18/20 [00:05<00:00,  3.89trial/s, best loss: 0.21212121212121215]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



 95%|██████████████████████████████████████████████████████████████████████████████████▋    | 19/20 [00:05<00:00,  4.10trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



Parameters: { "silent" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


 95%|██████████████████████████████████████████████████████████████████████████████████▋    | 19/20 [00:05<00:00,  4.10trial/s, best loss: 0.21212121212121215]

                                                                                                                                                               



 95%|██████████████████████████████████████████████████████████████████████████████████▋    | 19/20 [00:05<00:00,  4.10trial/s, best loss: 0.21212121212121215]

100%|███████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  4.82trial/s, best loss: 0.21212121212121215]

100%|███████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:05<00:00,  3.69trial/s, best loss: 0.21212121212121215]





  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



In [8]:
print(best)

best['n_estimators']=int(best['n_estimators'])
best['max_depth']=int(best['max_depth'])

{'colsample_bytree': 1.0, 'learning_rate': 0.05, 'max_depth': 10.0, 'n_estimators': 382.0, 'subsample': 0.6000000000000001}


In [9]:
tree_v5 = XGBClassifier(**best)
tree_v5

XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=1.0,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=10,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=382, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None,
              subsample=0.6000000000000001, tree_method=None,
              validate_parameters=None, verbosity=None)

In [10]:
tree_v5.fit(X_train, Y_train)



  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1.0,
              enable_categorical=False, gamma=0, gpu_id=-1,
              importance_type=None, interaction_constraints='',
              learning_rate=0.05, max_delta_step=0, max_depth=10,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=382, n_jobs=8, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=0.6000000000000001, tree_method='exact',
              validate_parameters=1, verbosity=None)

In [11]:
# métricas de desempeño
# accuracy
print('accuracy del clasificador - version 5 : {0:.2f}'.format(accuracy_score(Y_test, tree_v5.predict(X_test))))
# confusion matrix
print('matriz de confusión del clasificador - version 5: \n {0}'.format(confusion_matrix(Y_test, tree_v5.predict(X_test))))
# precision 
print('precision del clasificador - version 5 : {0:.2f}'.format(precision_score(Y_test, tree_v5.predict(X_test))))
# precision 
print('recall del clasificador - version 5 : {0:.2f}'.format(recall_score(Y_test, tree_v5.predict(X_test))))
# f1
print('f1 del clasificador - version 5 : {0:.2f}'.format(f1_score(Y_test, tree_v5.predict(X_test))))

accuracy del clasificador - version 5 : 0.79
matriz de confusión del clasificador - version 5: 
 [[125  21]
 [ 28  57]]
precision del clasificador - version 5 : 0.73
recall del clasificador - version 5 : 0.67
f1 del clasificador - version 5 : 0.70


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


## Guardar el clasificador

Python cuenta con librerias de serialización que facilitan guardar el clasificador en un archivo (pickle, joblib); este archivo puede ser restaurado para hacer predicciones.

In [12]:
import pickle

In [13]:
# Cree la carpeta 'clasificador' en el folder donde está el notebook
ruta_archivo_clasificador = os.path.join('tree_v5.pkl')
# Abrir el archivo para escribir contenido binario
archivo_clasificador = open(ruta_archivo_clasificador, 'wb')
# Guardar el clasificador
pickle.dump(tree_v5, archivo_clasificador)
# Cerrar el archivo
archivo_clasificador.close()

## Cargar el clasificador

In [14]:
#Abrir el archivo en modo lectura de contenido binario y cargar el clasificdor
archivo_clasificador = open(ruta_archivo_clasificador, "rb")
tree_v6 = pickle.load(archivo_clasificador)
archivo_clasificador.close()

In [15]:
# métricas de desempeño
# accuracy
print('accuracy del clasificador - version 6 : {0:.2f}'.format(accuracy_score(Y_test, tree_v6.predict(X_test))))
# confusion matrix
print('matriz de confusión del clasificador - version 6: \n {0}'.format(confusion_matrix(Y_test, tree_v6.predict(X_test))))
# precision 
print('precision del clasificador - version 6 : {0:.2f}'.format(precision_score(Y_test, tree_v6.predict(X_test))))
# precision 
print('recall del clasificador - version 6 : {0:.2f}'.format(recall_score(Y_test, tree_v6.predict(X_test))))
# f1
print('f1 del clasificador - version 6 : {0:.2f}'.format(f1_score(Y_test, tree_v6.predict(X_test))))

accuracy del clasificador - version 6 : 0.79
matriz de confusión del clasificador - version 6: 
 [[125  21]
 [ 28  57]]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


precision del clasificador - version 6 : 0.73
recall del clasificador - version 6 : 0.67
f1 del clasificador - version 6 : 0.70


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


##  Modificar el clasificador

In [16]:
tree_v6.n_estimators = 700
# Volver a entrenar el clasificador con los nuevos parámetros
tree_v6.fit(X_train,Y_train)

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1.0,
              enable_categorical=False, gamma=0, gpu_id=-1,
              importance_type=None, interaction_constraints='',
              learning_rate=0.05, max_delta_step=0, max_depth=10,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=700, n_jobs=8, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              subsample=0.6000000000000001, tree_method='exact',
              validate_parameters=1, verbosity=None)

In [17]:
# Guardar el nuevo clasificador
ruta_archivo_clasificador = os.path.join('tree_v6.pkl')
archivo_clasificador = open(ruta_archivo_clasificador, "wb")
pickle.dump(tree_v6, archivo_clasificador)
archivo_clasificador.close()

##  Opciones de despliegue

<img src="./img/35-opciones-despliegue.png" style="width:600px"/>

## Flask

Flask es un **framework** minimalista escrito en Python que permite crear aplicaciones web rápidamente y con un mínimo número de líneas de código - **Wikipedia**.

__[Flask](https://flask.palletsprojects.com/en/1.1.x/)__

Ahora, utilizando el clasificador guardado anteriormente en un archivo binario, se creará un servicio API REST en Flask para poder utilizarlo. Para hacerlo funcionar hacerlo, colocar el código en un archivo .py y hacerlo correr en la consola.

In [18]:
# http://flask.palletsprojects.com/en/1.1.x/quickstart/#quickstart
from flask import Flask, request, jsonify
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle

app = Flask(__name__)

classifier_filepath = os.path.join("tree_v6.pkl")
classifier_file = open(classifier_filepath, "rb")
classifier = pickle.load(open(classifier_filepath, "rb"))
classifier_file.close()

# Desactiva el API /predict del clasificador.
# retorna {"message": "/predict disabled"}, 200 OK
@app.route('/disable', methods=['GET'])
def disable():
    global ACTIVATED
    ACTIVATED = False
    return {'message': '/predict disabled'}, 200

# Activa el API /predict del clasificador.
# retorna {"message": "/predict enabled"}, 200 OK
@app.route('/enable', methods=['GET'])
def enable():
    global ACTIVATED
    ACTIVATED = True
    return {'message': '/predict enabled'}, 200

# Entrena el modelo con los nuevos hyper-parámetros y retorna la nueva exactitud. Por ejemplo, {"accuracy": 0.81}, 200 OK
# Se pueden enviar los siguiente hyper-parámetros: { "n_estimators": 10, "criterion": "gini", "max_depth": 7 }
# "criterion" puede ser "gini" o "entropy", "n_estimators" y "max_depth" son un número entero positivo
# Unicamente "max_depth" es opcional en cuyo caso se deberá emplear None. Si los otros hyper-parámetros no están presentes se retorna:
# {"message": "missing hyper-parameter"}, 404 BAD REQUEST
# Finalmente, sólo se puede ejecutar este endpoint después de ejecutar GET /disable. En otro caso retorna {"message": "can not reset an enabled classifier"}, 400 BAD REQUEST
@app.route('/reset', methods=['POST'])
def reset():
    if ACTIVATED:
        return {"message": "can not reset an enabled classifier"}, 400
    json_request = request.get_json(force=True)
    if 'criterion' not in json_request or 'n_estimators' not in json_request:
        return {"message": "missing hyper-parameter"}, 400

    classifier.n_estimators = json_request.get('n_estimators')
    classifier.criterion = json_request.get('criterion')
    classifier.max_depth = json_request.get('max_depth')
 
    df = pd.read_csv(os.path.join("diabetes.csv"))
    feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age',
                    'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']
    X = df[feature_cols]
    Y = df["Outcome"]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.3, random_state=1)

    classifier.fit(X_train, Y_train)
    return {'accuracy': accuracy_score(Y_test, classifier.predict(X_test))}, 200

# Recibe una lista de observaciones y retorna la clasificación para cada una de ellas.
# Los valores en cada observación se corresponden con la siguientes variables:
#['Pregnancies', 'Insulin', 'BMI', 'Age', 'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']
# Por ejemplo: para estas observaciones:
# [
#	[7,135,26.0,51,136,74,0.647],
#	[9,175,34.2,36,112,82,0.260]
# ]
@app.route('/predict', methods=['POST'])
def predict():
    if not ACTIVATED:
        return {"message": "classifier is not enabled"}, 400
    predict_request = request.get_json(force=True)
    predict_response = classifier.predict(predict_request)
    return {'cases': predict_request,
            'diabetes': predict_response.tolist()}


if __name__ == '__main__':
    app.run(port=8080, debug=True)

 * Serving Flask app '__main__' (lazy loading)


 * Environment: production




[2m   Use a production WSGI server instead.[0m


 * Debug mode: on


 * Running on http://127.0.0.1:8080/ (Press CTRL+C to quit)


 * Restarting with stat


Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.9/site-packages/traitlets/config/application.py", line 845, in launch_instance
    app.initialize(argv)
  File "/usr/local/lib/python3.9/site-packages/traitlets/config/application.py", line 88, in inner
    return method(app, *args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 632, in initialize
    self.init_sockets()
  File "/usr/local/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 282, in init_sockets
    self.shell_port = self._bind_socket(self.shell_socket, self.shell_port)
  File "/usr/local/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 229, in _bind_socket
    return self._try_bind_socket(s, port)
  File "/usr/local/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 205, in _try_bind_socket
    s.bind("tcp://%s:%i" % (self.

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
