In [1]:
import ads


In [2]:
# Display the installed ADS package version, so the environment used for this notebook is on record.
ads.__version__


'2.9.0'

## Aula 1 - Lectura de la base de datos

In [1]:
from ads.dataset.factory import DatasetFactory

In [3]:
# Open the CSV through ADS's DatasetFactory; no target column is declared at this point.
ds = DatasetFactory.open("data/datos_salud_final.csv")

loop1:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Preview the first rows of the dataset.
ds.head()

In [None]:
# Inspect which class DatasetFactory.open returned when no target was given.
type(ds)

In [None]:
# List the column names available in the dataset.
ds.columns

In [None]:
# Re-open the dataset declaring the target column, so ADS knows "riesgo_eac_decada" is the label.
# The path uses the same "data/" location as the first DatasetFactory.open call in this notebook;
# the two calls previously pointed at different files.
ds = DatasetFactory.open("data/datos_salud_final.csv", target="riesgo_eac_decada")
ds.head()

In [None]:
# Check the dataset class again now that a target column has been declared.
type(ds)

In [None]:
# Render ADS's built-in view of the target column inside the notebook.
ds.target.show_in_notebook()

### 1.4 Creando nuestro primer modelo

In [None]:
import numpy as np
# Fix NumPy's global random seed so steps that draw from NumPy's global RNG repeat across runs.
np.random.seed(42)

In [None]:
# Split the dataset into train and test partitions using the method's default arguments.
train, test = ds.train_test_split()

In [None]:
# Feature matrix of the training partition.
train.X

In [None]:
# Feature matrix of the test partition.
test.X

In [None]:
# Target values of the training partition.
train.y

In [None]:
# Target values of the test partition.
test.y

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Decision tree with scikit-learn's default hyperparameters; serves as the first (baseline) model.
primer_modelo = DecisionTreeClassifier()


In [None]:
# Fit the baseline tree on the training partition.
primer_modelo.fit(train.X, train.y)

In [None]:
# Mean accuracy of the baseline tree on the held-out test partition.
primer_modelo.score(test.X, test.y)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Confusion matrix of the baseline tree on the test partition.
# from_estimator is the constructor that accepts (estimator, X, y); from_predictions expects
# y_true / y_pred instead, and the keyword is spelled "estimator".
matriz = ConfusionMatrixDisplay.from_estimator(estimator=primer_modelo, X=test.X, y=test.y)

### 1.5 Mejorando el primer modelo

In [None]:
# Same classifier as the baseline, but with the tree depth capped at 100 levels.
modelo_profundo = DecisionTreeClassifier(max_depth=100)

In [None]:
# Fit the depth-capped tree on the training partition.
modelo_profundo.fit(train.X, train.y)

In [None]:
# Mean accuracy of the depth-capped tree on the test partition, for comparison with the baseline.
modelo_profundo.score(test.X, test.y)

In [None]:
# Confusion matrix of the depth-capped tree on the test partition.
# The keyword is "estimator"; the previous spelling ("estimetor") raised a TypeError.
matriz = ConfusionMatrixDisplay.from_estimator(estimator=modelo_profundo, X=test.X, y=test.y)

### 1.6 Selección de Features

In [None]:
# Per-feature importance scores learned by the fitted baseline tree, then displayed.
atributos = primer_modelo.feature_importances_
atributos

In [None]:
import pandas as pd

# Label each importance score with its training column name and sort ascending,
# so the least important features appear first.
pd.Series(atributos, index=train.X.columns).sort_values()

In [None]:
# Remove the "diabetes" feature from BOTH partitions: a model refit on the reduced training
# matrix cannot score a test matrix that still carries the extra column.
# errors="ignore" keeps the cell re-runnable once the column is already gone.
# (columns= already selects the axis, so the redundant axis=1 is dropped.)
train.X.drop(columns="diabetes", inplace=True, errors="ignore")
test.X.drop(columns="diabetes", inplace=True, errors="ignore")

In [None]:
# Confirm which feature columns remain in the training matrix.
train.X.columns

In [None]:
# Refit the baseline tree on the current training features.
primer_modelo.fit(train.X, train.y)

In [None]:
# Mean accuracy of the refitted tree on the test partition.
# NOTE(review): test.X must carry exactly the same columns that were used in fit.
primer_modelo.score(test.X, test.y)

In [None]:
# Confusion matrix of the refitted baseline tree on the test partition.
# The keyword is "estimator"; the previous spelling ("estimetor") raised a TypeError.
matriz = ConfusionMatrixDisplay.from_estimator(estimator=primer_modelo, X=test.X, y=test.y)

### 2.1 Explorando AutoML de Oracle

In [2]:
from ads.common.data import ADSData

In [None]:
# Wrap the full dataset (converted to a pandas DataFrame) in an ADSData object,
# naming the target column; then confirm the resulting type.
ads_data = ADSData.build(ds.to_pandas(),"riesgo_eac_decada")
type(ads_data)

In [None]:
from ads.automl.driver import AutoML

In [None]:
# Create the AutoML driver over the ADSData object built for this dataset.
oracle_automl = AutoML(ads_data)

In [None]:
# Let AutoML search over four candidate algorithms, optimising accuracy, with a time budget of 150.
# Algorithm names follow the estimator class names, so the logistic regression entry is
# "LogisticRegression" (capital L), consistent with the other three entries.
# The call returns the selected model and a baseline model.
modelo_automl, baseline = oracle_automl.train(
    score_metric="accuracy",
    model_list=[
        "DecisionTreeClassifier",
        "SVC",
        "LogisticRegression",
        "RandomForestClassifier",
    ],
    time_budget=150,
)

In [None]:
# Tabulate at most 20 AutoML trials, ordered by the "Mean Validation Score" column.
oracle_automl.print_trials(max_row=20, sort_column="Mean Validation Score")

In [None]:
# Plot the trials from AutoML's algorithm-selection stage.
oracle_automl.visualize_algorithm_selection_trials()

### 2.3 Features y modelo seleccionado

In [None]:
# Render the notebook view of the model returned by AutoML.
modelo_automl.show_in_notebook()

In [None]:
# Plot the trials from AutoML's feature-selection stage. This section is about the selected
# features, and the algorithm-selection plot is already rendered in the previous section,
# so repeating it here added nothing.
oracle_automl.visualize_feature_selection_trials()

### 3.1 ADS Evaluator

In [None]:
# Feature names reported by the AutoML model
# (presumably the subset kept after feature selection; confirm against the output).
modelo_automl.feature_names()

In [None]:
# Re-split the data keeping only the listed feature columns plus the target.
# The target column is "riesgo_eac_decada", the same name used when opening the dataset;
# the previous spelling "riesgo_eaac_decada" does not match that column.
train, test = ds[[
    'sexo', 'edad', 'fumador', 'cigarrillos_por_dia',
    'uso_medicamento_presion', 'acv', 'hipertension', 'colesterol_total',
    'presion_arterial_sistolica', 'presion_arterial_diastolica', 'imc',
    'frecuencia_cardiaca', 'glicemia', 'categoria_de_fumador',
    'riesgo_eac_decada',
]].train_test_split()

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Print the AutoML model's summary
# (presumably includes the chosen algorithm and its hyperparameters; confirm against the output).
modelo_automl.summary()

In [None]:
# Hyperparameters for the random forest, as keyword -> value pairs.
# TODO: paste here the hyperparameter dictionary reported for the AutoML model.
# The previous placeholder was a set literal containing a sentence, which cannot be unpacked
# with ** into keyword arguments. An empty dict keeps the notebook runnable, but the model is
# then built with scikit-learn's defaults until the real values are filled in.
parametros = {}

In [None]:
# Build a random forest from the hyperparameters held in `parametros`.
# The ** unpacking requires `parametros` to be a mapping of keyword names to values.
mejor_modelo = RandomForestClassifier(**parametros)

In [None]:
# Recreate the decision tree with max_depth=100, to be compared against the random forest.
primer_modelo = DecisionTreeClassifier(max_depth=100)

In [None]:
# Fit both candidates on the same training partition so their results are comparable.
mejor_modelo.fit(train.X, train.y)
primer_modelo.fit(train.X, train.y)

In [None]:
from ads.common.model import ADSModel

In [None]:
# Wrap both scikit-learn estimators as ADSModel objects, each with a display name.
ads_mejor_modelo = ADSModel.from_estimator(mejor_modelo, name="Mejor Modelo")
ads_primer_modelo = ADSModel.from_estimator(primer_modelo, name="Primer Modelo")

In [None]:
from ads.evaluations.evaluator import ADSEvaluator

In [None]:
# Evaluator over both wrapped models: the test partition is the evaluation data,
# and the training partition is supplied as well.
evaluator = ADSEvaluator(test_data=test, models=[ads_mejor_modelo, ads_primer_modelo], training_data=train)

### 3.2 Analizando las métricas

In [None]:
# Render the evaluator's report inside the notebook.
evaluator.show_in_notebook()

### 3.3 Evaluando más métricas

In [None]:
# Show the metrics exposed by the evaluator.
evaluator.metrics

### 3.4 Overfitting

In [None]:
# Replace the deep tree with a much shallower one (max_depth=5), refit it on the training
# partition, and wrap it again under the same display name.
primer_modelo = DecisionTreeClassifier(max_depth=5)
primer_modelo.fit(train.X, train.y)
ads_primer_modelo = ADSModel.from_estimator(primer_modelo, name="Primer Modelo")

In [None]:
# Rebuild the evaluator so it uses the newly wrapped shallow tree.
evaluator = ADSEvaluator(test_data=test, models=[ads_mejor_modelo, ads_primer_modelo], training_data=train)

In [None]:
# Show the metrics again, now computed with the shallow tree.
evaluator.metrics

### 4.1 Model Explainability

In [None]:
from ads.explanations.explainer import ADSExplainer

In [None]:
# Explainer for the random-forest model; positional arguments are passed in the order
# (test partition, wrapped model, training partition).
explainer = ADSExplainer(test, ads_mejor_modelo, train)

In [None]:
# Obtain the global explanation object from the explainer.
explainer_global = explainer.global_explanation()

In [None]:
# Compute feature importance through the global explainer.
importance = explainer_global.compute_feature_importance()

In [None]:
# Render the feature-importance result, requesting the "detailed" view.
importance.show_in_notebook("detailed")

### 4.2 Valores de Features x Clasificación

In [None]:
# Partial dependence computed for the "edad" feature.
pdp_edad = explainer_global.compute_partial_dependence("edad")

In [None]:
# Partial dependence computed for the "presion_arterial_diastolica" feature.
pdp_presion = explainer_global.compute_partial_dependence("presion_arterial_diastolica")

In [None]:
# Render the partial-dependence result for diastolic blood pressure.
pdp_presion.show_in_notebook()

In [None]:
# Render the partial-dependence result for age.
pdp_edad.show_in_notebook()

### 4.3 Explicando decisión individual

In [None]:
# Obtain the local explanation object, used to explain individual predictions.
local_explainer = explainer.local_explanation()

In [None]:
# Features of the test observation at position 13; the list index [[13]] selects it as a
# one-row table rather than a flattened row (assuming test.X is a pandas DataFrame).
test.X.iloc[[13]]

In [None]:
# Target value of that same test observation (position 13), kept as a one-element selection.
test.y.iloc[[13]]

In [None]:
# Explain the model's prediction for the single observation at position 13,
# passing its features and its true label.
explanation = local_explainer.explain(test.X.iloc[[13]], test.y.iloc[[13]])

In [None]:
# Render the local explanation inside the notebook.
explanation.show_in_notebook()

### 5.1 Creando nuestro artefacto