# Export de todos los modelos:

## Librerías y dataframes:

In [58]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import joblib

In [22]:
df = pd.read_csv("Heart_std.csv")
df2 = pd.read_csv("Heart_limpio.csv")

## 1. Modelo de Regresión lineal:

In [7]:
X = df[['ExerciseAngina', 'Oldpeak','Sex','Age','FastingBS',"ST_Slope","RestingBP"]]

y = df['HeartDisease']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=3)

modelo_1 = LinearRegression(n_jobs=-1) 
modelo_1.fit(X_train, y_train)

print("El modelo tiene un porcentaje de acierto del",round(modelo_1.score(X_test, y_test),2),"%")

predictions = modelo_1.predict(X_test)
predictions

print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))

El modelo tiene un porcentaje de acierto del 0.51 %
MAE: 0.5669297197830977
MSE: 0.49061221943995825
RMSE: 0.7004371630917068


In [8]:
joblib.dump(modelo_1, "1.Modelo_de_Regresión_lineal.pkl")

['1.Modelo_de_Regresión_lineal.pkl']

## 2. Modelo de Regresión polinómica:

In [12]:
X = df[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']]

y = df["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

poly_reg = PolynomialFeatures(degree=2)
poly_reg.fit(X_train)
X_poly_train = poly_reg.transform(X_train) 

modelo_2 = LinearRegression()
modelo_2.fit(X_poly_train, y_train)

print('MAE train', mean_absolute_error(y_train, modelo_2.predict(X_poly_train)))
print('MAPE train', mean_absolute_percentage_error(y_train, modelo_2.predict(X_poly_train)))
print('MSE train', mean_squared_error(y_train, modelo_2.predict(X_poly_train)))
print('RMSE train', np.sqrt(mean_squared_error(y_train, modelo_2.predict(X_poly_train))))
print('R2 score train', 100*r2_score(y_train, modelo_2.predict(X_poly_train)),"%")

MAE train 0.4480744016387889
MAPE train 0.4514036114254938
MSE train 0.37865221350167677
RMSE train 0.6153472300268172
R2 score train 62.42042293349811 %


In [13]:
joblib.dump(modelo_2, "2.Modelo_de_Regresión_polinómica.pkl")

['2.Modelo_de_Regresión_polinómica.pkl']

## 3. Modelo de Regresión Ridge:

In [15]:
X = df[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']]

y = df["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

modelo_3 = Ridge(alpha=100)
modelo_3.fit(X_train, y_train)

print("El modelo ridge tiene un porcentaje de acierto del",100*round(modelo_3.score(X_test, y_test),2),"%")

print('MAE test', mean_absolute_error(y_test, modelo_3.predict(X_test)))
print('MSE test', mean_squared_error(y_test, modelo_3.predict(X_test)))
print('RMSE test', np.sqrt(mean_squared_error(y_test, modelo_3.predict(X_test))))

El modelo ridge tiene un porcentaje de acierto del 48.0 %
MAE test 0.5759434171299143
MSE test 0.506623917827101
RMSE test 0.7117751877012158


In [16]:
joblib.dump(modelo_3, "3.Modelo_de_Regresión_ridge.pkl")

['3.Modelo_de_Regresión_ridge.pkl']

## 4. Modelo de Regresión Logística:

In [26]:
X = df2[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

modelo_4 = LogisticRegression(random_state=20)
modelo_4.fit(X_train, y_train);

print("El modelo tiene un porcentaje de acierto del",modelo_4.score(X_test, y_test)*100,"%")

El modelo tiene un porcentaje de acierto del 84.78260869565217 %


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [27]:
joblib.dump(modelo_4, "4.Modelo_de_Regresión_logística.pkl")

['4.Modelo_de_Regresión_logística.pkl']

## 5. Árbol de decisiones:

In [36]:
X = df2[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 1)

modelo_5 = DecisionTreeRegressor(max_depth=24, random_state=7)
modelo_5.fit(X_train, y_train)

y_pred = modelo_5.predict(X_test)

print("modelo_2 Accuracy:",metrics.accuracy_score(y_test, y_pred))

print("MAE", mean_absolute_error(y_test, y_pred))
print("MAPE", mean_absolute_percentage_error(y_test, y_pred))
print("MSE", mean_squared_error(y_test, y_pred))
print("RMSE", np.sqrt(mean_squared_error(y_test, y_pred)))

modelo_2 Accuracy: 0.8315217391304348
MAE 0.16847826086956522
MAPE 269236934244975.4
MSE 0.16847826086956522
RMSE 0.4104610345325914


In [38]:
joblib.dump(modelo_5, "5.Modelo_de_Arbol_de_decisiones.pkl")

['5.Modelo_de_Arbol_de_decisiones.pkl']

## 6. Bagging Classifier:

In [41]:
X = df2[['ST_Slope', 'Oldpeak', "Cholesterol", 'Age', "RestingBP", 'MaxHR',"Sex"]]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 2)

estimator = DecisionTreeClassifier(max_depth=60,random_state=56)

modelo_6 = BaggingClassifier(
    base_estimator = estimator,
    n_estimators=50,
    max_samples=300,
    bootstrap=True,
    max_features = 6,
    random_state=1)


modelo_6.fit(X_train, y_train)
y_pred = modelo_6.predict(X_test)
accuracy_score(y_test, y_pred)

0.8442028985507246

In [43]:
joblib.dump(modelo_6, "6.Modelo_Bagging_Classifier.pkl")

['6.Modelo_Bagging_Classifier.pkl']

## 7. Random Forest Classifier:

In [46]:
X = df2[['ST_Slope', 'Oldpeak', "Cholesterol", 'Age', "RestingBP", 'MaxHR',"Sex"]]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 2)

modelo_7 = RandomForestClassifier(n_estimators=500,
                                 max_leaf_nodes=16,
                                 random_state=42)
modelo_7.fit(X_train, y_train)

y_pred_rf = modelo_7.predict(X_test)

accuracy_score(y_test, y_pred_rf)

0.8260869565217391

In [47]:
joblib.dump(modelo_7, "7.Modelo_Random_Forest_Classifier.pkl")

['7.Modelo_Random_Forest_Classifier.pkl']

## 8. Random Forest Regressor:

In [49]:
X = df2[['ST_Slope', 'Oldpeak', "Cholesterol", 'Age', "RestingBP", 'MaxHR',"Sex"]]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 2)

modelo_8 = RandomForestRegressor(n_estimators=100,
                                 max_leaf_nodes=2,
                                 random_state=51)
modelo_8.fit(X_train, y_train)

y_pred_reg = modelo_8.predict(X_train)
mean_absolute_error(y_train, y_pred_reg)

0.3214610875532118

In [50]:
joblib.dump(modelo_8, "8.Modelo_Random_Forest_Regressor.pkl")

['8.Modelo_Random_Forest_Regressor.pkl']

## 9. Ada Boost Classifier:

In [52]:
X = df2[['ST_Slope', 'Oldpeak', "Cholesterol", 'Age', "RestingBP", 'MaxHR',"Sex"]]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 2)

estimator = DecisionTreeClassifier(max_depth=1)

modelo_9 = AdaBoostClassifier(base_estimator = estimator,
                             n_estimators=200,
                             learning_rate=0.5,
                             random_state=42)

modelo_9.fit(X_train, y_train)

y_pred = modelo_9.predict(X_test)
accuracy_score(y_test, y_pred)

0.8115942028985508

In [53]:
joblib.dump(modelo_9, "9.Modelo_Ada_Boost_Classifier.pkl")

['9.Modelo_Ada_Boost_Classifier.pkl']

## 10. Modelo K Neighbors Classifier:

In [56]:
X = df2[['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope']]

y = df2["HeartDisease"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=0)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


modelo_10 = KNeighborsClassifier(n_neighbors=9)
modelo_10.fit(X_train,y_train)
print("El modelo 2 tiene un",round(100*modelo_10.score(X_test,y_test),2),"%","de acierto.")

El modelo 2 tiene un 86.59 % de acierto.


In [57]:
joblib.dump(modelo_10, "10.Modelo_K_Neighbors_Classifier.pkl")

['10.Modelo_K_Neighbors_Classifier.pkl']