# Modelos de Clasificación

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, roc_auc_score

import joblib


In [3]:

class MachineLearningProcessor:

    def __init__(self, model_name: str):
        self.model_name = model_name
        self.data = pd.read_csv("../data/heart.csv")
        self.pre_process_data()

    def pre_process_data(self):
        self.processed_data = self.data.drop(
            ["Sex", "ChestPainType", "RestingECG", "ExerciseAngina", "ST_Slope" ],
            axis=1
        )
        
    def split_data(self):
        X = self.processed_data.drop("HeartDisease", axis=1)
        y = self.processed_data["HeartDisease"]
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, random_state=100)

    def train(self, model):
        print("1. Separando datos de train y test..")
        self.split_data()
        print(f"2. Entrenando model {self.model_name}")
        self.fitted_model = model.fit(self.X_train, self.y_train)
        print("3. Entrenamiento finalizado")

    def predict(self):
        self.predictions = self.fitted_model.predict(self.X_test)
        print(classification_report(self.predictions, self.y_test))

In [63]:
processor.data

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [4]:
processor = MachineLearningProcessor(model_name="Model Baseline")

In [5]:
processor.train(model=DummyClassifier())

1. Separando datos de train y test..
2. Entrenando model Model Baseline
3. Entrenamiento finalizado


In [66]:
processor.predict()

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.58      0.73       230

    accuracy                           0.58       230
   macro avg       0.50      0.29      0.37       230
weighted avg       1.00      0.58      0.73       230



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [67]:
logistic_regression_processor = MachineLearningProcessor(model_name="Regresión logística")
logistic_regression_processor.train(model=LogisticRegression())

1. Separando datos de train y test..
2. Entrenando model Regresión logística
3. Entrenamiento finalizado


In [None]:
logistic_regression_processor.predict()

## Guardado de modelos

In [1]:
def save_ml_model(ml_object, name):
    """
    Guarda modelos de ML
    """
    joblib.dump(ml_object, f"../models/{name}.joblib")
    print("Model guaradado exitosamente")
save_ml_model()