In [52]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.impute import SimpleImputer
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report

In [53]:
# Location of the input CSV, kept in one named constant so it can be changed
# in a single place when the notebook is run from a different environment.
HEART_CSV_PATH = '/content/heart.csv'

df = pd.read_csv(HEART_CSV_PATH)

## Создаем инстанс для заполнения пропусков

In [54]:
class HeartDataImputer:
    """Fills missing values in a single patient's feature dictionary.

    ``fit`` learns one fill value per feature from a training frame: the
    column mean for numeric features and the most frequent value for
    categorical ones. ``transform`` substitutes those values for every
    listed feature that is absent, ``None`` or NaN in a patient record.
    """

    def __init__(self):
        # Column names are used exactly as spelled here; fit() requires all
        # of them to be present in the training frame.
        self.features = {
            'numeric': ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak', 'NumMajorVessels'],
            'categorical': ['Sex', 'CheastPainType', 'FastingBS', 'RestingECG',
                          'ExerciseAngina', 'ST_Slope', 'Thal']
        }

        # These imputers are fitted in fit(); transform() itself only reads
        # the plain dictionaries stored in self.statistics.
        self.numeric_imputer = SimpleImputer(strategy='mean')
        self.categorical_imputer = SimpleImputer(strategy='most_frequent')
        # Populated by fit(): {'numeric': {...}, 'categorical': {...}}.
        self.statistics = None
        self.is_fitted = False

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True for None and for scalar NaN-like values (NaN, pd.NA, NaT)."""
        if value is None:
            return True
        # pd.isna returns an array for list-likes, so only ask it about scalars.
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    def fit(self, data: pd.DataFrame):
        """
        Learns the fill values from the training data.

        Parameters:
        data (pd.DataFrame): Training data; must contain every column listed
            in ``self.features``.

        Returns:
        HeartDataImputer: ``self``, so calls can be chained.

        Raises:
        ValueError: If any required column is absent from ``data``.
        """

        required_columns = self.features['numeric'] + self.features['categorical']
        missing_columns = set(required_columns) - set(data.columns)
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")

        self.numeric_imputer.fit(data[self.features['numeric']])
        self.categorical_imputer.fit(data[self.features['categorical']])

        # Per-feature fill values as plain dicts, which is what transform() uses.
        # mode() may return several rows when values tie; row 0 is taken.
        self.statistics = {
            'numeric': data[self.features['numeric']].mean().to_dict(),
            'categorical': data[self.features['categorical']].mode().iloc[0].to_dict()
        }

        self.is_fitted = True
        return self

    def transform(self, patient_data: dict) -> dict:
        """
        Fills the missing values in one patient's data.

        A feature counts as missing when its key is absent or its value is
        ``None`` or NaN. Keys that are not listed in ``self.features`` are
        passed through untouched.

        Parameters:
        patient_data (dict): Patient data, possibly with missing values.
            The input dictionary is not modified.

        Returns:
        dict: A copy of the patient data with the missing values filled in.

        Raises:
        ValueError: If called before ``fit``.
        """
        if not self.is_fitted:
            raise ValueError("Imputer must be fitted before transform")

        filled_data = patient_data.copy()

        # Numeric and categorical features are handled identically; only the
        # table of fill values differs.
        for kind in ('numeric', 'categorical'):
            fill_values = self.statistics[kind]
            for feature in self.features[kind]:
                if self._is_missing(filled_data.get(feature)):
                    filled_data[feature] = fill_values[feature]

        return filled_data

In [55]:
# Build the imputer and learn its per-feature fill values from the full frame.
imputer = HeartDataImputer()

# fit() returns the imputer itself, so as the last expression of the cell it
# also makes the notebook display the object's repr.
imputer.fit(df)

<__main__.HeartDataImputer at 0x7b4f75f28580>

## Обучаем StandardScaler

In [56]:
# Separate the label column from the remaining feature columns.
y = df['Target']
X = df.drop(columns='Target')

In [57]:
# Continuous columns that get standardized; the remaining columns are passed
# to the model as they are.
numerical_features = ['Age', 'RestingBP', 'Cholesterol','MaxHR', 'Oldpeak']

scaler = StandardScaler()

# Fit on the untouched columns of `df` rather than on `X`: `X` is overwritten
# on this line, so fitting on it would standardize already-standardized data
# (and refit the scaler on it) whenever this cell is executed more than once.
X[numerical_features] = scaler.fit_transform(df[numerical_features])

## Создаем модель

In [58]:
# Classifier with a fixed seed and a raised iteration cap for the solver.
model = LogisticRegression(
    max_iter=1000,
    random_state=42,
)

In [59]:
# Train on the feature frame X (its numeric columns were standardized in an
# earlier cell) against the labels y. As the cell's last expression, the
# value returned by fit() is what the notebook displays.
model.fit(X, y)

## Класс для предсказания по heart.csv

In [60]:
class HeartBasedPredictor:
    """Scores a single patient with a fitted model, scaler and imputer.

    The patient dictionary is imputed, laid out as one row in a fixed
    feature order, its numerical columns are passed through the scaler,
    and the row is handed to the model's ``predict_proba``.
    """

    # Column layout of the row passed to the model.
    # NOTE(review): presumably this matches the column order of the frame
    # the model was trained on; confirm against the training data.
    FEATURE_ORDER = [
        'Age', 'Sex', 'CheastPainType', 'RestingBP', 'Cholesterol',
        'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak',
        'ST_Slope', 'NumMajorVessels', 'Thal'
    ]

    # Columns sent through the scaler, in the order the scaler receives them.
    NUMERICAL_FEATURES = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']

    def __init__(self, model, scaler, imputer):
        self.model = model
        self.scaler = scaler
        self.imputer = imputer

    def preprocess(self, features):
        """
        Preprocesses input features

        Parameters:
        features (dict): Dictionary with patient features

        Returns:
        np.array: Float array of shape (1, 13) holding the imputed features
            in ``FEATURE_ORDER``, with the numerical columns scaled
        """

        features = self.imputer.transform(features)

        # dtype=float is required: a record made only of integers would
        # otherwise yield an integer array, and writing the scaled values
        # back into it would silently truncate them.
        X = np.array(
            [[features[name] for name in self.FEATURE_ORDER]],
            dtype=float,
        )
        numerical_indices = [
            self.FEATURE_ORDER.index(name) for name in self.NUMERICAL_FEATURES
        ]
        X[:, numerical_indices] = self.scaler.transform(X[:, numerical_indices])

        return X

    def predict(self, features):
        """
        Makes prediction for single patient

        Parameters:
        features (dict): Dictionary with patient features

        Returns:
        float: Value from column 1 of the model's ``predict_proba`` output
            for this patient (a probability, not a hard 0/1 label)
        """
        X = self.preprocess(features)
        return self.model.predict_proba(X)[0][1]

In [61]:
# Bundle the fitted model, scaler and imputer into one single-patient scorer.
predictor = HeartBasedPredictor(
    model=model,
    scaler=scaler,
    imputer=imputer,
)

## Тестируем

In [62]:
# Example patient record; None marks a value that is not available and is
# left for the predictor's imputer to fill in.
test_patient_data = {
    'Age': 45, 'Sex': 0, 'CheastPainType': None,
    'RestingBP': 130, 'Cholesterol': None, 'FastingBS': 1,
    'RestingECG': None, 'MaxHR': 145, 'ExerciseAngina': None,
    'Oldpeak': 2.5, 'ST_Slope': None, 'NumMajorVessels': None,
    'Thal': 2,
}

# Bare last expression so the notebook displays the returned value.
predictor.predict(test_patient_data)



0.4955939219651414

## Класс для композиции моделей

In [64]:
class PredictorComposer:
    """Combines two single-patient predictors by averaging their outputs.

    Each predictor receives its own subset of the patient's features; the
    mean of the two returned values is rounded to produce the final label.
    """

    # Keys forwarded to the heart-based predictor.
    HEART_BASED_FEATURES = (
        'Age', 'Sex', 'CheastPainType', 'RestingBP', 'Cholesterol',
        'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak',
        'ST_Slope', 'NumMajorVessels', 'Thal',
    )

    def __init__(self, heart_based_predictor, dv_predictor):
        self.heart_based_predictor = heart_based_predictor
        self.dv_predictor = dv_predictor

    def select_heart_based_features(self, all_features) -> dict:
        """
        Extracts the features used by the heart-based predictor

        Parameters:
        all_features (dict): Dictionary with all patient features

        Returns:
        dict: One entry per key in ``HEART_BASED_FEATURES``; a key absent
            from ``all_features`` is returned as ``None`` instead of raising
            ``KeyError``, so that the downstream predictor can treat it as a
            missing value
        """
        return {name: all_features.get(name) for name in self.HEART_BASED_FEATURES}

    def select_dv_features(self, all_features) -> dict:
        """
        Extracts the features used by ``dv_predictor``

        NOTE(review): not implemented yet. It currently returns ``None``, so
        ``predict`` passes ``None`` to ``dv_predictor.predict``.
        """
        return None

    def predict(self, all_features):
        """
        Makes prediction for single patient

        Parameters:
        all_features (dict): Dictionary with patient features

        Returns:
        int: 0 for healthy, 1 for heart disease
        """

        heart_based_features = self.select_heart_based_features(all_features)
        heart_based_predict = self.heart_based_predictor.predict(heart_based_features)

        dv_features = self.select_dv_features(all_features)
        dv_predict = self.dv_predictor.predict(dv_features)

        # Built-in round() sends an exact tie to the nearest even integer,
        # so an average of exactly 0.5 becomes 0.
        return round((heart_based_predict + dv_predict) / 2)