In [None]:
import pandas as pd

# Location of the heart.csv dataset. This is an absolute path, so it has to be
# adjusted when the notebook runs in a different environment.
DATA_PATH = '/content/heart.csv'

df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the loaded frame: as the last expression of the cell it is rendered
# as a table (pandas abbreviates long frames with an ellipsis row).
df

Unnamed: 0,Age,Sex,CheastPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,NumMajorVessels,Thal,Target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Scale the numeric features: each column listed below is standardized with
# StandardScaler, all other columns are copied over unchanged.
# NOTE(review): the scaler is fit on every row of `df`, i.e. its statistics
# are computed before any train/test split takes place.

numerical_features = ['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']

# Learn the scaling statistics first, then apply them to a copy of the frame
# so that the raw `df` stays intact.
scaler = StandardScaler().fit(df[numerical_features])

df_encoded = df.copy()
df_encoded[numerical_features] = scaler.transform(df[numerical_features])

In [None]:
# Preview the frame after scaling: the columns in `numerical_features` now hold
# standardized values, the remaining columns keep their original values.
df_encoded

Unnamed: 0,Age,Sex,CheastPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,NumMajorVessels,Thal,Target
0,-0.268437,1,0,-0.377636,-0.659332,0,1,0.821321,0,-0.060888,2,2,3,0
1,-0.158157,1,0,0.479107,-0.833861,1,0,0.255968,1,1.727137,0,0,3,0
2,1.716595,1,0,0.764688,-1.396233,0,1,-1.048692,1,1.301417,0,0,3,0
3,0.724079,1,0,0.936037,-0.833861,0,1,0.516900,0,-0.912329,2,1,3,0
4,0.834359,0,0,0.364875,0.930822,1,1,-1.874977,0,0.705408,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,0.503520,1,1,0.479107,-0.484803,0,1,0.647366,1,-0.912329,2,0,2,1
1021,0.613800,1,0,-0.377636,0.232705,0,0,-0.352873,1,1.471705,1,1,3,0
1022,-0.819834,1,0,-1.234378,0.562371,0,0,-1.353113,1,-0.060888,1,1,2,0
1023,-0.488996,0,0,-1.234378,0.155137,0,0,0.429923,0,-0.912329,2,0,2,1


In [None]:
# Split the data into training and test sets, then standardize the numeric
# features using statistics learned from the training rows only.
#
# Fitting the scaler on the full frame before splitting would let the test
# rows influence the mean/variance used for preprocessing (data leakage), so
# the split is done on the raw frame and the scaler is fit afterwards.

X = df.drop('Target', axis=1)
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on independent copies so the column assignments below never write into
# slices of X.
X_train, X_test = X_train.copy(), X_test.copy()

# A separate scaler object: it sees the training partition only and is then
# applied unchanged to the test partition.
train_scaler = StandardScaler().fit(X_train[numerical_features])
X_train[numerical_features] = train_scaler.transform(X_train[numerical_features])
X_test[numerical_features] = train_scaler.transform(X_test[numerical_features])

In [None]:
def evaluate_model(y_true, y_pred, y_pred_proba, model_name):
    """Compute, print and return classification metrics for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, compared against ``y_true`` for accuracy,
            precision, recall and F1.
        y_pred_proba: Scores handed to ``roc_auc_score`` together with
            ``y_true``.
        model_name: Header line printed above the metric values.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'``,
        ``'f1'`` and ``'roc_auc'`` mapping to the computed metric values.
    """
    # All metrics are computed up front, before anything is printed.
    scores = {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred),
        'roc_auc': roc_auc_score(y_true, y_pred_proba),
    }

    # Printed label for each metric key, in report order.
    report_rows = (
        ("Accuracy", 'accuracy'),
        ("Precision", 'precision'),
        ("Recall", 'recall'),
        ("F1-score", 'f1'),
        ("ROC AUC", 'roc_auc'),
    )

    print(model_name)
    for label, key in report_rows:
        print(f"{label}: {scores[key]:.4f}")

    return scores

In [None]:
# Constant model (baseline): always predicts the most frequent class seen
# during fitting, giving a reference point for the real models.

dummy_model = DummyClassifier(strategy='most_frequent').fit(X_train, y_train)

dummy_predictions = dummy_model.predict(X_test)
# Column 1 of predict_proba is used as the score for ROC AUC
# (presumably the probability of class 1 — labels look like 0/1).
dummy_proba = dummy_model.predict_proba(X_test)[:, 1]

dummy_model_result = evaluate_model(
    y_true=y_test,
    y_pred=dummy_predictions,
    y_pred_proba=dummy_proba,
    model_name="Константная модель (baseline)",
)

Константная модель (baseline)
Accuracy: 0.5024
Precision: 0.5024
Recall: 1.0000
F1-score: 0.6688
ROC AUC: 0.5000


In [None]:
# Logistic regression: fit on the training split, evaluate on the test split.

lr_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)

lr_predictions = lr_model.predict(X_test)
# Column 1 of predict_proba is used as the score for ROC AUC.
lr_proba = lr_model.predict_proba(X_test)[:, 1]

lr_model_results = evaluate_model(
    y_true=y_test,
    y_pred=lr_predictions,
    y_pred_proba=lr_proba,
    model_name="Логистическая регрессия",
)

Логистическая регрессия
Accuracy: 0.7951
Precision: 0.7563
Recall: 0.8738
F1-score: 0.8108
ROC AUC: 0.8771


In [None]:
# Gradient boosting: fit on the training split, evaluate on the test split.
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

gb_predictions = gb_model.predict(X_test)
# Column 1 of predict_proba is used as the score for ROC AUC.
gb_proba = gb_model.predict_proba(X_test)[:, 1]

gb_model_results = evaluate_model(
    y_true=y_test,
    y_pred=gb_predictions,
    y_pred_proba=gb_proba,
    model_name="Градиентный бустинг",
)

Градиентный бустинг
Accuracy: 0.9317
Precision: 0.9159
Recall: 0.9515
F1-score: 0.9333
ROC AUC: 0.9807


In [None]:
# Feature-importance analysis for the gradient boosting model: pair every
# training column with the importance reported by the fitted model and order
# the rows from most to least important.

feature_importance = (
    pd.DataFrame({
        'feature': X_train.columns,
        'importance': gb_model.feature_importances_,
    })
    .sort_values(by='importance', ascending=False)
)

# One print call with a newline separator emits the header and the table
# exactly as two consecutive prints would.
print("\nТоп-10 важных признаков (Градиентный бустинг):", feature_importance.head(10), sep="\n")


Топ-10 важных признаков (Градиентный бустинг):
            feature  importance
2    CheastPainType    0.269297
11  NumMajorVessels    0.164227
12             Thal    0.155700
9           Oldpeak    0.117248
0               Age    0.065266
4       Cholesterol    0.055624
7             MaxHR    0.049970
10         ST_Slope    0.041020
3         RestingBP    0.033338
1               Sex    0.021087
