In [1]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load the dataset
data = pd.read_csv('Hear_attack.csv')  # Replace with your actual file path

In [3]:
data.head()

Unnamed: 0,RestHR,MaxHR,RecHR,BP,Heart_attack
0,100,133,89,106,Light
1,105,143,89,116,Light
2,96,134,89,109,Light
3,101,144,89,119,Light
4,96,136,85,106,Light


In [4]:
# Split the data into features and target variable
X = data.drop('Heart_attack', axis=1)  # Features
y = data['Heart_attack']                # Target

In [5]:
X.head(),y.head()

(   RestHR  MaxHR  RecHR   BP
 0     100    133     89  106
 1     105    143     89  116
 2      96    134     89  109
 3     101    144     89  119
 4      96    136     85  106,
 0    Light
 1    Light
 2    Light
 3    Light
 4    Light
 Name: Heart_attack, dtype: object)

In [6]:
# Split the dataset into training and test sets with 70% for training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=42)

In [7]:
# Initialize the StandardScaler
scaler = StandardScaler()

In [8]:
# Fit the scaler using only the training data and transform both training and test data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Initialize the Random Forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)

In [10]:
# Train the Random Forest classifier using the scaled training data
rf.fit(X_train_scaled, y_train)

In [11]:
# Make predictions on the test data
predictions = rf.predict(X_test_scaled)

In [12]:
# Output the model metrics
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

[[69  0  0]
 [ 0 66  3]
 [ 0  5 67]]
              precision    recall  f1-score   support

       Light       1.00      1.00      1.00        69
     Massive       0.93      0.96      0.94        69
        Mild       0.96      0.93      0.94        72

    accuracy                           0.96       210
   macro avg       0.96      0.96      0.96       210
weighted avg       0.96      0.96      0.96       210



In [13]:
# Feature importance
feature_importances = rf.feature_importances_
features = X.columns
importance_df = pd.DataFrame({'Feature': features, 'Importance': feature_importances}).sort_values(by='Importance', ascending=False)

In [14]:
# Show the most important feature
most_important_feature = importance_df.iloc[0]
print(f"The most important feature is: {most_important_feature['Feature']}")
print(f"Importance: {most_important_feature['Importance']:.4f}")
print("Feature importances:")
print(importance_df)

The most important feature is: RecHR
Importance: 0.4709
Feature importances:
  Feature  Importance
2   RecHR    0.470872
3      BP    0.386159
0  RestHR    0.099145
1   MaxHR    0.043824
