Importing the Dependencies
```python

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

Data Collection and Preprocessing

```python

In [3]:
# Load the heart-disease records into a DataFrame (the path is relative to the working directory).
heart_data = pd.read_csv(
    filepath_or_buffer='heart_statlog_cleveland_hungary_final.csv',
)


In [4]:
# Preview the first five rows to confirm the columns were parsed as expected.
heart_data.head()

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,40,1,2,140,289,0,0,172,0,0.0,1,0
1,49,0,3,160,180,0,0,156,0,1.0,2,1
2,37,1,2,130,283,0,1,98,0,0.0,1,0
3,48,0,4,138,214,0,0,108,1,1.5,2,1
4,54,1,3,150,195,0,0,122,0,0.0,1,0


In [5]:
# Summarise dtypes and non-null counts per column (the output below shows 1190 rows, 12 columns).
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190 entries, 0 to 1189
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   age                  1190 non-null   int64  
 1   sex                  1190 non-null   int64  
 2   chest pain type      1190 non-null   int64  
 3   resting bp s         1190 non-null   int64  
 4   cholesterol          1190 non-null   int64  
 5   fasting blood sugar  1190 non-null   int64  
 6   resting ecg          1190 non-null   int64  
 7   max heart rate       1190 non-null   int64  
 8   exercise angina      1190 non-null   int64  
 9   oldpeak              1190 non-null   float64
 10  ST slope             1190 non-null   int64  
 11  target               1190 non-null   int64  
dtypes: float64(1), int64(11)
memory usage: 111.7 KB


In [6]:
 # Count missing values per column (`isna` is the same check as `isnull`).
 heart_data.isna().sum()

age                    0
sex                    0
chest pain type        0
resting bp s           0
cholesterol            0
fasting blood sugar    0
resting ecg            0
max heart rate         0
exercise angina        0
oldpeak                0
ST slope               0
target                 0
dtype: int64

In [7]:
# Summary statistics for every numeric column.
# NOTE(review): the output below reports a minimum of 0 for 'resting bp s' and 'cholesterol',
# which is not physiologically plausible - these may be missing-value placeholders; confirm
# against the data source before treating them as real measurements.
heart_data.describe()

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
count,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0,1190.0
mean,53.720168,0.763866,3.232773,132.153782,210.363866,0.213445,0.698319,139.732773,0.387395,0.922773,1.62437,0.528571
std,9.358203,0.424884,0.93548,18.368823,101.420489,0.409912,0.870359,25.517636,0.48736,1.086337,0.610459,0.499393
min,28.0,0.0,1.0,0.0,0.0,0.0,0.0,60.0,0.0,-2.6,0.0,0.0
25%,47.0,1.0,3.0,120.0,188.0,0.0,0.0,121.0,0.0,0.0,1.0,0.0
50%,54.0,1.0,4.0,130.0,229.0,0.0,0.0,140.5,0.0,0.6,2.0,1.0
75%,60.0,1.0,4.0,140.0,269.75,0.0,2.0,160.0,1.0,1.6,2.0,1.0
max,77.0,1.0,4.0,200.0,603.0,1.0,2.0,202.0,1.0,6.2,3.0,1.0


In [8]:
# Check the balance of the label column (the output below shows 629 rows with target=1 and 561 with target=0).
heart_data['target'].value_counts()

target
1    629
0    561
Name: count, dtype: int64

In [9]:
# Separate the label column from the predictors (every column except 'target').
Y = heart_data['target']
X = heart_data.drop(columns='target')

In [10]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class proportions
# the same in both splits, and the fixed seed makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [11]:
# Learn the scaling statistics from the training split only, then apply that same
# transform to both splits so no test-set statistics enter preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Model Training
```python

In [22]:
# Gradient boosting classifier with default hyperparameters; the fixed seed makes training repeatable.
# (Despite the `gbr` name, this is a classifier, not a regressor.)
gbr = GradientBoostingClassifier(random_state=2)

In [23]:
# Train the classifier on the standardized training features and their labels.
gbr.fit(X=X_train_scaled, y=Y_train)


Model Evaluation

```python

In [24]:
# Score the fitted model on the same rows it was trained on.
X_train_predicted = gbr.predict(X_train_scaled)  # predicted labels for the training rows
training_data_accuracy = accuracy_score(
    y_true=Y_train,
    y_pred=X_train_predicted,
)
print('Accuracy on training data: ', training_data_accuracy)

Accuracy on training data:  0.9474789915966386


In [26]:
# Score the fitted model on the held-out rows it did not see during training.
X_test_predicted = gbr.predict(X_test_scaled)  # predicted labels for the test rows
test_data_accuracy = accuracy_score(
    y_true=Y_test,
    y_pred=X_test_predicted,
)
print('Accuracy on test data: ', test_data_accuracy)



Accuracy on test data:  0.8823529411764706


Saving the Model and Predicting on New Data
```python

In [16]:
import pickle

In [17]:
# Persist the trained classifier so it can be reloaded later without retraining.
filename = 'heart_disease_model.sav'
# The context manager guarantees the file is flushed and closed once the dump finishes;
# a bare open() passed straight to pickle.dump leaves the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(gbr, model_file)

In [27]:
# Persist the fitted scaler alongside the model: new inputs must be standardized with
# the same statistics before they are passed to the classifier.
# A dedicated variable is used so the model's `filename` is not overwritten; the cell
# that reloads the model reads `filename` and would otherwise pick up the scaler file.
scaler_filename = 'heart_scaler.sav'
with open(scaler_filename, 'wb') as scaler_file:  # closes the handle after writing
    pickle.dump(scaler, scaler_file)

In [18]:
# Reload the classifier from disk. The path is spelled out instead of read from the shared
# `filename` variable, so this always unpickles the model no matter which cell last
# assigned `filename` (run top to bottom, it could otherwise point at the scaler file).
# NOTE: pickle.load can execute arbitrary code from the file - only load files you created.
with open('heart_disease_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [19]:
# Provide all 11 features in the same order as X.columns:
# (age, sex, chest pain type, resting bp s, cholesterol, fasting blood sugar, resting ecg,
#  max heart rate, exercise angina, oldpeak, ST slope)
# NOTE(review): cholesterol=2400 and max heart rate=500 are far outside the range seen in
# the data (maxima of 603 and 202 in the describe() output) - likely typos; confirm the
# intended values before trusting this prediction.
input_data = (65, 1, 3, 120, 2400, 0, 1, 500, 0, 1.2, 2)  # <-- update these values as needed

# Build a one-row DataFrame labelled with the training column names. The scaler was fitted
# on a DataFrame, so handing it a bare NumPy array triggers scikit-learn's
# "X does not have valid feature names" warning; labelling the row avoids that, and a
# wrong number of values now fails here with a clear error.
input_frame = pd.DataFrame([input_data], columns=X.columns)
standardized_input_data = scaler.transform(input_frame)
prediction = gbr.predict(standardized_input_data)
if prediction[0] == 0:
    print('The person does not have heart disease')
else:
    print('The person has heart disease')



The person does not have heart disease




In [21]:
# Example input for a heart disease case (update values as needed).
# Provide all 11 features in the same order as X.columns:
# (age, sex, chest pain type, resting bp s, cholesterol, fasting blood sugar, resting ecg,
#  max heart rate, exercise angina, oldpeak, ST slope)
heart_disease_case = (54, 1, 2, 140, 260, 1, 0, 150, 1, 2.3, 2)  # <-- update values as needed

# Label the single row with the training column names before scaling. The scaler was fitted
# on a DataFrame, so a bare NumPy array would trigger scikit-learn's feature-name warning,
# and a mismatched number of values is rejected here instead of deeper in the pipeline.
case_frame = pd.DataFrame([heart_disease_case], columns=X.columns)
case_scaled = scaler.transform(case_frame)
prediction = gbr.predict(case_scaled)
if prediction[0] == 1:
    print('Prediction: The person has heart disease')
else:
    print('Prediction: The person does not have heart disease')

Prediction: The person has heart disease


