<a href="https://colab.research.google.com/github/ibrahimmhammed/projects/blob/main/heart_failure_predection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Importing**


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
data = pd.read_csv("//content//heart.csv")

***Exploring*** **and** **describing**


In [None]:
#to explore data
data.head(10)
data.shape
data.info()


In [None]:
#to describe data
data.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


In [None]:
#data cleaning
data.isnull().sum()
data.duplicated().sum()

0

# **Analysis**


In [None]:
#counting patients
data["HeartDisease"].value_counts()

In [None]:
#visulazation
data.hist(figsize=(20,20))
plt.show()

In [None]:
#measuring correlation
cor =data.corr()
sns.heatmap(cor, annot=True,fmt=".2f", linewidths=0.5)

# **processing** **data**


In [None]:
#to convert data from object to numerical
from sklearn.preprocessing import LabelEncoder
lab = LabelEncoder()
obj = data.select_dtypes(include = "object")
non_obj = data.select_dtypes(exclude = "object")
for i in range(0,obj.shape[1]):
  obj.iloc[:,i]=lab.fit_transform(obj.iloc[:,i])


In [None]:
#to concatenate obj and non_obj in a datafreme
df = pd.concat([obj,non_obj],axis=1)
df

# **Model**

In [None]:
#importing modules

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report




In [None]:
#difining x and y
x = df.drop(["HeartDisease"],axis=1)
y= df["HeartDisease"]

In [None]:
#spliting train and test
x_train,x_test,y_train,y_test= train_test_split(x,y,train_size=0.7,shuffle="true",random_state=44)


In [None]:
#testing the model
def testing(model):
  model.fit(x_train,y_train)
  predictid_data = model.predict(x_test)
  print("confusion matrix is \n ", confusion_matrix(predictid_data,y_test))
  print("classification report is \n ", classification_report(predictid_data,y_test))





# **Applying** **algorithms**

In [None]:
# 1- k_neighpors
k_neighpors = KNeighborsClassifier()
testing(k_neighpors)

confusion matrix is 
  [[ 93  40]
 [ 33 110]]
classification report is 
                precision    recall  f1-score   support

           0       0.74      0.70      0.72       133
           1       0.73      0.77      0.75       143

    accuracy                           0.74       276
   macro avg       0.74      0.73      0.73       276
weighted avg       0.74      0.74      0.74       276



In [None]:
# 2- decision_tree
decision_tree = DecisionTreeClassifier()
testing(decision_tree)

confusion matrix is 
  [[ 98  25]
 [ 28 125]]
classification report is 
                precision    recall  f1-score   support

           0       0.78      0.80      0.79       123
           1       0.83      0.82      0.83       153

    accuracy                           0.81       276
   macro avg       0.81      0.81      0.81       276
weighted avg       0.81      0.81      0.81       276



In [None]:
# 3- GaussianNB
gaussian_nb = GaussianNB()
testing(gaussian_nb)


confusion matrix is 
  [[105  20]
 [ 21 130]]
classification report is 
                precision    recall  f1-score   support

           0       0.83      0.84      0.84       125
           1       0.87      0.86      0.86       151

    accuracy                           0.85       276
   macro avg       0.85      0.85      0.85       276
weighted avg       0.85      0.85      0.85       276



In [None]:
#4- SVC
svc = SVC()
testing(svc)

In [None]:
#5- RandomForestClassifier
random_forest = RandomForestClassifier(n_estimators=500,criterion="gini")
testing(random_forest)

confusion matrix is 
  [[110  12]
 [ 16 138]]
classification report is 
                precision    recall  f1-score   support

           0       0.87      0.90      0.89       122
           1       0.92      0.90      0.91       154

    accuracy                           0.90       276
   macro avg       0.90      0.90      0.90       276
weighted avg       0.90      0.90      0.90       276



In [None]:
#6- GradientBoostingClassifier
gradient_boosting = GradientBoostingClassifier(learning_rate=0.01,n_estimators=500,loss="exponential",criterion="squared_error")
testing(gradient_boosting)

confusion matrix is 
  [[107  18]
 [ 19 132]]
classification report is 
                precision    recall  f1-score   support

           0       0.85      0.86      0.85       125
           1       0.88      0.87      0.88       151

    accuracy                           0.87       276
   macro avg       0.86      0.87      0.86       276
weighted avg       0.87      0.87      0.87       276

