<a href="https://colab.research.google.com/github/irfanharis31/Heart-disease-prediction/blob/main/Copy_of_heart_disease_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Heart Disease

### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn import preprocessing

In [None]:
# Mount Google Drive in this Colab runtime; the dataset is read from it below.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

### Importing Dataset


In [None]:
# Read the heart-disease CSV from the mounted Drive into a DataFrame.
dataset_path = '/content/drive/MyDrive/Heart_Disease_Prediction.csv'
data = pd.read_csv(dataset_path)

### Checking For Duplicates



In [None]:
# Rows that exactly repeat an earlier row; an empty frame means no duplicates.
duplicate_mask = data.duplicated()
data.loc[duplicate_mask]

In [None]:
# Give the target column a space-free name that is easier to reference.
data = data.rename({'Heart Disease': 'heartdisease'}, axis='columns')

In [None]:
# Encode the target as integers: 'Presence' -> 1, 'Absence' -> 0.
# The guard makes the cell safe to re-run: once the column is numeric, mapping
# it again would find no matching keys and turn every value into NaN.
if not pd.api.types.is_numeric_dtype(data['heartdisease']):
    data['heartdisease'] = data['heartdisease'].map({'Presence': 1, 'Absence': 0})

### Data Analysis

In [None]:
# Dimensions of the dataset as (rows, columns).
data.shape

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
data.info()

In [None]:
# Column labels, including the renamed 'heartdisease' target.
data.columns

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

In [None]:
# Preview the first five records.
data.head(n=5)

In [None]:
# Preview the last five records.
data.tail(n=5)

### Pre-Processing

In [None]:
# Number of missing values in each column.
missing_per_column = data.isna().sum()
missing_per_column

In [None]:
# How many records share each distinct cholesterol reading, most frequent first.
cholesterol_counts = data.loc[:, "Cholesterol"].value_counts()
cholesterol_counts

In [None]:
# Class balance of the target (1 = Presence, 0 = Absence).
class_counts = data.loc[:, "heartdisease"].value_counts()
class_counts

In [None]:
# Report the range and the mean of the cholesterol readings.
cholesterol = data['Cholesterol']
highest, lowest, average = cholesterol.max(), cholesterol.min(), cholesterol.mean()
print('The highest cholestrol was of:', highest)
print('The lowest cholestrol was of:', lowest)
print('The average cholestrol in the data:', average)

### Data Visualization (Graphs)

In [None]:
# Side-by-side histograms of cholesterol for patients with and without heart
# disease. The raw cholesterol values are histogrammed directly; feeding
# `value_counts()` into `hist` would instead show how often each reading
# repeats, which says nothing about the cholesterol distribution itself.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))

cholesterol_with_disease = data.loc[data['heartdisease'] == 1, 'Cholesterol']
ax1.hist(cholesterol_with_disease, color='red')
ax1.set_title('Having heart disease')
ax1.set_xlabel('Cholesterol')
ax1.set_ylabel('Number of patients')

cholesterol_without_disease = data.loc[data['heartdisease'] == 0, 'Cholesterol']
ax2.hist(cholesterol_without_disease, color='green')
ax2.set_title('NOT heart disease')
ax2.set_xlabel('Cholesterol')
ax2.set_ylabel('Number of patients')

fig.suptitle('Heart Disease')
plt.show()

In [None]:
# Line plot of blood pressure per record, in dataset order.
# Plotting a Series puts its index on the x-axis and its values (BP) on the
# y-axis, so the axis labels say exactly that.
plt.plot(data['BP'])
plt.xlabel("Record index")
plt.ylabel("BP")
plt.title("BP Line Plot")
plt.show()

In [None]:
# Line plot of cholesterol per record, in dataset order.
# Plotting a Series puts its index on the x-axis and its values (cholesterol)
# on the y-axis, so the axis labels say exactly that.
plt.plot(data['Cholesterol'])
plt.xlabel("Record index")
plt.ylabel("Cholesterol")
plt.title("Cholesterol")
plt.show()

### Normalization

In [None]:
# Peek at the records in positions 1 through 4 before normalising.
data.iloc[1:5]

In [None]:
# Normalise a handful of numeric features.
# Columns are selected by NAME so the values always match their labels; a
# positional slice (`iloc[:, 1:5]`) paired with a hard-coded label list silently
# mislabels the data whenever the CSV column order differs from that list.
# NOTE(review): assumes the CSV has columns named "Age" and "Sex" (as the
# original label list implies) - confirm against `data.columns`.
normalized_columns = ["Age", "BP", "Cholesterol", "Sex"]

# axis=0 rescales each column (feature) independently to unit L2 norm.
d = preprocessing.normalize(data[normalized_columns], axis=0)

# Keep the original row index so `scaled_df` stays aligned with `data`.
# NOTE(review): the modelling cells in this notebook train on `data`, not on
# `scaled_df`, so this normalisation does not affect the reported scores.
scaled_df = pd.DataFrame(d, columns=normalized_columns, index=data.index)
scaled_df.head()

In [None]:
from sklearn.model_selection import train_test_split #training and testing data split
from sklearn import metrics #accuracy measure
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score,accuracy_score, classification_report #for confusion matrix
from sklearn.linear_model import LogisticRegression,LinearRegression #logistic regression

### Split into train and test in the ratio 70:30

In [None]:
# Stratified 70/30 split, so both partitions keep the overall class balance.
# Features and target are selected by column NAME rather than by position, so
# the target cannot leak into the features if it is not the last column. The
# target is kept as a 1-D Series, which is what scikit-learn estimators expect.
target_column = 'heartdisease'

train, test = train_test_split(
    data,
    test_size=0.3,
    random_state=42,  # fixed seed keeps the split reproducible
    stratify=data[target_column],
)

train_X = train.drop(columns=target_column)
train_Y = train[target_column]
test_X = test.drop(columns=target_column)
test_Y = test[target_column]

# Full feature matrix / target vector (unsplit).
X = data.drop(columns=target_column)
Y = data[target_column]

len(train_X), len(train_Y), len(test_X), len(test_Y)

In [None]:
# Fit an L2-regularised logistic regression and evaluate it on the test split.
# NOTE(review): the features are not scaled here and warnings are silenced
# globally at the top of the notebook, so a ConvergenceWarning from the solver
# would go unnoticed - consider scaling the inputs or raising `max_iter`.
model = LogisticRegression(C=0.1, penalty='l2')

# `np.ravel` guarantees a 1-D target whether train_Y is a Series or a
# one-column DataFrame (scikit-learn warns on column-vector targets).
model.fit(train_X, np.ravel(train_Y))
prediction3 = model.predict(test_X)

# scikit-learn metric signature is (y_true, y_pred).
print('The accuracy of the Logistic Regression is', metrics.accuracy_score(test_Y, prediction3))
report = classification_report(test_Y, prediction3)
print("Classification Report:\n", report)

In [None]:
# Grouped bar chart of per-class precision / recall / F1 for the logistic
# regression model. The scores are computed from the test-set predictions
# (`prediction3`) instead of being typed in by hand, so the chart always
# reflects the model that was actually fitted.
# `labels=[0, 1]` fixes the order of the returned arrays to match `labels` below.
precision, recall, f1_score, support = metrics.precision_recall_fscore_support(
    test_Y, prediction3, labels=[0, 1]
)

labels = ['Class 0', 'Class 1']

# Plotting the bar chart: three bars per class, offset by one bar width.
width = 0.2
x = np.arange(len(labels))

fig, ax = plt.subplots()
rects1 = ax.bar(x - width, precision, width, label='Precision')
rects2 = ax.bar(x, recall, width, label='Recall')
rects3 = ax.bar(x + width, f1_score, width, label='F1-Score')

# Adding labels, title, and legend
ax.set_ylabel('Scores')
ax.set_title('Logistic Regression Model Evaluation Metrics')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

# Display the plot
plt.show()

In [None]:
# Echo the per-class scores that feed the bar chart, one metric per line.
for metric_values in (precision, recall, f1_score):
    print(metric_values)

In [None]:
# Refit the logistic regression and report error-style metrics on the test split.
# With 0/1 labels every error has magnitude 1, so MSE and MAE both equal the
# misclassification rate (1 - accuracy).
model = LogisticRegression(C=0.1, penalty='l2')

# `np.ravel` guarantees a 1-D target whether train_Y is a Series or a
# one-column DataFrame (scikit-learn warns on column-vector targets).
model.fit(train_X, np.ravel(train_Y))
predictions = model.predict(test_X)

mse = mean_squared_error(test_Y, predictions)
# Take the square root explicitly: the `squared=False` argument of
# `mean_squared_error` is deprecated and rejected by newer scikit-learn releases.
rmse = np.sqrt(mse)
mae = mean_absolute_error(test_Y, predictions)
r2 = r2_score(test_Y, predictions)

# scikit-learn metric signature is (y_true, y_pred).
print('The accuracy of Logistic Regression is:', metrics.accuracy_score(test_Y, predictions))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('Mean Absolute Error:', mae)
print('R-squared:',r2)