# Heart Disease Prediction using Logistic Regression

In [None]:
# importing necessary libraries
import pandas as pd
import numpy as np

import scipy.optimize as opt
import statsmodels.api as sm
import pylab as pl
from sklearn import preprocessing

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
%matplotlib inline
import seaborn as sns


### Data Loading

In [None]:
# Load the Framingham dataset (CSV expected in the working directory)
csv_path = 'framingham.csv'
disease_df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows (5 is the default for head())
disease_df.head()

### Data Preparation

In [None]:
# Data cleaning / preparation:
#   * remove the 'education' column
#   * rename 'male' to the more descriptive 'sex_male'
disease_df.drop(columns=['education'], inplace=True)
disease_df.rename(columns={'male': 'sex_male'}, inplace=True)

In [None]:
# Drop every row that has at least one missing value (rows are the default axis)
disease_df.dropna(inplace=True)

In [None]:
# Number of non-missing entries per column after cleaning
disease_df.notna().sum()

### Splitting the Dataset into Test and Train Sets

In [None]:
# Data Normalization
x = np.asarray(disease_df[['age', 'sex_male', 'cigsPerDay','totChol', 'sysBP', 'glucose']])
y = np.asarray(disease_df['TenYearCHD'])

x = preprocessing.StandardScaler().fit(x).transform(x)

In [None]:
# splitting datasets to Test and Train sets
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size= 0.3, random_state = 4)


print ('Train set:', x_train.shape,  y_train.shape)
print ('Test set:', x_test.shape,  y_test.shape)

### Exploratory Data Analysis

In [None]:
# Counting the no. of patients in each TenYearCHD class
ax = sns.countplot(x='TenYearCHD', data=disease_df, palette=["#32cd32", "#FF0000"])
# Title is set on the Axes returned by countplot; spelling fixed
# ('Patient affeted' -> 'Patients affected').
ax.set_title('Count of Patients affected with CHD')

### Using Logistic Regression Model for Heart Disease Prediction

In [None]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier (default hyper-parameters) on the
# training partition, then predict labels for the held-out test partition.
logReg = LogisticRegression().fit(x_train, y_train)
y_pred = logReg.predict(x_test)

### Model Evaluation

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the test-set predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print("The accuracy score of the model is: ", accuracy)

#### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix of test labels vs. predictions, wrapped in a labelled
# DataFrame so the heatmap axes show actual / predicted classes.
row_labels = ['Actual:0', 'Actual:1']
col_labels = ['Predicted:0', 'Predicted:1']
cm = confusion_matrix(y_test, y_pred)
conf_mat = pd.DataFrame(cm, index=row_labels, columns=col_labels)

# Integer-annotated heatmap of the confusion matrix
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Greens')

In [None]:
# Print a header followed by the text report comparing test labels to predictions
print('The details for confusion matrix is =')
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Thank You