Importing all needed modules

In [1]:
import math

import matplotlib.pyplot as plotter
import numpy
import pandas
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

Reading the CSV

In [None]:
# Location of the heart-disease CSV that the rest of the notebook works on.
heart_csv_url = "https://raw.githubusercontent.com/ahmedheltaher/statistical-analysis-project/master/heart.csv"
heart_data = pandas.read_csv(heart_csv_url)

Data Setup for the model

In [None]:
# Encode the categorical columns as integers so they can be used as regression inputs.
# NOTE: run this cell once on freshly loaded data. Re-running it on the already
# encoded frame compares numbers with 'M'/'Y' (always False) and would zero out the
# flags, and the chest-pain mapping would no longer find its string keys.
heart_data['Sex'] = (heart_data['Sex'] == 'M').astype(int)

chest_pain_types = {'ATA': 0, 'NAP': 1, 'ASY': 2, 'TA': 3}
# Assign the mapped column back rather than calling replace(..., inplace=True) on
# the selected column: the in-place call acts on an intermediate object and does not
# reliably update heart_data (pandas warns about it and Copy-on-Write ignores it).
# Codes that are not listed in chest_pain_types become NaN.
heart_data['ChestPainType'] = heart_data['ChestPainType'].map(chest_pain_types)

heart_data['ExerciseAngina'] = (heart_data['ExerciseAngina'] == 'Y').astype(int)

Regression model with all fields except the ECG related ones

In [None]:
# Predictor columns fed to the model; the target column is 'HeartDisease'.
x_train_features = [
    'Age',
    'Sex',
    'ChestPainType',
    'RestingBP',
    'Cholesterol',
    'FastingBS',
    'MaxHR',
    'ExerciseAngina',
    'Oldpeak',
]

# Convert the selected columns and the target to plain NumPy arrays for fitting.
x_train = heart_data[x_train_features].to_numpy()
y_train = heart_data['HeartDisease'].to_numpy()

# Ordinary least-squares fit on the full data set.
regressor = linear_model.LinearRegression()
regressor.fit(x_train, y_train)

Function to convert the output of the linear regression into a logistic (sigmoid) value, since we need to classify, and to binarize that value using 0.5 as the threshold

In [None]:
def logisticPredication(x, regressor: "linear_model.LinearRegression") -> bool:
    """Binarize a linear-regression output through a sigmoid with a 0.5 threshold.

    Args:
        x: The regression output for a single sample, either a plain number or a
            one-element array such as the result of ``regressor.predict([row])``.
        regressor: The fitted regressor; only its ``intercept_`` is read.

    Returns:
        ``True`` when ``sigmoid(x - regressor.intercept_)`` is greater than 0.5,
        otherwise ``False``.

    Raises:
        ValueError: If ``x`` holds more than one value.
    """
    # Unwrap the single value explicitly instead of relying on math.exp to convert
    # a one-element array to a scalar, which NumPy has deprecated.
    linear_output = float(numpy.asarray(x, dtype=float).item())
    shifted = linear_output - float(regressor.intercept_)

    # NOTE(review): sigmoid(shifted) > 0.5 is equivalent to x > intercept_, so the
    # decision boundary is the intercept rather than 0.5 on the raw prediction.
    # Confirm this is the intended classification rule.

    # Pick the sigmoid form whose exponent is never positive so math.exp cannot
    # overflow for very large or very small inputs.
    if shifted >= 0:
        probability = 1 / (1 + math.exp(-shifted))
    else:
        exp_shifted = math.exp(shifted)
        probability = exp_shifted / (1 + exp_shifted)
    return probability > 0.5

The Prediction function

In [None]:
def predict(data: list) -> bool:
    """Classify a single record with the module-level ``regressor``.

    Args:
        data: Feature values for one record, in the same column order the
            regressor was fitted with.

    Returns:
        The boolean decision produced by ``logisticPredication`` for the record.
    """
    # Wrap the record in a list because the regressor expects a 2-D input
    # (one row per sample).
    raw_prediction = regressor.predict([data])
    return logisticPredication(x=raw_prediction, regressor=regressor)

Calculating the value of R^2

In [None]:
# Coefficient of determination of the fit, evaluated on the training data itself.
r_squared = regressor.score(x_train, y_train)
print("R ^ 2:", r_squared)

Calculating the Mean squared error 

In [None]:
# Predict on the training inputs and compare against the known targets.
y_prediction = regressor.predict(x_train)
training_mse = mean_squared_error(y_train, y_prediction)
print("Mean squared error: ", training_mse)

Finding the coefficients and sorting them so that the most influential features come first

In [None]:
# Pair every feature name with its fitted coefficient.
coefficients = dict(zip(x_train_features, regressor.coef_))

# Rank by absolute value: a strongly negative coefficient affects the prediction
# as much as a strongly positive one, so the sign must not decide the order.
# NOTE: the features are not standardized, so magnitudes are only comparable
# between features measured on similar scales.
coefficients = dict(sorted(coefficients.items(),
                    key=lambda item: abs(item[1]), reverse=True))

Plotting the linear regression behavior for the top 2 most influential features

In [None]:
# The two highest-ranked features span the horizontal axes of the plot.
first_feature, second_feature = list(coefficients.keys())[:2]

# Number of grid points along each horizontal axis of the prediction surface.
grid_points = 30
x_surf, y_surf = numpy.meshgrid(
    numpy.linspace(heart_data[first_feature].min(),
                   heart_data[first_feature].max(), grid_points),
    numpy.linspace(heart_data[second_feature].min(),
                   heart_data[second_feature].max(), grid_points),
)

# Evaluate the model on the grid points themselves (not on the training rows) so
# that every surface height belongs to its (x, y) position. The features that are
# not plotted are held at their mean over the training data.
grid_inputs = numpy.tile(
    numpy.asarray(x_train, dtype=float).mean(axis=0), (x_surf.size, 1))
grid_inputs[:, x_train_features.index(first_feature)] = x_surf.ravel()
grid_inputs[:, x_train_features.index(second_feature)] = y_surf.ravel()

# Bring the flat predictions back into the shape of the mesh for plot_surface.
fittedY = regressor.predict(grid_inputs).reshape(x_surf.shape)


# Visualize the data and the fitted plane for the multiple linear regression
fig = plotter.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(heart_data[first_feature], heart_data[second_feature], heart_data['HeartDisease'],
           c='red', marker='o', alpha=0.5)
ax.plot_surface(x_surf, y_surf, fittedY, color='b', alpha=0.3)
ax.set_xlabel(first_feature)
ax.set_ylabel(second_feature)
ax.set_zlabel('HeartDisease')
plotter.show()
