# Logistic Regression / Decision Tree

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing the dataset

In [2]:
# Location of the input CSV; the relative path is resolved against the
# kernel's current working directory.
DATA_PATH = "Data/heart.csv"
dataset = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five rows to confirm the columns loaded as expected.
dataset.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
# The last column holds the label; every column before it is a feature.
y = dataset.iloc[:, -1].to_numpy()
X = dataset.iloc[:, :-1].to_numpy()

In [5]:
# Dimensions of the feature matrix, displayed as (rows, columns).
n_samples, n_features = X.shape
(n_samples, n_features)

(303, 13)

## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [7]:
# Size of the held-out split: feature matrix first, then the label vector.
test_feature_shape, test_label_shape = X_test.shape, y_test.shape
print(test_feature_shape, test_label_shape)

(61, 13) (61,)


## Feature Scaling

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature scaling statistics from the training rows only,
# then apply them to those same rows.
sc = StandardScaler()
sc.fit(X_train)
# NOTE: X_train is overwritten with its scaled version, so re-running this
# cell would refit the scaler on data that has already been scaled.
X_train = sc.transform(X_train)

In [9]:
# Transform only (no fit): the test rows are scaled with the statistics the
# scaler learned from the training rows.
# NOTE: X_test is overwritten in place, so running this cell a second time
# would scale the data twice.
X_test = sc.transform(X=X_test)

In [10]:
# X_train
# X_test

## Training the Logistic Regression model on the Training set

In [11]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the library's default settings apart from a
# random_state fixed at 0.
classifier = LogisticRegression(random_state=0)

In [12]:
# Fit on the scaled training features; the cell displays the fitted estimator.
classifier.fit(X=X_train, y=y_train)

LogisticRegression(random_state=0)

## Training the Decision Tree model on the Training set

In [13]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree that uses entropy as its split criterion, with random_state
# fixed at 0.
decision_classifier = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
)

In [14]:
# Fit the tree on the same scaled training data used for the logistic model;
# the cell displays the fitted estimator.
decision_classifier.fit(X=X_train, y=y_train)

DecisionTreeClassifier(criterion='entropy', random_state=0)

## Predicting a new Logistic Regression result

In [15]:
# One hand-written sample, listed in the same column order as the features:
# age 70, sex 1, cp 3, trestbps 130, chol 240, fbs 1, restecg 0, thalach 170,
# exang 0, oldpeak 2.5, slope 0, ca 0, thal 2
new_patient = [[70, 1, 3, 130, 240, 1, 0, 170, 0, 2.5, 0, 0, 2]]
# Scale it with the scaler fitted on the training set, once, and reuse the
# result for both the label and the class probabilities.
new_patient_scaled = sc.transform(new_patient)
print(f"Target: {classifier.predict(new_patient_scaled)}")
print(f"Probability: {classifier.predict_proba(new_patient_scaled)}")

Target: [1]
Probability: [[0.24782702 0.75217298]]


## Predicting a new Decision Tree result

In [16]:
# One hand-written sample, listed in the same column order as the features:
# age 70, sex 1, cp 3, trestbps 130, chol 240, fbs 1, restecg 0, thalach 170,
# exang 0, oldpeak 2.5, slope 0, ca 0, thal 2
new_patient = [[70, 1, 3, 130, 240, 1, 0, 170, 0, 2.5, 0, 0, 2]]
# Scale it with the scaler fitted on the training set, once, and reuse the
# result for both the label and the class probabilities.
new_patient_scaled = sc.transform(new_patient)
print(f"Target: {decision_classifier.predict(new_patient_scaled)}")
print(f"Probability: {decision_classifier.predict_proba(new_patient_scaled)}")

Target: [1]
Probability: [[0. 1.]]


## Predicting the Logistic Regression Test set results

In [17]:
# Logistic-regression class labels for every row of the scaled test set.
y_pred = classifier.predict(X=X_test)

In [18]:
# print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

In [19]:
# Side-by-side view of the true test labels and the logistic-regression
# predictions. Both inputs are one-dimensional label arrays, so they are
# passed to the DataFrame directly instead of being flattened first.
# The first column label previously read 'Orginal value' (typo).
df = pd.DataFrame({'Original value': y_test, 'Predicted value': y_pred})
df

Unnamed: 0,Orginal value,Predicted value
0,0,0
1,1,1
2,0,1
3,0,0
4,1,0
...,...,...
56,1,1
57,0,1
58,1,1
59,1,1


## Predicting the Decision Tree Test set results

In [20]:
# Decision-tree class labels for every row of the scaled test set.
y_decision_pred = decision_classifier.predict(X=X_test)

In [21]:
# print(np.concatenate((y_decision_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

In [22]:
# Side-by-side view of the true test labels and the decision-tree
# predictions. Both inputs are one-dimensional label arrays, so they are
# passed to the DataFrame directly instead of being flattened first.
# The first column label previously read 'Orginal value' (typo).
# NOTE: this reuses the name `df`, replacing any frame previously bound to it.
df = pd.DataFrame({'Original value': y_test, 'Predicted value': y_decision_pred})
df

Unnamed: 0,Orginal value,Predicted value
0,0,0
1,1,0
2,0,0
3,0,0
4,1,0
...,...,...
56,1,1
57,0,1
58,1,1
59,1,1


## Making the Confusion Matrix for Logistic Regression

In [23]:
from sklearn.metrics import accuracy_score, confusion_matrix

# confusion_matrix puts the true labels on the rows and the predicted labels
# on the columns, so for labels (0, 1) the layout is [[TN, FP], [FN, TP]]:
# false positives are top-right and false negatives are bottom-left.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[21  6]
 [ 3 31]]


In [24]:
# Share of test rows where the logistic-regression label matches the true label.
log_reg_accuracy = accuracy_score(y_test, y_pred)
print(log_reg_accuracy)

0.8524590163934426


## Making the Confusion Matrix for Decision Tree

In [25]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Same layout as any sklearn confusion matrix: true labels on the rows,
# predicted labels on the columns, i.e. [[TN, FP], [FN, TP]] for labels (0, 1).
# NOTE: this rebinds `cm`, replacing any matrix previously stored under it.
cm = confusion_matrix(y_true=y_test, y_pred=y_decision_pred)
print(cm)

[[22  5]
 [ 8 26]]


In [26]:
# Share of test rows where the decision-tree label matches the true label.
tree_accuracy = accuracy_score(y_test, y_decision_pred)
print(tree_accuracy)

0.7868852459016393
