## Importing usual libraries

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Getting our heart-disease dataset ready

In [5]:
# Read the heart-disease CSV into a DataFrame and preview its first five rows.
heart_disease = pd.read_csv(filepath_or_buffer="../Data/heart-disease.csv")
heart_disease.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Splitting the data into features (X) and target (y)

In [6]:
# Feature matrix: every column of the table except the label column "target".
X = heart_disease.drop(columns=["target"])
X.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2


In [7]:
# Label vector: the "target" column on its own, as a Series.
y = heart_disease.loc[:, "target"]
y.head(n=5)

0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64

## Split data into training and test set
We use:
`from sklearn.model_selection import train_test_split`

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 20% of the rows as a test set.
# The split is seeded explicitly: the notebook's np.random.seed(42) call only
# runs in a later cell, so without random_state the partition (and every score
# computed from it) would change on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [10]:
# Shapes of the four partitions, in the order X_train, X_test, y_train, y_test.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((242, 13), (61, 13), (242,), (61,))

## Importing RandomForestClassifier

In [11]:

from sklearn.ensemble import RandomForestClassifier

# Seed NumPy's global RNG (kept for any other code that draws from it).
np.random.seed(42)

# Instantiate a 100-tree random forest with its own fixed random_state.
# With random_state left at None the estimator draws from NumPy's global RNG
# when it is fitted, so results would depend on which cells ran (and how many
# times) between the seed call and each fit, including the cloned fits that
# cross-validation performs.
model = RandomForestClassifier(n_estimators=100, random_state=42)

In [12]:
# Show the first five rows of the feature matrix again before fitting.
X.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2


## Fit the model to the data

In [13]:
# Fit the classifier on the training partition only; the test rows stay unseen.
model.fit(X=X_train, y=y_train)

## Evaluating the RFC

In [14]:
# Evaluate the fitted model: mean accuracy on the held-out test partition.
model.score(X=X_test, y=y_test)

0.819672131147541

## Making predictions with the trained model

In [15]:
# Predict a class label for every row of the held-out test features.
model.predict(X=X_test)

array([0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1])

In [16]:
# Actual labels of the same test rows, shown as a plain array so they can be
# compared side by side with the model's predictions (nothing is predicted here).
y_test.to_numpy()

array([1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])

In [17]:
# Store the test-set predictions in a variable so the accuracy checks can reuse them.
y_preds = model.predict(X=X_test)
y_preds

array([0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1])

## Comparison of accuracy using test data and predicted data

### Accuracy using `np.mean`

In [18]:
# Fraction of test rows whose predicted label equals the true label.
(y_preds == y_test).mean()

0.819672131147541

### Accuracy using `score()`

In [19]:
# Mean accuracy on the test partition, computed by the estimator itself.
model.score(X=X_test, y=y_test)

0.819672131147541

### Accuracy using `accuracy_score` from `sklearn.metrics`

In [20]:
from sklearn.metrics import accuracy_score

# Keyword arguments make the (true labels, predicted labels) order explicit.
accuracy_score(y_true=y_test, y_pred=y_preds)

0.819672131147541

_We get the same accuracy by all three methods_

### Accuracy using `cross_val_score()` (5-fold cross-validation)

In [21]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation over the full dataset (X, y), one score per fold.
# scoring=None leaves the choice of metric to the estimator's own score method.
cross_val_score(estimator=model, X=X, y=y, cv=5, scoring=None)

array([0.83606557, 0.8852459 , 0.83606557, 0.8       , 0.75      ])