1. Train a Random Forest Classification Model on the Dataset
2. Predict on the Test Set, and Obtain Prediction Results

# Build the dataset and train the model

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

# Splitting utility and estimator, grouped ahead of the statements that use them.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the CSV from the current working directory.
df = pd.read_csv('process_heart.csv')

# The 'target' column is the label; every remaining column is a feature.
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

# 100 trees, each capped at depth 5; random_state pins the forest's randomness.
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=5)
model.fit(X_train, y_train)

# The model’s prediction results on the test set

In [2]:
# Class labels predicted by the fitted forest for each row of X_test.
y_pred = model.predict(X_test)
# Bare last expression so the notebook displays the label array.
y_pred

array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1], dtype=int64)

In [3]:
# Per-class probability estimates for each row of X_test: one row per sample,
# one column per class (column order presumably follows model.classes_ — verify).
y_pred_proba = model.predict_proba(X_test)
# Bare last expression so the notebook displays the probability matrix.
y_pred_proba

array([[0.98660366, 0.01339634],
       [0.3661894 , 0.6338106 ],
       [0.82898303, 0.17101697],
       [0.0713009 , 0.9286991 ],
       [0.94456693, 0.05543307],
       [0.05570868, 0.94429132],
       [0.81976441, 0.18023559],
       [0.05807763, 0.94192237],
       [0.78903465, 0.21096535],
       [0.49117695, 0.50882305],
       [0.81947456, 0.18052544],
       [0.05807763, 0.94192237],
       [0.87574699, 0.12425301],
       [0.68923856, 0.31076144],
       [0.16963395, 0.83036605],
       [0.97420693, 0.02579307],
       [0.08302262, 0.91697738],
       [0.0783156 , 0.9216844 ],
       [0.9387684 , 0.0612316 ],
       [0.07629617, 0.92370383],
       [0.49413607, 0.50586393],
       [0.33953592, 0.66046408],
       [0.05463877, 0.94536123],
       [0.91673656, 0.08326344],
       [0.12072365, 0.87927635],
       [0.17604156, 0.82395844],
       [0.44285843, 0.55714157],
       [0.17021319, 0.82978681],
       [0.98890017, 0.01109983],
       [0.04798789, 0.95201211],
       [0.

In [4]:
# Second column of the probability matrix — presumably P(target == 1), assuming
# model.classes_ is [0, 1]; confirm before interpreting.
y_pred_proba[:,1]

array([0.01339634, 0.6338106 , 0.17101697, 0.9286991 , 0.05543307,
       0.94429132, 0.18023559, 0.94192237, 0.21096535, 0.50882305,
       0.18052544, 0.94192237, 0.12425301, 0.31076144, 0.83036605,
       0.02579307, 0.91697738, 0.9216844 , 0.0612316 , 0.92370383,
       0.50586393, 0.66046408, 0.94536123, 0.08326344, 0.87927635,
       0.82395844, 0.55714157, 0.82978681, 0.01109983, 0.95201211,
       0.80556087, 0.82626135, 0.34898589, 0.36679207, 0.94429132,
       0.03254483, 0.20231048, 0.71130345, 0.85728865, 0.45797629,
       0.65509409, 0.02908664, 0.25557156, 0.93198346, 0.12796078,
       0.709123  , 0.96248108, 0.93125411, 0.48847797, 0.17550431,
       0.93610741, 0.19863805, 0.81153988, 0.1830286 , 0.77828351,
       0.67884784, 0.88660957, 0.25492146, 0.93412427, 0.91507902,
       0.96248108, 0.09580391, 0.93105909, 0.86875234, 0.50586393,
       0.41491026, 0.2437425 , 0.02950353, 0.33920085, 0.87631133,
       0.94192237, 0.63232138, 0.77132352, 0.0346562 , 0.61605

In [5]:
# Boolean mask: True where the second-column probability exceeds 0.5.
# NOTE(review): appears intended for comparison with y_pred — confirm the two agree element-wise.
y_pred_proba[:,1] > 0.5

array([False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False, False,  True, False,  True,  True,
       False,  True,  True,  True,  True, False,  True,  True,  True,
        True, False,  True,  True,  True, False, False,  True, False,
       False,  True,  True, False,  True, False, False,  True, False,
        True,  True,  True, False, False,  True, False,  True, False,
        True,  True,  True, False,  True,  True,  True, False,  True,
        True,  True, False, False, False, False,  True,  True,  True,
        True, False,  True, False,  True,  True,  True, False,  True,
        True, False, False,  True,  True,  True, False,  True,  True,
       False,  True,  True,  True, False,  True,  True, False,  True,
        True, False, False,  True,  True, False, False, False,  True,
        True, False,  True,  True, False, False, False,  True, False,
       False,  True,  True, False,  True, False, False, False, False,
        True, False,

In [6]:
# Re-display the predicted labels — presumably for comparison with the
# 0.5-thresholded probabilities computed from y_pred_proba.
y_pred

array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1], dtype=int64)