In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [3]:
Data = load_breast_cancer()
df = pd.DataFrame(data=np.c_[Data.data,Data.target],columns=[list(Data.feature_names)+["target"]])
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [4]:
df.shape

(569, 31)

## Split the features and level

In [5]:
features = df.iloc[:,0:-1]
level = df.iloc[:,-1]

In [6]:
xtrain,xtest,ytrain,ytest = train_test_split(features,level,test_size=0.2,random_state=2031)

In [7]:
print("Shape of xtrain: ",xtrain.shape)
print("Shape of ytrain: ",ytrain.shape)
print("Shape of xtest: ",xtest.shape)
print("Shape of ytest: ",ytest.shape)

Shape of xtrain:  (455, 30)
Shape of ytrain:  (455,)
Shape of xtest:  (114, 30)
Shape of ytest:  (114,)


## Create Model

In [8]:
# n_estimators means number of decision create by algorithm. you can change value if you want 
# to create more than 100 tree ex: n_estimators=200 as your wish
rfc = RandomForestClassifier(n_estimators=100,criterion="gini") 

In [9]:
rfc.fit(X=xtrain,y=ytrain)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

## Accuracy

In [10]:
rfc.score(X=xtest,y=ytest)

0.9649122807017544

In [12]:
pred = rfc.predict(xtest)
pred

array([0., 0., 1., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 1.,
       0., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 1.,
       1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1.,
       1., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 1.,
       0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0.])

In [17]:
predict_one = xtest.iloc[75,:].values
predict_one

array([9.742e+00, 1.567e+01, 6.150e+01, 2.899e+02, 9.037e-02, 4.689e-02,
       1.103e-02, 1.407e-02, 2.081e-01, 6.312e-02, 2.684e-01, 1.409e+00,
       1.750e+00, 1.639e+01, 1.380e-02, 1.067e-02, 8.347e-03, 9.472e-03,
       1.798e-02, 4.261e-03, 1.075e+01, 2.088e+01, 6.809e+01, 3.552e+02,
       1.467e-01, 9.370e-02, 4.043e-02, 5.159e-02, 2.841e-01, 8.175e-02])

In [18]:
rfc.predict([predict_one])

array([1.])

In [19]:
Data.target_names

array(['malignant', 'benign'], dtype='<U9')