In [1]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import graphviz
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split 

In [2]:
# Loading dataset
cancer = load_breast_cancer()
X, y, labels, features = cancer.data, cancer.target, cancer.target_names, cancer.feature_names
print('labels:', labels)
print('features:', features)

labels: ['malignant' 'benign']
features: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']


In [3]:
# Splitting data for train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
#DecisionTrees
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)

print("train accuracy= {:.3%}".format(clf.score(X_train, y_train)))
print("test accuracy= {:.3%}".format(clf.score(X_test, y_test)))

#Visualisation
graph_viz = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=features,
    class_names=labels,
    filled=True
)
graph = graphviz.Source(graph_viz)
graph.view(cleanup=True)  

train accuracy= 100.000%
test accuracy= 88.112%


'Source.gv.pdf'

In [5]:
#Random forest
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)

print("train accuracy= {:.3%}".format(clf.score(X_train, y_train)))
print("test accuracy= {:.3%}".format(clf.score(X_test, y_test)))

train accuracy= 100.000%
test accuracy= 97.203%


In [6]:
#Gradient boosting
from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(random_state=0)
clf.fit(X_train, y_train)

print("train accuracy= {:.3%}".format(clf.score(X_train, y_train)))
print("test accuracy= {:.3%}".format(clf.score(X_test, y_test)))

train accuracy= 100.000%
test accuracy= 96.503%


In [7]:
#XGB
from xgboost import XGBClassifier
 
clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=0)
clf.fit(X_train, y_train) 

print("train accuracy= {:.3%}".format(clf.score(X_train, y_train)))
print("test accuracy= {:.3%}".format(clf.score(X_test, y_test)))

train accuracy= 100.000%
test accuracy= 97.203%


Parameters: { "use_label_encoder" } are not used.

