In [11]:
from sklearn import datasets
data_breast_cancer = datasets.load_breast_cancer(as_frame=True)
print(data_breast_cancer['DESCR'])

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [12]:
from sklearn.tree import DecisionTreeClassifier
X_cancer = data_breast_cancer['data'][['mean texture', 'mean symmetry']]
y_cancer = data_breast_cancer['target']

from sklearn.model_selection import train_test_split
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(X_cancer, y_cancer, test_size=0.2)

from sklearn.metrics import f1_score
for i in range(20):
    tree_clf = DecisionTreeClassifier(max_depth=i+1)
    tree_clf.fit(X_cancer_train, y_cancer_train)
    #print(data_breast_cancer.feature_names)
    print(tree_clf.max_depth)
    print(f1_score(y_cancer_test, tree_clf.predict(X_cancer_test)))
    
    
tree_clf = DecisionTreeClassifier(max_depth=3)
tree_clf.fit(X_cancer_train, y_cancer_train)

1
0.7540983606557377
2
0.7832167832167832
3
0.7746478873239437
4
0.7947019867549668
5
0.7837837837837838
6
0.7612903225806451
7
0.7516778523489932
8
0.75
9
0.7733333333333334
10
0.7448275862068966
11
0.7361111111111113
12
0.7194244604316546
13
0.723404255319149
14
0.7448275862068966
15
0.7586206896551725
16
0.7272727272727272
17
0.7272727272727272
18
0.7586206896551725
19
0.7272727272727272
20
0.7412587412587414


DecisionTreeClassifier(max_depth=3)

In [None]:
from sklearn.tree import export_graphviz
f = "bc.dot"
export_graphviz(
        tree_clf,
        out_file=f,
        feature_names=data_breast_cancer.feature_names[[1, 8]],
        class_names=[str(num)+", "+name
                     for num,name in
                     zip(set(data_breast_cancer.target),data_breast_cancer.target_names)],
        rounded=True,
        filled=True) 


In [None]:
import graphviz
print(graphviz.render('dot', 'png', f))

In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

#zbior uczacy f1 score
f1_score_train = f1_score(y_cancer_train, tree_clf.predict(X_cancer_train))

#zbior testujacy f1 score
f1_score_test = f1_score(y_cancer_test, tree_clf.predict(X_cancer_test))

print(f1_score_train, f1_score_test)

#zbiór uczący accuracy
y_train_pred = tree_clf.predict(X_cancer_train)
acc_train = accuracy_score(y_cancer_train, y_train_pred)

#zbiór testujący accuracy
y_test_pred = tree_clf.predict(X_cancer_test)
acc_test = accuracy_score(y_cancer_test, y_test_pred)

print(acc_train, acc_test)


In [None]:
import numpy as np
import pandas as pd
size = 300
X = np.random.rand(size)*5-2.5
w4, w3, w2, w1, w0 = 1, 2, 1, -4, 2
y = w4*(X**4) + w3*(X**3) + w2*(X**2) + w1*X + w0 + np.random.randn(size)*8-4 
df = pd.DataFrame({'x': X, 'y': y})
df.plot.scatter(x='x',y='y')

In [None]:
from sklearn.model_selection import train_test_split
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X, y, test_size=0.2)

In [None]:
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor(max_depth=2,
random_state=42)
tree_reg.fit(df.x.values.reshape(-1, 1), df.y)
tree_reg.predict([[0.6]])

In [None]:
from sklearn.tree import export_graphviz
f = "reg_tree.dot"
export_graphviz(
        tree_reg,
        out_file=f,
        
        rounded=True,
        filled=True) 


In [None]:
import graphviz
print(graphviz.render('dot', 'png', f))