## NumPy

In [1]:
import numpy as np

# Toy binary dataset: a 4x4 matrix of 0/1 entries, one sample per row.
X = np.array([
    [0, 1, 0, 1],
    [1, 0, 1, 1],
    [0, 0, 0, 1],
    [1, 0, 1, 0],
])

# One label per row of X (two classes: 0 and 1).
y = np.array([0, 1, 0, 1])

In [2]:
# Per-class feature totals: for each distinct label in y, pick the rows of X
# carrying that label (boolean mask) and sum them column-wise.
counts = {
    label: X[y == label].sum(axis=0)
    for label in np.unique(y)
}
print("Feature counts: \n{}".format(counts))

Feature counts: 
{0: array([0, 1, 0, 2]), 1: array([2, 0, 2, 1])}


## Decision Trees

In [3]:
import mglearn

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

from sklearn.tree import DecisionTreeClassifier

# Load the breast cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

# Stratify on the target so both splits keep the same class balance;
# the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

# No depth limit is set here, so the tree is free to grow until it fits the
# training data; compare the two scores below to gauge overfitting.
tree = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)

print("Training set accuracy: {:.3f}".format(tree.score(X_train, y_train)))
print("Testing set accuracy: {:.3f}".format(tree.score(X_test, y_test)))

Training set accuracy: 1.000
Testing set accuracy: 0.937


### Pre-pruning the Tree

In [5]:
# Pre-pruning: cap the tree at depth 4 instead of letting it grow unrestricted,
# then refit on the same training split and report both accuracies.
tree = DecisionTreeClassifier(
    max_depth=4,
    random_state=0,
).fit(X_train, y_train)

print("Training set accuracy: {:.3f}".format(tree.score(X_train, y_train)))
print("Testing set accuracy: {:.3f}".format(tree.score(X_test, y_test)))

Training set accuracy: 0.988
Testing set accuracy: 0.951
