In [1]:
import numpy as np
from mpitree._node import DecisionNode
from mpitree.decision_tree import DecisionTreeClassifier

In [2]:
"""
input => [c, b, c, c, a, b]
output (with order) => [1, 2, 3]

input => [c, b, c, c, b]
output (with order) => [0, 2, 3]
"""

n_classes = [1, 2, 3]  # every known class label, in the order counts are reported
a = np.array([1, 3, 3, 1, 1])  # observed labels; label 2 never occurs
b = np.zeros(3)

# Tally only the labels that actually occur in `a` (absent labels get no key).
c = {label: count for label, count in zip(*np.unique(a, return_counts=True))}

print(n_classes)
print(c)

# One count per known label, falling back to 0 for labels missing from `c`.
np.array([c[label] if label in c else 0 for label in n_classes])

[1, 2, 3]
{1: 3, 3: 2}


array([3, 0, 2])

In [3]:
from string import ascii_lowercase
import random


def mat(n_samples=1, shape=(1, 1), *, n_classes=1, repeat=False):
    """Generate a toy matrix of single-letter features with random labels.

    Parameters
    ----------
    n_samples : int, default=1
        Number of leading lowercase letters used as the pool of feature
        values. The pool is repeated twice and split evenly into
        ``shape[0]`` rows. Ignored when `repeat` is True.
    shape : tuple of (int, int), default=(1, 1)
        ``(m, n)``. ``m`` is the number of rows (and of labels). ``n`` is
        only read when `repeat` is True, where each row holds the first
        ``2 * n`` lowercase letters.
    n_classes : int, default=1
        Labels are drawn from the first `n_classes` lowercase letters.
    repeat : bool, default=False
        If True, every row of `X` is the same sequence of letters.

    Returns
    -------
    X : np.ndarray
        2-D array of single-character strings with ``m`` rows.
    y : list of str
        ``m`` labels, each picked with ``random.choice``.

    Notes
    -----
    Prints `X` with `y` appended as the last column before returning.
    """
    n_rows, n_cols = shape

    if repeat:
        # Identical feature values on every row.
        row = np.array(list(ascii_lowercase[: n_cols * 2]))
        X = np.tile(row, (n_rows, 1))
    else:
        # Cycle through the letter pool twice, then cut it into equal rows.
        letters = np.array(list(ascii_lowercase[:n_samples] * 2))
        X = np.array(np.split(letters, n_rows))

    label_pool = ascii_lowercase[:n_classes]
    y = [random.choice(label_pool) for _ in range(n_rows)]

    print(np.column_stack((X, y)))
    return X, y

In [4]:
# Construct a node by hand from a small generated dataset: `mat` returns
# (X, y) and `np.column_stack` appends y to X as the last column of `state`.
DecisionNode(
    feature=[1, 2, 3],
    state=np.column_stack(mat(n_samples=6, shape=(3, 2), n_classes=2)),
)

[['a' 'b' 'c' 'd' 'b']
 ['e' 'f' 'a' 'b' 'a']
 ['c' 'd' 'e' 'f' 'b']]


DecisionNode(feature=[1, 2, 3], threshold=None, branch=None, depth=0, value=array([], dtype=float64), n_samples=3)

In [5]:
# Smallest case: `mat()` with its defaults yields a single row and a single
# possible class label.
X, y = mat()

# Fit on that one sample and print the fitted classifier.
clf = DecisionTreeClassifier().fit(X, y)
print(clf)

[['a' 'a' 'a']]
└── class: a


In [6]:
# repeat=True tiles one row of letters, so all 3 rows carry identical feature
# values while their labels are drawn from 2 classes.
# NOTE(review): labels come from `random.choice` inside `mat` and no seed is
# set in the visible cells, so the printed output may differ between runs.
X, y = mat(n_samples=6, shape=(3, 2), n_classes=2, repeat=True)

clf = DecisionTreeClassifier().fit(X, y)
print(clf)

[['a' 'b' 'c' 'd' 'a']
 ['a' 'b' 'c' 'd' 'b']
 ['a' 'b' 'c' 'd' 'a']]
└── class: a


In [7]:
# Without repeat, the first 6 letters are cycled twice and split into 3 rows,
# so the rows differ from one another; labels are drawn from 2 classes.
# NOTE(review): labels come from `random.choice` inside `mat` and no seed is
# set in the visible cells, so the printed output may differ between runs.
X, y = mat(n_samples=6, shape=(3, 2), n_classes=2)

clf = DecisionTreeClassifier().fit(X, y)
print(clf)

[['a' 'b' 'c' 'd' 'a']
 ['e' 'f' 'a' 'b' 'b']
 ['c' 'd' 'e' 'f' 'a']]

IG(feature_0) = H({'b', 'a'}) - rem(feature_0)
	 = H({'b', 'a'})
		 = - [0.667 0.333] * log([0.667 0.333])
		 = [-0.918]
	 = rem(feature_0)
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = [0.333 0.333 0.333] x [-0. -0. -0.]
		 = [0.0]
	 = [0.918]

IG(feature_1) = H({'b', 'a'}) - rem(feature_1)
	 = H({'b', 'a'})
		 = - [0.667 0.333] * log([0.667 0.333])
		 = [-0.918]
	 = rem(feature_1)
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = [0.333 0.333 0.333] x [-0. -0. -0.]
		 = [0.0]
	 = [0.918]

IG(feature_2) = H({'b', 'a'}) - rem(feature_2)
	 = H({'b', 'a'})
		 = - [0.667 0.333] * log([0.667 0.333])
		 = [-0.918]
	 = rem(feature_2)
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.

In [8]:
# Larger case: the first 15 letters are cycled twice and split into 5 rows,
# with labels drawn from 2 classes.
# NOTE(review): labels come from `random.choice` inside `mat` and no seed is
# set in the visible cells, so the printed output may differ between runs.
X, y = mat(n_samples=15, shape=(5, 3), n_classes=2)

clf = DecisionTreeClassifier().fit(X, y)
print(clf)

# Evaluate on the same X and y the classifier was fitted on.
display(clf.predict(X))
display(clf.predict_proba(X))
display(clf.score(X, y))

[['a' 'b' 'c' 'd' 'e' 'f' 'b']
 ['g' 'h' 'i' 'j' 'k' 'l' 'b']
 ['m' 'n' 'o' 'a' 'b' 'c' 'b']
 ['d' 'e' 'f' 'g' 'h' 'i' 'a']
 ['j' 'k' 'l' 'm' 'n' 'o' 'a']]

IG(feature_0) = H({'b', 'a'}) - rem(feature_0)
	 = H({'b', 'a'})
		 = - [0.4 0.6] * log([0.4 0.6])
		 = [-0.971]
	 = rem(feature_0)
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = [0.2 0.2 0.2 0.2 0.2] x [-0. -0. -0. -0. -0.]
		 = [0.0]
	 = [0.971]

IG(feature_1) = H({'b', 'a'}) - rem(feature_1)
	 = H({'b', 'a'})
		 = - [0.4 0.6] * log([0.4 0.6])
		 = [-0.971]
	 = rem(feature_1)
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'a'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = H({'b'})
		 = - [1.] * log([1.])
		 = [0.0]
	 = [0.2 0.2 0.2 0.2 0.2]

array(['b', 'b', 'b', 'a', 'a'], dtype='<U1')

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.]])

1.0