In [33]:
import torch
import torch.nn as nn
# Training set: 17 samples, one per row.
# Columns 0-5 are integer-coded categorical features (codes 0..2; the last
# feature only uses 0..1); column 6 is the binary class label (1 / 0).
train_dataset = [
    [0, 0, 0, 0, 0, 0, 1],
    [1, 0, 1, 0, 0, 0, 1],
    [1, 0, 0, 0, 0, 0, 1],
    [0, 0, 1, 0, 0, 0, 1],
    [2, 0, 0, 0, 0, 0, 1],
    [0, 1, 0, 0, 1, 1, 1],
    [1, 1, 0, 1, 1, 1, 1],
    [1, 1, 0, 0, 1, 0, 1],
    [1, 1, 1, 1, 1, 0, 0],
    [0, 2, 2, 0, 2, 1, 0],
    [2, 2, 2, 2, 2, 0, 0],
    [2, 0, 0, 2, 2, 1, 0],
    [0, 1, 0, 1, 0, 0, 0],
    [2, 1, 1, 1, 0, 0, 0],
    [1, 1, 0, 0, 1, 1, 0],
    [2, 0, 0, 2, 2, 0, 0],
    [0, 0, 1, 1, 1, 0, 0],
]


In [34]:
# turn input data into one-hot vector
def one_hot(input_data):
    """One-hot encode integer-coded samples and extract +/-1 labels.

    Args:
        input_data: nested list of shape (M, n); each row is one sample whose
            first n-1 entries are non-negative integer category codes and whose
            last entry is a 0/1 class label.

    Returns:
        (output_X, output_y) where output_X is a float tensor with one block of
        ``max_code + 1`` columns per feature (concatenated left to right) and
        output_y is a float (M, 1) tensor with labels mapped 0 -> -1, 1 -> +1.
        With no feature columns at all, output_X is an empty 1-D tensor.
    """
    # Work column-wise: after the transpose, row i holds feature i of every sample.
    columns = torch.tensor(input_data).t()
    _, data_num = columns.shape

    encoded_blocks = []
    for column in columns[:-1]:  # the last column is the label, not a feature
        # The block width is inferred from the largest code present in the data.
        width = int(column.max().item()) + 1
        block = torch.zeros((data_num, width), dtype=torch.float)
        block.scatter_(1, column.view(-1, 1), 1)
        encoded_blocks.append(block)

    if encoded_blocks:
        output_X = torch.cat(encoded_blocks, 1)
    else:
        output_X = torch.tensor([])

    # Map the {0, 1} labels onto {-1, +1}.
    output_y = (columns[-1].view(-1, 1) * 2 - 1).float()
    return output_X, output_y


In [35]:
# Sanity-check one_hot on the full training set.
# Five features use three codes and one uses two, so the encoded matrix has
# 3+3+3+3+3+2 = 17 columns; the labels come back as one value per sample.
# train_y is reused by the later training cells.
train_X, train_y = one_hot(train_dataset)
print(train_X.shape) # expected 17×17: 17 samples × (3+3+3+3+3+2) one-hot columns
print(train_y.shape) # expected 17×1


torch.Size([17, 17])
torch.Size([17, 1])


In [36]:
# Continuous features: density and sugar content (presumably in that column
# order), one pair per sample. Rows are assumed to follow the order of
# train_dataset, because they are paired with train_y in the cells below.
new_feature = torch.tensor([
    [0.697, 0.460],
    [0.774, 0.376],
    [0.634, 0.264],
    [0.608, 0.318],
    [0.556, 0.215],
    [0.403, 0.237],
    [0.481, 0.149],
    [0.437, 0.211],
    [0.666, 0.091],
    [0.243, 0.267],
    [0.245, 0.057],
    [0.343, 0.099],
    [0.639, 0.161],
    [0.657, 0.198],
    [0.360, 0.370],
    [0.593, 0.042],
    [0.719, 0.103],
])
# NOTE: this REPLACES the one-hot matrix, so every later cell trains on these
# two columns only. To append them to the one-hot features instead, use:
#     train_X = torch.cat((train_X, new_feature), 1)
train_X = new_feature
print(train_X.shape)


torch.Size([17, 2])


In [37]:
# define the model
def createModel(input_channel, output_channel):
    """Build a single-layer linear model wrapped in ``nn.Sequential``.

    Args:
        input_channel: number of input features.
        output_channel: number of output units.

    Returns:
        An ``nn.Sequential`` whose only module (index 0) is the ``nn.Linear``
        layer, so its parameters are reachable as ``net[0].weight`` / ``.bias``.
    """
    linear_layer = nn.Linear(input_channel, output_channel)
    return nn.Sequential(linear_layer)


In [38]:
# define the loss
def Loss(y_hat, y):
    """Perceptron-style loss: total margin violation of misclassified samples.

    A sample counts as misclassified when ``y * y_hat`` is negative; its
    contribution is ``|y * y_hat|``. Samples with a non-negative product add
    nothing. The expression is symmetric in its two arguments.

    Args:
        y_hat: tensor of raw model scores.
        y: tensor of targets (broadcast-compatible with ``y_hat``).

    Returns:
        Scalar tensor holding the summed loss.
    """
    margin = y * y_hat
    # Zero out every entry whose margin is not strictly negative.
    violations = (margin < 0).float() * margin
    return violations.abs().sum()


In [39]:
class TreeNode:
    """Node of a binary tree of linear classifiers.

    Attributes:
        model: classifier used to route a sample at this node, or None.
        predicted: class id returned when this node is a leaf; -1 marks a
            node that does not itself carry a prediction.
        left: child node followed when the model's output is negative.
        right: child node followed when the model's output is >= 0.
    """

    def __init__(self, model=None, predicted=-1, left=None, right=None):
        self.model, self.predicted = model, predicted
        self.left, self.right = left, right


In [40]:
def train(net, train_X, train_y, epochs, lr, print_frequence=0):
    """Fit ``net`` in place with full-batch SGD on the perceptron-style ``Loss``.

    Args:
        net: model to optimise; its parameters are updated in place.
        train_X: input tensor fed to ``net`` as one batch.
        train_y: target tensor passed to ``Loss`` together with the scores.
        epochs: number of gradient steps (one per pass over the whole batch).
        lr: SGD learning rate.
        print_frequence: if non-zero, print loss and accuracy every
            ``print_frequence`` epochs; 0 disables reporting.
    """
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    for epoch in range(epochs):
        optimizer.zero_grad()
        scores = net(train_X)
        # Loss only uses the product of its two arguments, so their order is immaterial.
        loss = Loss(train_y, scores)
        loss.backward()
        optimizer.step()

        should_report = print_frequence and (epoch + 1) % print_frequence == 0
        if should_report:
            # The loss shown was computed before this epoch's step, the
            # accuracy after it; the epoch index printed is zero-based.
            print("epoch:%d, loss:%f" % (epoch, loss.item()))
            print("epoch:%d, accuracy:%0.2f%%\n" % (epoch, evaluate(net, train_X, train_y)))
    
def evaluate(net, train_X, train_y):
    """Return the classification accuracy of ``net`` in percent.

    Scores >= 0 are read as class +1 and negative scores as class -1, then
    compared element-wise with ``train_y``.

    Args:
        net: callable mapping ``train_X`` to a tensor of scores.
        train_X: input tensor.
        train_y: tensor of +/-1 targets with the same shape as the scores.

    Returns:
        Scalar float tensor: 100 * (number of matches) / len(train_y).
    """
    scores = net(train_X)
    # Map the sign of each score onto the {-1, +1} label convention.
    predicted_labels = (scores >= 0).float() * 2 - 1
    hits = (predicted_labels == train_y).sum().float()
    return 100 * hits / len(train_y)


In [41]:
# Smoke-test the training loop: a single linear unit on the two-column
# train_X, reporting loss and accuracy every 10 epochs.
net = createModel(input_channel=2, output_channel=1)
train(net, train_X, train_y, epochs=40, lr=0.01, print_frequence=10)

epoch:9, loss:0.871629
epoch:9, accuracy:29.41%

epoch:19, loss:0.696851
epoch:19, accuracy:29.41%

epoch:29, loss:0.598939
epoch:29, accuracy:35.29%

epoch:39, loss:0.452202
epoch:39, accuracy:35.29%



In [42]:
def createTree(tree, train_X, train_y, epochs, lr, precision, max_depth=None):
    """Recursively grow a binary tree of linear classifiers below ``tree``.

    A fresh linear model is trained on the given samples and stored in
    ``tree.model``. The samples are then split by the sign of the model's
    output: negative scores go left, scores >= 0 go right. Each side becomes
    a leaf (predicting class 0 on the left, class 1 on the right) when it is
    empty or when the model's accuracy on it exceeds ``precision``; otherwise
    a child node is created and trained on that side's samples.

    Args:
        tree: node to fill in; its ``model``, ``left`` and ``right`` are set.
        train_X: feature tensor, one row per sample.
        train_y: target tensor of shape (n, k); ``k`` sets the model's output size.
        epochs: training epochs for every node's model.
        lr: learning rate for every node's model.
        precision: accuracy threshold in percent above which a side is
            accepted as a leaf.
        max_depth: optional cap on the number of model-bearing levels. At the
            cap, both sides become leaves regardless of accuracy. ``None``
            (default) means no limit; values below 1 are treated as 1.

    Returns:
        None. If ``train_y`` is empty the node is left untouched.
    """
    if len(train_y) == 0:
        return None
    tree.model = createModel(train_X.shape[1], train_y.shape[1])
    train(tree.model, train_X, train_y, epochs, lr)
    # Partition the samples by the sign of the freshly trained model's output.
    train_set = binaryTrainSet(tree.model, train_X, train_y)

    # Without a cap, a model that fails to separate an impure subset makes the
    # child retrain on the very same samples, so the recursion has no bound.
    depth_exhausted = max_depth is not None and max_depth <= 1
    child_depth = None if max_depth is None else max_depth - 1

    children = []
    # class_id 0 is the negative-score side (left), 1 the non-negative side
    # (right). The left subtree is completed before the right side is looked at.
    for class_id, (subset_X, subset_y) in enumerate(train_set):
        if (depth_exhausted
                or len(subset_y) == 0
                or evaluate(tree.model, subset_X, subset_y) > precision):
            child = TreeNode(predicted=class_id)
        else:
            child = TreeNode()
            createTree(child, subset_X, subset_y, epochs, lr, precision, child_depth)
        children.append(child)
    tree.left, tree.right = children
    
def binaryTrainSet(net, train_X, train_y):
    """Split a labelled sample set in two by the sign of ``net``'s output.

    Args:
        net: callable producing exactly one score per row of ``train_X``.
        train_X: feature tensor, one row per sample.
        train_y: target tensor with one label per sample.

    Returns:
        ``[[X_neg, y_neg], [X_pos, y_pos]]``: index 0 holds the samples whose
        score is negative, index 1 those whose score is >= 0. Sample order is
        preserved within each side. Labels are float tensors of shape (k, 1);
        an empty side yields feature shape (0, d) and label shape (0, 1).
    """
    # Only the sign of the scores matters, so no autograd graph is needed.
    with torch.no_grad():
        is_positive = (net(train_X) >= 0).reshape(-1)

    train_set = []
    for side_mask in (~is_positive, is_positive):
        # Boolean-mask indexing selects the whole side at once instead of
        # growing the tensors one row at a time.
        side_X = train_X[side_mask]
        side_y = train_y[side_mask].float().view(-1, 1)
        train_set.append([side_X, side_y])
    return train_set


In [43]:
# Grow the classifier tree on the current training set; a side of a split is
# accepted as a leaf once the node's model is more than 90% accurate on it.
tree = TreeNode()
createTree(tree, train_X, train_y, epochs=200, lr=0.01, precision=90)


In [44]:
def predict(tree, X):
    """Classify one sample by walking the tree from ``tree`` down to a leaf.

    Args:
        tree: node to start from; a node whose ``predicted`` is not -1 is
            treated as a leaf.
        X: a single sample, passed unchanged to each node's ``model``; the
            model must return a single score for it.

    Returns:
        The ``predicted`` value stored on the leaf that is reached.
    """
    node = tree
    # -1 is the sentinel for "no prediction here, keep descending".
    while node.predicted == -1:
        # Non-negative scores route to the right child, negative ones to the left.
        node = node.right if node.model(X) >= 0 else node.left
    return node.predicted


In [45]:
def evaluate_tree(tree, test_X, test_y):
    """Return the accuracy of the tree on a labelled set, in percent.

    Each sample is classified with ``predict``; the resulting class id (0/1)
    is mapped to -1/+1 and compared with the matching entry of ``test_y``.

    Args:
        tree: root node of the tree.
        test_X: indexable collection of samples; its length sets the sample count.
        test_y: indexable collection of +/-1 targets, one per sample.

    Returns:
        100 * correct / total as a Python float.
    """
    total = len(test_X)
    correct = sum(
        1
        for index in range(total)
        if predict(tree, test_X[index]) * 2 - 1 == test_y[index]
    )
    return 100 * correct / total


In [46]:
# Accuracy (in percent) of the fitted tree on the same samples it was grown
# from, so this measures training fit rather than generalisation.
evaluate_tree(tree, train_X, train_y)

100.0

In [51]:
# Breadth-first walk over the fitted tree, collecting the linear layer of every
# model-bearing node so that its weights and bias can be printed.
queue = [tree]
res_w = []
res_b = []
while queue:
    currentNode = queue.pop(0)
    # Leaves (and missing children) carry no model. Skip them but keep draining
    # the queue: stopping at the first leaf would silently drop every internal
    # node that was queued behind it.
    if currentNode is None or currentNode.model is None:
        continue
    res_w.append(currentNode.model[0].weight)
    res_b.append(currentNode.model[0].bias)
    # Enqueue both children the same way; the guard above filters out leaves.
    queue.append(currentNode.left)
    queue.append(currentNode.right)
# Printed labels: "节点" = node, "的linear的参数" = parameters of the linear
# layer, "阈值为" = the threshold (bias) is.
for i in range(len(res_b)):
    print("节点",i,"的linear的参数:",res_w[i][0].detach().numpy(),"\n阈值为:",res_b[i][0].detach().numpy(),"\n")

节点 0 的linear的参数: [0.10950919 0.32785192] 
阈值为: -0.09765445 

节点 1 的linear的参数: [0.00404184 0.18627784] 
阈值为: -0.08863847 

节点 2 的linear的参数: [0.00649861 0.10929873] 
阈值为: -0.043555226 

