In [3]:
import csv
import math
from collections import Counter

class TreeNode:
    """One node of a decision tree.

    Attributes:
        ids: indices of the training rows that reached this node.
        entropy: entropy value supplied by the caller for this node.
        depth: depth supplied by the caller (the root is created with 0).
        split_attribute: attribute index used to split here, or None.
        children: child nodes; an empty list when the node has none.
        order: attribute values, positionally aligned with ``children``.
        label: label assigned through ``set_label``, or None.
    """

    def __init__(self, ids=None, children=None, entropy=0, depth=0):
        # Falsy arguments (None or an empty list) are replaced by a fresh
        # list so that nodes never share a default container.
        self.ids = ids if ids else []
        self.entropy, self.depth = entropy, depth
        self.split_attribute = None
        self.children = children if children else []
        self.order = None
        self.label = None

    def set_properties(self, split_attribute, order):
        """Record the splitting attribute and the value order of the children."""
        self.split_attribute, self.order = split_attribute, order

    def set_label(self, label):
        """Store the label attached to this node."""
        self.label = label

def entropy(freq):
    """Return the Shannon entropy, in bits, of a frequency distribution.

    Args:
        freq: iterable of class counts. Any iterable is accepted, including
            one-shot iterators and generators. Non-positive entries are
            ignored.

    Returns:
        The entropy as a float; ``0.0`` when there are no positive counts.
    """
    # Materialise once: the counts are needed twice (total and terms), and a
    # generator would otherwise be exhausted after the first pass.
    counts = [f for f in freq if f > 0]
    if not counts:
        return 0.0
    total = sum(counts)
    # Negating each term (rather than the final sum) yields +0.0 instead of
    # -0.0 for a pure distribution.
    return sum(-(f / total) * math.log2(f / total) for f in counts)

class DecisionTreeID3:
    """ID3-style decision tree classifier using information gain.

    Each distinct value of the chosen attribute becomes its own branch, so
    numeric values are treated as categories rather than thresholded.

    Args:
        max_depth: nodes at this depth are never split.
        min_samples_split: a node with fewer rows is never split, and a
            candidate split is rejected when any of its branches would hold
            fewer rows than this.
        min_gain: minimum information gain a split must achieve.
    """

    def __init__(self, max_depth=10, min_samples_split=2, min_gain=1e-4):
        self.root = None
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_gain = min_gain

    def fit(self, data, target):
        """Build the tree from training rows and their labels.

        Args:
            data: non-empty sequence of rows; each row is an indexable
                sequence of hashable attribute values.
            target: sequence of hashable labels, indexable by row position.
        """
        self.data = data
        self.target = target
        self.attributes = list(range(len(data[0])))
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # label list is reproducible from run to run.
        self.labels = list(dict.fromkeys(target))

        ids = list(range(len(data)))
        self.root = TreeNode(ids=ids, entropy=self._entropy(ids), depth=0)

        # A node's split depends only on its own rows, so traversal order
        # does not affect the resulting tree; a stack gives O(1) pops.
        pending = [self.root]
        while pending:
            node = pending.pop()
            # Every node (not just leaves) gets its majority label so that
            # prediction has a fallback when it must stop at this node.
            self._set_label(node)
            if node.depth >= self.max_depth or len(node.ids) < self.min_samples_split:
                continue
            children = self._split(node)
            if children:
                node.children = children
                pending.extend(children)

    def _entropy(self, ids):
        """Return the label entropy of the training rows indexed by ``ids``."""
        counts = Counter(self.target[i] for i in ids)
        return entropy(counts.values())

    def _set_label(self, node):
        """Label ``node`` with the most frequent target among its rows.

        Ties go to the label encountered first among the node's rows.
        """
        counts = Counter(self.target[i] for i in node.ids)
        node.set_label(counts.most_common(1)[0][0])

    def _split(self, node):
        """Choose the best attribute for ``node`` and create its children.

        Returns:
            A list of child nodes, one per attribute value, aligned with
            ``node.order``; an empty list when no attribute qualifies.
        """
        ids = node.ids
        best_gain = 0
        best_groups = {}
        best_attribute = None

        for att in self.attributes:
            # Group row indices by attribute value in one pass; insertion
            # order keeps the branch order deterministic.
            groups = {}
            for i in ids:
                groups.setdefault(self.data[i][att], []).append(i)

            if len(groups) == 1:
                continue  # constant attribute: nothing to separate
            if min(len(rows) for rows in groups.values()) < self.min_samples_split:
                continue

            # Weighted average entropy of the branches.
            weighted = sum(
                len(rows) * self._entropy(rows) for rows in groups.values()
            ) / len(ids)
            gain = node.entropy - weighted

            if gain < self.min_gain:
                continue
            # Strict comparison: on equal gain the earliest attribute wins.
            if gain > best_gain:
                best_gain = gain
                best_groups = groups
                best_attribute = att

        order = list(best_groups) if best_attribute is not None else None
        node.set_properties(best_attribute, order)
        return [
            TreeNode(ids=rows, entropy=self._entropy(rows), depth=node.depth + 1)
            for rows in best_groups.values()
        ]

    def predict(self, new_data):
        """Return the list of predicted labels, one per row of ``new_data``."""
        return [self._predict_one(x) for x in new_data]

    def _predict_one(self, x):
        """Walk the tree for row ``x`` and return the label where it stops.

        If ``x`` carries an attribute value that was not seen at a node
        during training, the walk stops there and that node's majority
        label is returned.
        """
        node = self.root
        while node.children:
            try:
                branch = node.order.index(x[node.split_attribute])
            except ValueError:
                break  # value unseen at this node during training
            node = node.children[branch]
        return node.label

def load_csv(filename, encoding=None):
    """Read a CSV file and return its rows as a list of lists of strings.

    Args:
        filename: path of the CSV file to read.
        encoding: text encoding passed to ``open``; ``None`` keeps the
            platform default.

    Returns:
        Every row of the file, header included, in file order.
    """
    # The csv module requires newline='' so that it handles line endings
    # itself; otherwise "\r\n" inside quoted fields is altered on read.
    with open(filename, 'r', newline='', encoding=encoding) as f:
        return list(csv.reader(f))

if __name__ == "__main__":
    # Read the dataset from the CSV file; the first row holds the column
    # names and is kept apart from the data rows.
    data = load_csv('weather.csv')
    headers, data = data[0], data[1:]

    # Separate the features (all but the last column) from the target
    # (the last column).
    X = [row[:-1] for row in data]
    y = [row[-1] for row in data]

    # Convert cells to numbers where possible; cells that are not valid
    # floats stay as strings.
    for row in X:
        for j, cell in enumerate(row):
            try:
                row[j] = float(cell)
            except ValueError:
                continue

    tree = DecisionTreeID3(max_depth=3, min_samples_split=2)
    tree.fit(X, y)
    # Predictions are made on the same rows the tree was trained on.
    predictions = tree.predict(X)
    print("Dự đoán:", predictions)

Dự đoán: ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
