In [1]:
import pandas as pd
import numpy as np
import logging

# Logging setup: emit every record from DEBUG upwards, prefixed with the
# timestamp and the severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Toy "play tennis" dataset: four categorical weather attributes and the
# class label "Tenis" (14 examples, one list entry per example).
data = {
    "Aspecto": [
        "Sol", "Sol", "Nuvens", "Chuva", "Chuva", "Chuva", "Nuvens",
        "Sol", "Sol", "Chuva", "Sol", "Nuvens", "Nuvens", "Chuva",
    ],
    "Temp": [
        "Quente", "Quente", "Quente", "Ameno", "Fresco", "Fresco", "Fresco",
        "Ameno", "Fresco", "Ameno", "Ameno", "Quente", "Quente", "Ameno",
    ],
    "Humidade": [
        "Elevada", "Elevada", "Elevada", "Elevada", "Normal", "Normal", "Normal",
        "Normal", "Normal", "Elevada", "Normal", "Elevada", "Normal", "Elevada",
    ],
    "Vento": [
        "Fraco", "Forte", "Fraco", "Fraco", "Fraco", "Forte", "Fraco",
        "Forte", "Fraco", "Fraco", "Forte", "Fraco", "Forte", "Forte",
    ],
    "Tenis": [
        "Não", "Não", "Sim", "Sim", "Sim", "Não", "Sim",
        "Não", "Sim", "Sim", "Sim", "Sim", "Sim", "Não",
    ],
}
df = pd.DataFrame(data)

# Feature matrix (every column except the label) and label vector.
X = df.drop('Tenis', axis=1)
y = df['Tenis']

def entropia(col):
    """Return the Shannon entropy (base 2) of the values in ``col``.

    ``col`` is any array-like exposing ``.shape`` (pandas Series or numpy
    array). The result is logged at DEBUG level, labelled with ``col.name``
    when that attribute exists and with ``col`` itself otherwise.
    """
    _, frequencies = np.unique(col, return_counts=True)
    n_items = float(col.shape[0])

    ent = 0.0
    # Accumulate -p * log2(p) one class at a time, in np.unique's order.
    for prob in frequencies / n_items:
        ent += -1.0 * prob * np.log2(prob)

    label = getattr(col, 'name', col)
    logging.debug(f"Entropia calculada: {ent} para coluna: {label}")
    return ent

def information_gain(df, attr, target):
    """Return the information gain obtained by splitting ``df`` on ``attr``.

    The gain is the entropy of the ``target`` column minus the weighted sum
    of the entropies of ``target`` within each group of rows sharing a value
    of ``attr``. Intermediate values are logged at DEBUG level.
    """
    parent_entropy = entropia(df[target])
    distinct_values = np.unique(df[attr])
    weighted_entropy = 0
    logging.debug(f"Calculando information gain para atributo '{attr}'")

    for value in distinct_values:
        labels = df.loc[df[attr] == value, target]
        # Fraction of the rows that fall into this branch.
        weight = len(labels) / len(df)
        branch_entropy = entropia(labels)
        weighted_entropy += weight * branch_entropy
        logging.debug(f"Valor {value}: proporção={weight:.3f}, entropia={branch_entropy:.3f}, acc parcial={weighted_entropy:.3f}")

    gain = parent_entropy - weighted_entropy
    logging.debug(f"Information Gain para '{attr}': {gain:.3f}")
    return gain

class DecisionTreeCategorical:
    """ID3-style decision tree over categorical attributes.

    A node is either a leaf (``attr`` is None) or an internal node that
    splits on ``attr`` and keeps one child per attribute value seen during
    training. Every node trained on at least one example stores in
    ``target`` the most frequent class among its training examples, so that
    ``predict`` has a meaningful answer at any node.
    """

    def __init__(self, depth=0, max_depth=3):
        """Create an untrained node.

        depth: depth of this node in the tree (the root is 0).
        max_depth: nodes at this depth or deeper always become leaves.
        """
        self.children = {}  # attribute value -> child DecisionTreeCategorical
        self.attr = None  # attribute this node splits on; None for a leaf
        self.max_depth = max_depth
        self.depth = depth
        self.target = None  # majority class of this node's training examples

    def train(self, df, features, target):
        """Recursively fit the subtree rooted at this node.

        df: DataFrame holding the training examples that reach this node.
        features: names of the columns still available for splitting.
        target: name of the class-label column.
        """
        logging.debug(f"{'  '*self.depth}Treinando nó (profundidade={self.depth}) com {len(df)} exemplos. Features: {features}")

        # Drop any state left over from a previous call so that a node
        # retrained into a leaf does not keep stale children.
        self.attr = None
        self.children = {}
        self.target = None

        # No examples: nothing to learn; stay an empty leaf.
        if len(df) == 0:
            return

        # Record the majority class at EVERY node, not only at leaves:
        # predict() falls back to it when a row carries an attribute value
        # that never appeared in this node's training data.
        self.target = df[target].mode().iloc[0]

        # Pure node, no features left, or depth limit reached: become a leaf.
        if len(np.unique(df[target])) == 1 or len(features) == 0 or self.depth >= self.max_depth:
            logging.debug(f"{'  '*self.depth}Nó folha criado com target={self.target}")
            return

        # Pick the attribute with the highest information gain.
        gains = [information_gain(df, attr, target) for attr in features]
        best_attr = features[np.argmax(gains)]
        self.attr = best_attr
        logging.debug(f"{'  '*self.depth}Melhor atributo para split: {best_attr}")

        # The same reduced feature list is handed to every child.
        remaining = [f for f in features if f != best_attr]
        for v in np.unique(df[best_attr]):
            # v comes from the column itself, so the subset is never empty.
            subset = df[df[best_attr] == v]
            logging.debug(f"{'  '*self.depth}Criando filho para valor '{v}' ({len(subset)} exemplos)")
            child = DecisionTreeCategorical(depth=self.depth+1, max_depth=self.max_depth)
            child.train(subset, remaining, target)
            self.children[v] = child

    def predict(self, row):
        """Return the predicted class for ``row``.

        row: mapping-like object (e.g. a DataFrame row) indexable by
            attribute name.

        Descends the tree following the row's attribute values. When the
        value at some node has no child, that node's majority class is
        returned.
        """
        if self.attr is None or not self.children:
            logging.debug(f"Predição em nó folha: retorna {self.target}")
            return self.target

        val = row[self.attr]
        logging.debug(f"Testando atributo '{self.attr}' com valor '{val}'")
        child = self.children.get(val)
        if child is not None:
            return child.predict(row)

        logging.debug(f"Valor '{val}' não encontrado, retorna modo do nó: {self.target}")
        return self.target  # fall back to this node's majority class

# Train the tree on the full dataset.
features = ["Aspecto", "Temp", "Humidade", "Vento"]
target = "Tenis"
tree = DecisionTreeCategorical(max_depth=3)
tree.train(df, features, target)

# Check the predictions on the training rows themselves, logging the
# predicted and the actual label for each example (numbered from 1).
for i, (_, test_row) in enumerate(df.iterrows()):
    previsto = tree.predict(test_row)
    real = test_row[target]
    logging.info(f"Exemplo {i+1}: Previsto={previsto}, Real={real}")

2025-05-21 16:57:22,535 - DEBUG - Treinando nó (profundidade=0) com 14 exemplos. Features: ['Aspecto', 'Temp', 'Humidade', 'Vento']
2025-05-21 16:57:22,537 - DEBUG - Entropia calculada: 0.9402859586706311 para coluna: Tenis
2025-05-21 16:57:22,538 - DEBUG - Calculando information gain para atributo 'Aspecto'
2025-05-21 16:57:22,539 - DEBUG - Entropia calculada: 0.9709505944546686 para coluna: Tenis
2025-05-21 16:57:22,540 - DEBUG - Valor Chuva: proporção=0.357, entropia=0.971, acc parcial=0.347
2025-05-21 16:57:22,542 - DEBUG - Entropia calculada: 0.0 para coluna: Tenis
2025-05-21 16:57:22,543 - DEBUG - Valor Nuvens: proporção=0.286, entropia=0.000, acc parcial=0.347
2025-05-21 16:57:22,545 - DEBUG - Entropia calculada: 0.9709505944546686 para coluna: Tenis
2025-05-21 16:57:22,545 - DEBUG - Valor Sol: proporção=0.357, entropia=0.971, acc parcial=0.694
2025-05-21 16:57:22,546 - DEBUG - Information Gain para 'Aspecto': 0.247
2025-05-21 16:57:22,547 - DEBUG - Entropia calculada: 0.9402859