In [1]:
import csv

def load_data(filename):
    """Read a CSV file into a list of row dictionaries.

    The first line of the file supplies the column names; every following
    record becomes one dict keyed by those names (built by csv.DictReader).

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one dict per data record, in file order.
    """
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires; without it, line breaks inside quoted fields
    # are rewritten by the text layer before the parser sees them.
    with open(filename, 'r', newline='') as file:
        return list(csv.DictReader(file))


In [2]:
import math
from collections import Counter

def entropy(data, target_attr):
    """Return the base-2 Shannon entropy of the ``target_attr`` column.

    Args:
        data: Sequence of row mappings.
        target_attr: Key whose values are treated as class labels.

    Returns:
        The negated sum of p * log2(p) over the distinct labels, where p is
        the fraction of rows carrying that label. An empty ``data`` gives 0.
    """
    frequencies = Counter(record[target_attr] for record in data)
    n_rows = len(data)
    # Relative frequency of each distinct label, in first-seen order.
    probabilities = [freq / n_rows for freq in frequencies.values()]
    return -sum(p * math.log2(p) for p in probabilities)

def info_gain(data, attr, target_attr):
    """Return the information gain obtained by splitting ``data`` on ``attr``.

    Gain is the entropy of ``target_attr`` over all rows minus the
    size-weighted entropy of the partitions induced by each distinct value
    of ``attr``.

    Args:
        data: Sequence of row mappings.
        attr: Key to split on.
        target_attr: Key holding the class label.

    Returns:
        Baseline entropy minus the weighted entropy after the split.
    """
    baseline = entropy(data, target_attr)
    n_rows = len(data)

    remainder = 0
    for value in {record[attr] for record in data}:
        matching = [record for record in data if record[attr] == value]
        # Each partition contributes in proportion to its share of the rows.
        share = len(matching) / n_rows
        remainder += share * entropy(matching, target_attr)

    return baseline - remainder


In [3]:
def build_tree(data, attributes, target_attr):
    """Recursively build an ID3-style decision tree from categorical rows.

    At each node the attribute with the highest information gain is chosen
    (the first one wins on ties) and the rows are partitioned by its values.

    Args:
        data: Non-empty list of row mappings.
        attributes: Candidate attribute names still available for splitting.
        target_attr: Key holding the class label.

    Returns:
        Either a leaf (a single label value) or a nested dict of the form
        ``{attribute: {value: subtree, ...}}``.

    Raises:
        ValueError: If ``data`` is empty, since no label can be derived.
    """
    if not data:
        # Previously this surfaced as an opaque IndexError on labels[0].
        raise ValueError("build_tree() requires at least one training row")

    labels = [row[target_attr] for row in data]
    label_counts = Counter(labels)
    if len(label_counts) == 1:
        # Pure node: every row carries the same label.
        return labels[0]
    if not attributes:
        # Nothing left to split on: fall back to the majority label.
        return label_counts.most_common(1)[0][0]

    best_attr = max(attributes, key=lambda attr: info_gain(data, attr, target_attr))

    # Partition in a single pass. A dict keeps values in first-seen order, so
    # the branch order is reproducible across kernels (set iteration order
    # for strings is not).
    partitions = {}
    for row in data:
        partitions.setdefault(row[best_attr], []).append(row)

    # Loop-invariant: the same reduced attribute list applies to every branch.
    remaining = [a for a in attributes if a != best_attr]

    return {
        best_attr: {
            val: build_tree(subset, remaining, target_attr)
            for val, subset in partitions.items()
        }
    }


In [4]:
def predict(tree, sample):
    """Classify ``sample`` by walking ``tree`` from the root to a leaf.

    Args:
        tree: A leaf value, or a nested dict ``{attribute: {value: subtree}}``.
        sample: Mapping of attribute name to value.

    Returns:
        The leaf reached, or the string "Unknown" when the sample's value for
        a tested attribute has no branch in the tree (including when the
        attribute is absent from the sample).
    """
    node = tree
    # Descend until the current node is no longer an internal (dict) node.
    while isinstance(node, dict):
        attr = next(iter(node))
        node = node[attr].get(sample.get(attr))
        if node is None:
            return "Unknown"
    return node


In [5]:
def print_tree(tree, indent=""):
    """Print ``tree`` as an indented outline, one line per branch or leaf.

    Args:
        tree: A leaf value, or a nested dict ``{attribute: {value: subtree}}``.
        indent: Prefix for the current depth; grows by four spaces per level.
    """
    if not isinstance(tree, dict):
        # Format via an f-string so non-string labels (ints, bools, ...) print
        # instead of raising TypeError on str concatenation.
        print(f"{indent}-> {tree}")
        return
    for attr, branches in tree.items():
        for val, subtree in branches.items():
            print(f"{indent}🔹 {attr} = {val}")
            print_tree(subtree, indent + "    ")


In [6]:
# Read the training rows from disk.
filename = 'decision_tree.csv'
data = load_data(filename)

# Candidate split attributes: every CSV column except the 'play' label.
attributes = list(data[0])
attributes.remove('play')

# Learn the tree and show its raw nested-dict form.
tree = build_tree(data, attributes, 'play')
print("Decision Tree:", tree)




Decision Tree: {'outlook': {'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}, 'rainy': {'windy': {'false': 'yes', 'true': 'no'}}, 'overcast': 'yes'}}


In [7]:
# Show the learned tree as an indented outline (one line per branch / leaf).
print_tree(tree)


🔹 outlook = sunny
    🔹 humidity = high
        -> no
    🔹 humidity = normal
        -> yes
🔹 outlook = rainy
    🔹 windy = false
        -> yes
    🔹 windy = true
        -> no
🔹 outlook = overcast
    -> yes


In [8]:
# Predict for a new input.
# Keys must be the dataset's column names and values must be categories that
# occur in the training data; otherwise predict() returns "Unknown" as soon as
# the tree tests that attribute. The dataset's columns are 'temp' and 'windy'
# (with values 'true'/'false'), so 'true' stands in for a strong-wind day.
new_sample = {
    "outlook": "sunny",
    "temp": "cool",
    "humidity": "high",
    "windy": "true"
}
print("Prediction:", predict(tree, new_sample))

Prediction: no


In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.preprocessing import LabelEncoder

# Load dataset.
# dtype=str keeps every cell as the text found in the CSV. By default pandas
# infers booleans for true/false columns, so the encoders below would be
# fitted on bools and string sample values would no longer match the fitted
# labels.
data = pd.read_csv('decision_tree.csv', dtype=str)

# Separate features and target
X = data.drop(columns=['play'])  # Independent variables
y = data['play']  # Target variable

# Encode categorical variables, keeping one fitted encoder per column so a
# new sample can be mapped with exactly the same label -> integer assignment.
label_encoders = {}
for column in X.columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    label_encoders[column] = le

# Keep the target encoder too, so predictions can be decoded back to their
# original label instead of assuming which class index means "yes".
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(y)

# Train Decision Tree Classifier
clf = DecisionTreeClassifier(criterion='entropy', random_state=0)
clf.fit(X, y)

# Display decision tree
print(export_text(clf, feature_names=list(X.columns)))

# Predict for a new sample. Every value must be a label seen during training
# ('windy' holds 'true'/'false' in this dataset).
sample_labels = {"outlook": "sunny", "temp": "cool", "humidity": "high", "windy": "true"}

# Encode the sample with the fitted encoders, iterating over X.columns so the
# feature order matches the order used for training.
new_sample = pd.DataFrame({
    column: label_encoders[column].transform([sample_labels[column]])
    for column in X.columns
})

# Make prediction and decode it with the target encoder
prediction = clf.predict(new_sample)
print("Prediction:", target_encoder.inverse_transform(prediction)[0])


|--- outlook <= 0.50
|   |--- class: 1
|--- outlook >  0.50
|   |--- humidity <= 0.50
|   |   |--- outlook <= 1.50
|   |   |   |--- windy <= 0.50
|   |   |   |   |--- class: 1
|   |   |   |--- windy >  0.50
|   |   |   |   |--- class: 0
|   |   |--- outlook >  1.50
|   |   |   |--- class: 0
|   |--- humidity >  0.50
|   |   |--- windy <= 0.50
|   |   |   |--- class: 1
|   |   |--- windy >  0.50
|   |   |   |--- temp <= 1.00
|   |   |   |   |--- class: 0
|   |   |   |--- temp >  1.00
|   |   |   |   |--- class: 1

Prediction: no


In [4]:
# Bare last expression: the notebook displays the classifier's repr.
clf