In [1]:
import math
import csv

In [3]:
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

In [7]:
def load_csv(filename):
    with open(r'C:\Users\goura\Desktop\Python projects\experiment-3\Book1.csv') as file:
        lines = csv.reader(file)
        dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

In [9]:
def subtables(data, col, delete=False):
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    for x in range(len(attr)):
        for y in range(len(data)):
            if data[y][col] == attr[x]:
                counts[x] += 1

    for x in range(len(attr)):
        dic[attr[x]] = []
        for y in range(len(data)):
            if data[y][col] == attr[x]:
                if delete:
                    dic[attr[x]].append(data[y][:col] + data[y][col + 1 :])
                else:
                    dic[attr[x]].append(data[y])
    return attr, dic

In [11]:
def entropy(S):
    attr = list(set(S))
    if len(attr) == 1:
        return 0
    counts = [0] * len(attr)
    for i in range(len(attr)):
        counts[i] = S.count(attr[i]) / (len(S) * 1.0)

    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

In [13]:
def compute_gain(data, col):
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)

    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

In [15]:
def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        node = Node("")
        node.answer = lastcol[0]
        return node

    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)

    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split + 1 :]
    attr, dic = subtables(data, split, delete=True)

    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

In [17]:
def print_tree(node, level=0):
    if node.answer != "":
        print(" " * level, node.answer)
        return

    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

In [19]:
def classify(node, x_test, features):
    if node.answer != "":
        return node.answer

    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            return classify(n, x_test, features)

In [21]:
# Main Program
train_data, train_features = load_csv("data3.csv")
test_data, test_features = load_csv("data3_test.csv")

In [23]:
tree = build_tree(train_data, train_features)

In [25]:
print("The decision tree for the dataset using ID3 algorithm is:")
print_tree(tree)

The decision tree for the dataset using ID3 algorithm is:
 Day
  D1
   No
  D10
   Yes
  D3
   Yes
  D9
   Yes
  D8
   No
  D13
   Yes
  D11
   Yes
  D4
   Yes
  D12
   Yes
  D14
   No
  D7
   Yes
  D6
   No
  D2
   No
  D5
   Yes


In [27]:
print("\nClassification results:")
for test in test_data:
    print(f"Test instance: {test}")
    label = classify(tree, test, train_features)
    print(f"Classified as: {label}")


Classification results:
Test instance: ['D1', 'Sunny', 'Hot', 'High', 'Weak', 'No']
Classified as: No
Test instance: ['D2', 'Sunny', 'Hot', 'High', 'Strong', 'No']
Classified as: No
Test instance: ['D3', 'Overcast', 'Hot', 'High', 'Weak', 'Yes']
Classified as: Yes
Test instance: ['D4', 'Rain', 'Mild', 'High', 'Weak', 'Yes']
Classified as: Yes
Test instance: ['D5', 'Rain', 'Cool', 'Normal', 'Weak', 'Yes']
Classified as: Yes
Test instance: ['D6', 'Rain', 'Cool', 'Normal', 'Strong', 'No']
Classified as: No
Test instance: ['D7', 'Overcast', 'Cool', 'Normal', 'Strong', 'Yes']
Classified as: Yes
Test instance: ['D8', 'Sunny', 'Mild', 'High', 'Weak', 'No']
Classified as: No
Test instance: ['D9', 'Sunny', 'Cool', 'Normal', 'Weak', 'Yes']
Classified as: Yes
Test instance: ['D10', 'Rain', 'Mild', 'Normal', 'Weak', 'Yes']
Classified as: Yes
Test instance: ['D11', 'Sunny', 'Mild', 'Normal', 'Strong', 'Yes']
Classified as: Yes
Test instance: ['D12', 'Overcast', 'Mild', 'High', 'Strong', 'Yes']
Cla