In [None]:
import math
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import graphviz

def calculate_entropy(data, target_attribute):
    """Return the Shannon entropy (in bits) of one column of *data*.

    Args:
        data: pandas DataFrame holding the samples.
        target_attribute: Name of the column whose class distribution is
            measured.

    Returns:
        The entropy as a float. ``0.0`` is returned when the column has no
        non-missing values.
    """
    value_counts = data[target_attribute].value_counts()
    # value_counts() leaves out missing labels, so normalise by the number of
    # labelled samples rather than len(data); otherwise the probabilities
    # would not sum to 1 and the entropy would be understated.
    total_samples = value_counts.sum()
    if total_samples == 0:
        return 0.0

    entropy = 0.0
    for count in value_counts:
        # Categorical columns can report zero-count categories; log2(0) is
        # undefined, and such classes contribute nothing to the entropy.
        if count > 0:
            probability = count / total_samples
            entropy -= probability * math.log2(probability)
    return float(entropy)

def calculate_information_gain(data, feature_attribute, target_attribute):
    """Return the information gain obtained by splitting on a feature.

    The gain is the entropy of *target_attribute* over all of *data* minus
    the size-weighted entropy of the subsets produced by grouping rows on
    each distinct value of *feature_attribute*.

    Args:
        data: pandas DataFrame holding the samples.
        feature_attribute: Name of the column to split on.
        target_attribute: Name of the class-label column.

    Returns:
        The information gain of the split.
    """
    row_count = len(data)
    feature_values = data[feature_attribute]

    # Accumulate the expected entropy that remains after the split.
    remainder = 0.0
    for category in feature_values.unique():
        partition = data[feature_values == category]
        share = len(partition) / row_count
        remainder += share * calculate_entropy(partition, target_attribute)

    return calculate_entropy(data, target_attribute) - remainder

def train_id3_and_visualize(df, target_column, feature_columns):
    """Fit an entropy-criterion decision tree and render it to ``id3_tree.png``.

    Non-numeric columns are label-encoded before training. The fitted tree
    (``max_depth=3``) is exported through Graphviz and written to
    ``id3_tree.png`` in the current working directory.

    Args:
        df: pandas DataFrame containing the feature and target columns.
        target_column: Name of the class-label column.
        feature_columns: Sequence of feature column names.

    Returns:
        None. The function renders the image and prints a confirmation.

    NOTE: label encoding turns each category into an integer code, and the
    tree splits on those codes as if they were ordered numbers.
    """
    feature_columns = list(feature_columns)
    selected_columns = feature_columns + [target_column]
    df_encoded = df[selected_columns].copy()

    # Keep one encoder per encoded column so the target's original labels can
    # be recovered for the plot instead of showing bare integer codes.
    encoders = {}
    for column in selected_columns:
        # Encode anything non-numeric (object, string, categorical, ...),
        # not only columns whose dtype is exactly 'object'.
        if not pd.api.types.is_numeric_dtype(df_encoded[column]):
            encoder = LabelEncoder()
            df_encoded[column] = encoder.fit_transform(df_encoded[column])
            encoders[column] = encoder

    features = df_encoded[feature_columns]
    target = df_encoded[target_column]

    id3_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=3)
    id3_classifier.fit(features, target)

    # Map the classifier's (possibly encoded) classes back to readable labels.
    if target_column in encoders:
        class_labels = encoders[target_column].inverse_transform(
            id3_classifier.classes_
        )
    else:
        class_labels = id3_classifier.classes_

    dot_data = export_graphviz(
        id3_classifier,
        out_file=None,
        # A plain list is accepted by every scikit-learn release; a pandas
        # Index is rejected by some versions' parameter validation.
        feature_names=list(features.columns),
        class_names=[str(label) for label in class_labels],
        filled=True,
        rounded=True,
        special_characters=True
    )
    graph = graphviz.Source(dot_data)
    graph.render("id3_tree", format="png", cleanup=True)
    print("\n✅ Decision tree visualization saved as 'id3_tree.png'")

# Interactive driver: read a small dataset from stdin, report the entropy and
# per-feature information gain, then train and render the decision tree.
if __name__ == "__main__":
    print("=== ID3 Decision Tree ===\n")

    num_rows = int(input("Enter number of data samples (rows): "))
    # Strip every name individually so "a, b" yields "b" rather than " b".
    feature_columns = [
        name.strip()
        for name in input("Enter feature names (comma-separated): ").split(',')
    ]
    target_column = input("Enter target column name: ").strip()

    all_columns = feature_columns + [target_column]
    dataset = []

    print("\nEnter the dataset rows one by one (features + target):")
    print(f"For example, if columns are {all_columns}, enter values separated by commas.\n")

    for i in range(num_rows):
        # Re-prompt until the row has one value per column; a short or long
        # row would otherwise only fail later, when the DataFrame is built.
        while True:
            # Strip each value so " hot" and "hot" count as the same category.
            row = [value.strip() for value in input(f"Row {i+1}: ").split(',')]
            if len(row) == len(all_columns):
                break
            print(f"  Expected {len(all_columns)} values but got {len(row)}; please re-enter this row.")
        dataset.append(row)

    df = pd.DataFrame(dataset, columns=all_columns)

    initial_entropy = calculate_entropy(df, target_column)
    print(f"\nInitial Entropy of '{target_column}': {initial_entropy:.4f}")
    print("\nInformation Gain for each feature:")
    for feature in feature_columns:
        gain = calculate_information_gain(df, feature, target_column)
        print(f" - {feature}: {gain:.4f}")

    print("\nTraining and visualizing decision tree...")
    train_id3_and_visualize(df, target_column, feature_columns)


=== ID3 Decision Tree ===

Enter number of data samples (rows): 4
Enter feature names (comma-separated): outlook,temperature,humidity
Enter target column name: PLAY

Enter the dataset rows one by one (features + target):
For example, if columns are ['outlook', 'temperature', 'humidity', 'PLAY'], enter values separated by commas.

Row 1: sunny,hot,high,no
Row 2: sunny,mild,high,no
Row 3: overcast,hot,normal,yes
Row 4: rainy,cool,normal,yes

Initial Entropy of 'PLAY': 1.0000

Information Gain for each feature:
 - outlook: 1.0000
 - temperature: 0.5000
 - humidity: 1.0000

Training and visualizing decision tree...

✅ Decision tree visualization saved as 'id3_tree.png'


In [None]:
import math
import pandas as pd

def calculate_entropy(data, target_attribute):
    """Return the Shannon entropy (in bits) of one column of *data*.

    Args:
        data: pandas DataFrame holding the samples.
        target_attribute: Name of the column whose class distribution is
            measured.

    Returns:
        The entropy as a float. ``0.0`` is returned when the column has no
        non-missing values.
    """
    value_counts = data[target_attribute].value_counts()
    # value_counts() leaves out missing labels, so normalise by the number of
    # labelled samples rather than len(data); otherwise the probabilities
    # would not sum to 1 and the entropy would be understated.
    total_samples = value_counts.sum()
    if total_samples == 0:
        return 0.0

    entropy = 0.0
    for count in value_counts:
        # Categorical columns can report zero-count categories; log2(0) is
        # undefined, and such classes contribute nothing to the entropy.
        if count > 0:
            probability = count / total_samples
            entropy -= probability * math.log2(probability)
    return float(entropy)

def calculate_information_gain(data, feature_attribute, target_attribute):
    """Return the information gain obtained by splitting on a feature.

    The gain is the entropy of *target_attribute* over all of *data* minus
    the size-weighted entropy of the subsets produced by grouping rows on
    each distinct value of *feature_attribute*.

    Args:
        data: pandas DataFrame holding the samples.
        feature_attribute: Name of the column to split on.
        target_attribute: Name of the class-label column.

    Returns:
        The information gain of the split.
    """
    row_count = len(data)
    feature_values = data[feature_attribute]

    # Accumulate the expected entropy that remains after the split.
    remainder = 0.0
    for category in feature_values.unique():
        partition = data[feature_values == category]
        share = len(partition) / row_count
        remainder += share * calculate_entropy(partition, target_attribute)

    return calculate_entropy(data, target_attribute) - remainder

# Interactive driver: read a small dataset from stdin, then report the target
# entropy, the information gain of every feature, and the best feature.
if __name__ == "__main__":
    print("=== ID3 Entropy & Information Gain Calculator ===\n")

    num_rows = int(input("Enter number of data samples (rows): "))
    # Strip every name individually so "a, b" yields "b" rather than " b".
    feature_columns = [
        name.strip()
        for name in input("Enter feature names (comma-separated): ").split(',')
    ]
    target_column = input("Enter target column name: ").strip()

    all_columns = feature_columns + [target_column]
    dataset = []

    print("\nEnter the dataset rows one by one (features + target):")
    print(f"For example, if columns are {all_columns}, enter values separated by commas.\n")

    for i in range(num_rows):
        # Re-prompt until the row has one value per column; a short or long
        # row would otherwise only fail later, when the DataFrame is built.
        while True:
            # Strip each value so " hot" and "hot" count as the same category.
            row = [value.strip() for value in input(f"Row {i+1}: ").split(',')]
            if len(row) == len(all_columns):
                break
            print(f"  Expected {len(all_columns)} values but got {len(row)}; please re-enter this row.")
        dataset.append(row)

    df = pd.DataFrame(dataset, columns=all_columns)

    initial_entropy = calculate_entropy(df, target_column)
    print(f"\nInitial Entropy of '{target_column}': {initial_entropy:.4f}")

    print("\nInformation Gain for each feature:")
    info_gains = {}
    for feature in feature_columns:
        gain = calculate_information_gain(df, feature, target_column)
        info_gains[feature] = gain
        print(f" - {feature}: {gain:.4f}")

    # On ties, max() keeps the first feature in the order the user entered.
    best_feature = max(info_gains, key=info_gains.get)
    print(f"\n The feature with the highest Information Gain is: '{best_feature}'")
