In [1]:
import pandas as pd
import math
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Read the CSV file into a pandas DataFrame
df = pd.read_csv('ML_DATA.csv')

In [3]:
# Perform one-hot encoding for categorical variables
df_encoded = pd.get_dummies(df, columns=['Age', 'Has_Job', 'Own_House', 'Credit_Rating'])

In [4]:
# Split the DataFrame into feature matrix (X) and target variable (y)
X = df_encoded.drop('Class', axis=1)
y = df_encoded['Class']

In [5]:
# Calculate the entropy of the target variable (Class)
class_counts = y.value_counts()
total_examples = len(y)
entropy_s = 0

for count in class_counts:
    p = count / total_examples
    entropy_s -= p * math.log2(p)

print("Entropy(S):", entropy_s)

Entropy(S): 0.9709505944546686


In [6]:
# Calculate the information gain for each attribute
information_gains = {}

for attribute in X.columns:
    attribute_entropy = 0
    attribute_value_counts = X[attribute].value_counts()

    for value, count in attribute_value_counts.items():
        value_examples = X[X[attribute] == value]
        value_class_counts = y[value_examples.index].value_counts()
        value_entropy = 0

        for class_count in value_class_counts:
            p = class_count / count
            value_entropy -= p * math.log2(p)

        attribute_entropy += (count / total_examples) * value_entropy

    information_gain = entropy_s - attribute_entropy
    information_gains[attribute] = information_gain

    print("Information Gain({}): {}".format(attribute, information_gain))

Information Gain(Age_Middle): 1.1102230246251565e-16
Information Gain(Age_Old): 0.06364122949221451
Information Gain(Age_Young): 0.05977313014931729
Information Gain(Has_Job_False): 0.32365019815155627
Information Gain(Has_Job_True): 0.32365019815155627
Information Gain(Own_House_False): 0.4199730940219749
Information Gain(Own_House_True): 0.4199730940219749
Information Gain(Credit_Rating_Good): 0.008986624929939513
Information Gain(Credit_Rating_excellent): 0.24199510603823393
Information Gain(Credit_Rating_fair): 0.24902249956730627


In [7]:
# Create the decision tree classifier
classifier = DecisionTreeClassifier()
classifier.fit(X, y)

In [8]:
# Predict the class for a new example
new_example = [[True, False, False, True]]
predicted_class = classifier.predict(new_example)

print("Predicted Class:", predicted_class)



ValueError: X has 4 features, but DecisionTreeClassifier is expecting 10 features as input.