In [2]:
import pandas as pd
import numpy as np
from collections import Counter


In [3]:
# Load the dataset from 'data.csv' (a path relative to the current working
# directory) into a DataFrame that the following cells split and encode.
df = pd.read_csv('data.csv')


In [4]:
# Quick look at the first rows of the loaded frame.
print(df.head())

# Separate the label column ('Rain') from the predictor columns.
y = df['Rain']
X = df.drop(columns=['Rain'])

  Humidity    Wind Temperature Rain
0     High  Strong         Hot   No
1     High    Weak         Hot   No
2     High  Strong        Mild  Yes
3   Medium    Weak        Cool  Yes
4      Low    Weak        Cool  Yes


In [5]:
from sklearn.preprocessing import LabelEncoder

# Derive the features from `df` again instead of re-encoding whatever `X`
# currently holds. This keeps the cell idempotent: running it a second time
# would otherwise fit the encoders on already-encoded integers and lose the
# original string labels needed by `inverse_transform`.
X = df.drop('Rain', axis=1)

# One fitted encoder per feature column, kept so encoded values can be mapped
# back to their original category labels later.
label_encoders = {}
for column in X.columns:
    le = LabelEncoder()
    X[column] = le.fit_transform(X[column])
    label_encoders[column] = le

# Encode the target from the raw column for the same idempotency reason.
target_encoder = LabelEncoder()
y = target_encoder.fit_transform(df['Rain'])

# Single frame holding the encoded features together with the encoded target;
# this is what the tree-building code consumes.
data = X.copy()
data['Rain'] = y

In [6]:
import numpy as np

def entropy(y):
    """Return the Shannon entropy (in bits) of the values in ``y``.

    The distinct values of ``y`` are counted, turned into relative
    frequencies, and combined as ``-sum(p * log2(p))``.
    """
    _, frequencies = np.unique(y, return_counts=True)
    p = frequencies / frequencies.sum()
    return -np.sum(p * np.log2(p))

def information_gain(data, split_attr, target_attr):
    """Return the entropy reduction obtained by splitting on ``split_attr``.

    Computes the entropy of ``data[target_attr]`` and subtracts the weighted
    average of the target entropies of the subsets produced by each distinct
    value of ``data[split_attr]``; each weight is that value's share of rows.
    """
    n_rows = len(data)
    levels, level_counts = np.unique(data[split_attr], return_counts=True)

    conditional_entropy = 0
    for level, count in zip(levels, level_counts):
        # Target values of the rows that fall into this branch.
        branch_targets = data[data[split_attr] == level][target_attr]
        conditional_entropy += (count / n_rows) * entropy(branch_targets)

    return entropy(data[target_attr]) - conditional_entropy


In [7]:
def build_tree(data, features, target_attr):
    """Recursively build a decision tree as nested dictionaries.

    At each node the feature with the highest information gain is chosen and
    one branch is created per distinct value of that feature. The result is
    either a leaf (a single target value) or a one-key dict of the form
    ``{feature: {feature_value: subtree_or_leaf, ...}}``.

    Parameters
    ----------
    data : DataFrame holding the feature columns and the target column.
    features : sequence of column names still available for splitting.
    target_attr : name of the target column.
    """
    classes = np.unique(data[target_attr])

    # Pure node: every row carries the same target value.
    if len(classes) == 1:
        return classes[0]

    # Nothing left to split on: fall back to the most frequent target value.
    if len(features) == 0:
        return data[target_attr].mode()[0]

    # Greedy choice: the feature with the largest information gain
    # (np.argmax picks the first one on ties).
    gains = [information_gain(data, name, target_attr) for name in features]
    best_feature = features[np.argmax(gains)]
    remaining = [name for name in features if name != best_feature]

    branches = {}
    for value in np.unique(data[best_feature]):
        rows = data[data[best_feature] == value]
        branches[value] = build_tree(rows, remaining, target_attr)

    return {best_feature: branches}


In [8]:
# Every predictor column is a candidate split; 'Rain' is the target.
features = X.columns.tolist()
tree = build_tree(data, features, target_attr='Rain')
print(tree)


{'Humidity': {0: {'Wind': {0: {'Temperature': {1: 0, 2: 1}}, 1: 0}}, 1: {'Wind': {0: {'Temperature': {0: 0, 1: 1}}, 1: 1}}, 2: 1}}


In [9]:
def predict(tree, sample):
    """Classify one sample by walking a nested-dict decision tree.

    Parameters
    ----------
    tree : dict or leaf value
        Either a leaf (any non-dict value, returned unchanged) or a mapping
        of the form ``{attribute: {attribute_value: subtree_or_leaf}}``.
        Only the first key of each node is consulted.
    sample : mapping
        Attribute name -> value for the example being classified.

    Returns
    -------
    The leaf value reached, or ``None`` when the sample's value for a split
    attribute has no branch in the tree (or a node is an empty dict).

    Raises
    ------
    KeyError
        If ``sample`` lacks an attribute the tree splits on.
    """
    node = tree
    # A tree may consist of a single leaf (e.g. when all training rows share
    # one class), so test for dict-ness before trying to descend.
    while isinstance(node, dict):
        if not node:
            return None
        attr = next(iter(node))
        # .get() yields None for a value never seen during training, which
        # ends the loop and is returned as "no prediction".
        node = node[attr].get(sample[attr])
    return node


In [10]:
# Classify the first (already encoded) row as a smoke test.
first_row = X.iloc[0]
sample = first_row.to_dict()
prediction = predict(tree, sample)

# Map the encoded class back to its original label before printing.
decoded = target_encoder.inverse_transform([prediction])
print("Predicted:", decoded[0])


Predicted: No
