<a href="https://colab.research.google.com/github/bhavanitha-jpg/Internship/blob/main/climatic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
from collections import Counter
from pprint import pprint

import pandas as pd

# Sample dataset (Play Tennis).
# Column-oriented table: each key is a column name and each list holds that
# column's value for 14 observations (position i across all lists is one row).
# 'PlayTennis' is the class label the rest of the script predicts; the other
# four columns are categorical attributes used as candidate split features.
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny',
                'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild',
                    'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High',
                 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak',
             'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    # Target column: 9 'Yes' and 5 'No' labels.
    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No',
                   'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No']
}

# Tabular view of the same data; every function below operates on this frame.
df = pd.DataFrame(data)

# Entropy function
def entropy(labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    Args:
        labels: A sized iterable of hashable labels, e.g. a list or a
            pandas Series.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. An empty collection yields ``0.0``.
    """
    total = len(labels)
    if total == 0:
        # No samples means no uncertainty; also avoids dividing by zero below.
        return 0.0

    # Count every label in a single pass instead of rescanning the whole
    # sequence once per distinct label.
    counts = Counter(labels)

    # Accumulate by subtraction so a pure collection yields 0.0, not -0.0.
    entropy_value = 0.0
    for count in counts.values():
        prob = count / total
        entropy_value -= prob * math.log2(prob)
    return entropy_value

# Information gain function
def info_gain(df, feature, target='PlayTennis'):
    """Return the information gain obtained by splitting ``df`` on ``feature``.

    The gain is the entropy of the ``target`` column minus the size-weighted
    average entropy of ``target`` within each group of rows that share one
    value of ``feature``.

    Args:
        df: DataFrame containing both the ``feature`` and ``target`` columns.
        feature: Name of the column to split on.
        target: Name of the class-label column.

    Returns:
        The reduction in entropy as a float.
    """
    baseline = entropy(df[target])
    n_rows = len(df)
    column = df[feature]

    # Expected entropy after the split: each partition contributes its own
    # entropy scaled by the fraction of rows it holds.
    partitions = (df[column == level] for level in column.unique())
    conditional = sum(
        (len(part) / n_rows) * entropy(part[target]) for part in partitions
    )
    return baseline - conditional

# Baseline uncertainty of the target column before any split
target_entropy = entropy(df['PlayTennis'])
print('Entropy of PlayTennis:', round(target_entropy, 4))

# Report how much splitting on each candidate attribute reduces that uncertainty
features = ['Outlook', 'Temperature', 'Humidity', 'Wind']
for feature in features:
    gain = info_gain(df, feature)
    print(f"Information Gain for {feature}: {round(gain, 4)}")

# Decision tree builder
def build_tree(df, features, target='PlayTennis', depth=0):
    """Recursively build an ID3-style decision tree as nested dictionaries.

    At each node the feature with the highest information gain is chosen
    (ties go to the feature listed first), the rows are partitioned by that
    feature's values, and a subtree is built for every partition using the
    remaining features.

    Args:
        df: DataFrame holding the rows that reach this node.
        features: Names of the columns still available for splitting.
        target: Name of the class-label column.
        depth: Current recursion depth. It is passed down to child calls
            but does not otherwise influence the result.

    Returns:
        A class label when the node is a leaf, otherwise a dict of the form
        ``{feature: {value: subtree, ...}}``.
    """
    labels = df[target]

    # If all target values are the same, return that value
    if len(set(labels)) == 1:
        return next(iter(labels))

    # If no features left, return majority class (first entry of Series.mode())
    if len(features) == 0:
        return labels.mode()[0]

    # Choose best feature
    gains = {feature: info_gain(df, feature, target) for feature in features}
    best_feature = max(gains, key=gains.get)

    # The features left for the children are the same for every branch,
    # so compute them once rather than on each loop iteration.
    remaining_features = [f for f in features if f != best_feature]

    branches = {}
    for value in df[best_feature].unique():
        sub_df = df[df[best_feature] == value]
        branches[value] = build_tree(sub_df, remaining_features, target, depth + 1)
    return {best_feature: branches}

# Learn a tree from the full dataset using every candidate attribute,
# then pretty-print the resulting nested-dict structure
decision_tree = build_tree(df, features, target='PlayTennis')
pprint(decision_tree)


Entropy of PlayTennis: 0.9403
Information Gain for Outlook: 0.2467
Information Gain for Temperature: 0.0292
Information Gain for Humidity: 0.1518
Information Gain for Wind: 0.0481
{'Outlook': {'Overcast': 'Yes',
             'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
             'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
