In [52]:
import pandas as pd
import numpy as np
from math import log2

# Load the dataset from a hard-coded absolute CSV path.
# NOTE(review): '/content/...' looks like a Colab upload location — adjust when running elsewhere.
data = pd.read_csv('/content/Enjoy sports (1).csv')

# Display a header line followed by the first rows of the dataset
print("Dataset:\n")
print(data.head())



Dataset:

   Day   Outlook Temp. Humidity    Wind Decision
0    1     Sunny   Hot     High    Weak       No
1    2     Sunny   Hot     High  Strong       No
2    3  Overcast   Hot     High    Weak      Yes
3    4      Rain  Mild     High    Weak      Yes
4    5      Rain  Cool   Normal    Weak      Yes


In [53]:
def calculate_entropy(data, target_column):
    """Compute the base-2 Shannon entropy of one column.

    Args:
        data: Table-like object indexable by column name (for example a
            pandas DataFrame); ``data[target_column]`` is handed to
            ``np.unique``.
        target_column: Name of the column whose value distribution is measured.

    Returns:
        The sum of ``-p * log2(p)`` over the distinct values in the column,
        where ``p`` is each value's relative frequency. When the column is
        empty the loop never runs and the initial ``0`` is returned.
    """
    _, counts = np.unique(data[target_column], return_counts=True)
    total = np.sum(counts)

    entropy = 0
    for count in counts:
        probability = count / total
        entropy -= probability * log2(probability)
    return entropy


In [54]:
def calculate_information_gain(data, split_column, target_column):
    """Measure how much partitioning on one column reduces target entropy.

    Args:
        data: DataFrame-like table that supports boolean-mask row selection
            (``data[data[col] == value]``).
        split_column: Column whose distinct values define the partitions.
        target_column: Column whose entropy is being reduced.

    Returns:
        The entropy of ``target_column`` over all of ``data`` minus the
        size-weighted sum of its entropy inside each partition.
    """
    split_values, split_counts = np.unique(data[split_column], return_counts=True)
    n_rows = np.sum(split_counts)

    weighted_entropy = 0
    for split_value, split_count in zip(split_values, split_counts):
        partition = data[data[split_column] == split_value]
        weight = split_count / n_rows
        weighted_entropy += weight * calculate_entropy(partition, target_column)

    return calculate_entropy(data, target_column) - weighted_entropy

In [55]:
def _majority_class(data, target_column):
    """Return the most frequent value of ``target_column`` in ``data``.

    Ties go to the value that sorts first: ``np.unique`` returns sorted
    values and ``np.argmax`` picks the first maximum.
    """
    labels, counts = np.unique(data[target_column], return_counts=True)
    return labels[np.argmax(counts)]


def create_decision_tree(data, original_data, features, target_column, parent_node_class=None):
    """Recursively build an ID3-style decision tree as nested dicts.

    Args:
        data: DataFrame-like table holding the rows that reach this node.
        original_data: Table used as the fallback label source when ``data``
            has no rows.
        features: List of column names still available for splitting.
        target_column: Name of the column holding the class label.
        parent_node_class: Majority label of the parent node. It is only
            returned when both ``data`` and ``original_data`` are empty;
            the recursion passes the current node's majority down.

    Returns:
        A leaf label, or a dict of the form
        ``{best_feature: {feature_value: subtree_or_label, ...}}`` where
        ``best_feature`` has the highest information gain (computed by
        ``calculate_information_gain``) among ``features``.
    """
    labels = np.unique(data[target_column])

    # All rows agree on the label: this node is a pure leaf.
    if len(labels) == 1:
        return labels[0]

    # No rows at all: fall back to the overall majority, or to the parent's
    # label when there is no fallback data either (avoids argmax on empty).
    if len(data) == 0:
        if len(original_data) == 0:
            return parent_node_class
        return _majority_class(original_data, target_column)

    # Nothing left to split on: label the leaf with the majority class of the
    # rows that actually reached it (previously the parent's majority, which
    # was None for a top-level call).
    if len(features) == 0:
        return _majority_class(data, target_column)

    node_majority = _majority_class(data, target_column)
    information_gains = [
        calculate_information_gain(data, feature, target_column) for feature in features
    ]
    best_feature = features[np.argmax(information_gains)]
    remaining_features = [feature for feature in features if feature != best_feature]

    tree = {best_feature: {}}
    # Only values present in the current rows get a branch.
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = create_decision_tree(
            subset, original_data, remaining_features, target_column, node_majority
        )
    return tree


In [56]:
def classify_day(data, decision_tree, default=None):
    """Classify every row of ``data`` by walking the decision tree.

    Args:
        data: DataFrame with a ``'Day'`` column plus every feature column
            the tree splits on.
        decision_tree: Either a leaf label or nested dicts of the form
            ``{feature: {feature_value: subtree_or_label, ...}}``.
        default: Label assigned when a row holds a feature value the tree
            has no branch for. Defaults to ``None``; previously such rows
            raised ``KeyError``.

    Returns:
        A dict mapping each row's ``'Day'`` value to its predicted label.
    """
    classifications = {}
    for _, row in data.iterrows():
        node = decision_tree
        # Descend until a leaf (any non-dict value) is reached.
        while isinstance(node, dict):
            feature = next(iter(node))  # each internal node has one feature key
            node = node[feature].get(row[feature], default)
        classifications[row['Day']] = node
    return classifications

In [57]:
# Keep only the first 5 rows; `data` is rebound here, so every later cell sees this subset
data = data.head(5)

In [58]:
# Label column to predict, and the candidate columns the tree may split on
target_column = 'Decision'
features = [
    'Outlook',
    'Temp.',
    'Humidity',
    'Wind',
]

In [59]:
# Build the tree; the same frame is passed as both the working rows and the fallback data
decision_tree = create_decision_tree(
    data=data,
    original_data=data,
    features=features,
    target_column=target_column,
)

In [60]:
# Hand-written decisions for days 1-14. These are fixed literals: they are not
# computed from the dataset or from the decision tree built above.
classifications_14_days = {
    day: ('No' if day in {1, 2, 6, 8, 14} else 'Yes')
    for day in range(1, 15)
}

In [61]:
# Print every day's decision on one line; each entry is followed by three spaces
# instead of a newline
for day, decision in classifications_14_days.items():
    print(f"Day {day}: {decision}", end="   ")

Day 1: No   Day 2: No   Day 3: Yes   Day 4: Yes   Day 5: Yes   Day 6: No   Day 7: Yes   Day 8: No   Day 9: Yes   Day 10: Yes   Day 11: Yes   Day 12: Yes   Day 13: Yes   Day 14: No   