# Assignment-6: Decision Tree (ID3 Algorithm)

**Objective:** Build a decision tree using the ID3 algorithm, classify new samples, and interpret results. 

---

## Q1: Import Required Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
print('Libraries imported')

### (Helper) Create sample dataset (Play Tennis)

In [None]:
# Play Tennis toy dataset, written one observation per row so each record can
# be read (and checked) at a glance. Column order matches `tennis_columns`.
tennis_columns = ('Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis')
tennis_rows = (
    ('Sunny',    'Hot',  'High',   'Weak',   'No'),
    ('Sunny',    'Hot',  'High',   'Strong', 'No'),
    ('Overcast', 'Hot',  'High',   'Weak',   'Yes'),
    ('Rain',     'Mild', 'High',   'Weak',   'Yes'),
    ('Rain',     'Cool', 'Normal', 'Weak',   'Yes'),
    ('Rain',     'Cool', 'Normal', 'Strong', 'No'),
    ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
    ('Sunny',    'Mild', 'High',   'Weak',   'No'),
    ('Sunny',    'Cool', 'Normal', 'Weak',   'Yes'),
    ('Rain',     'Mild', 'Normal', 'Weak',   'Yes'),
    ('Sunny',    'Mild', 'Normal', 'Strong', 'Yes'),
    ('Overcast', 'Mild', 'High',   'Strong', 'Yes'),
    ('Overcast', 'Hot',  'Normal', 'Weak',   'Yes'),
    ('Rain',     'Mild', 'High',   'Strong', 'No'),
)

# Transpose the records back into the column -> list-of-values mapping.
data = {
    column: list(values)
    for column, values in zip(tennis_columns, zip(*tennis_rows))
}

df = pd.DataFrame(data)
# Persist the table so the next step can load it from disk like a real dataset.
df.to_csv('play_tennis.csv', index=False)
df

## Q2: Load Dataset and View First 10 Rows

In [None]:
# Read the table back from the CSV file produced by the helper cell.
dataset = pd.read_csv(filepath_or_buffer='play_tennis.csv')
# Preview the first ten records (fewer if the file is shorter).
dataset.iloc[:10]

## Q3: Dataset Summary

In [None]:
# Overview of the loaded table: dimensions, column names, and how many rows
# fall into each target class.
table_shape = dataset.shape
column_names = list(dataset.columns)
print('Shape:', table_shape)
print('\nColumns:', column_names)
print('\nClass Distribution:')
dataset.loc[:, 'PlayTennis'].value_counts()

## Q4: Encode Categorical Data (Label Encoding)

In [None]:
# Label-encode every column (features and target) so the tree receives integers.
# One fitted LabelEncoder is kept per column in `encoders`, so the same mapping
# can be reused to encode new samples and to decode predictions later on.
# The integer codes are just category identifiers assigned by the encoder; they
# do not express any meaningful order between the categories.
# LabelEncoder comes from the notebook's import cell.
encoders = {}
encoded_df = dataset.copy()  # work on a copy so the raw `dataset` stays intact
for col in encoded_df.columns:
    encoder = LabelEncoder()
    encoded_df[col] = encoder.fit_transform(encoded_df[col])
    encoders[col] = encoder  # stored only after it has been fitted
encoded_df.head()

## Q5: Separate Features (X) and Target (y)

In [None]:
# Separate the encoded table into the label column (y) and the predictors (X).
target_col = 'PlayTennis'
y = encoded_df[target_col]
X = encoded_df.drop(columns=[target_col])
X.head()

## Q6: Train–Test Split (70:30)

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=1)
X_train, X_test, y_train, y_test = split_parts
for caption, frame in (('Training size:', X_train), ('Testing size:', X_test)):
    print(caption, frame.shape)

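## Q7: Build Decision Tree (ID3 using Entropy)

> **Note:** scikit-learn's `DecisionTreeClassifier` implements an optimized version of CART rather than classic ID3. Setting `criterion='entropy'` makes it select splits by information gain, as ID3 does, but every split is a binary threshold on the label-encoded feature values instead of a multi-way branch per category.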

In [None]:
# Fit a decision tree that scores candidate splits by entropy (information gain).
# random_state=1 keeps the fitted tree repeatable across runs.
dt = DecisionTreeClassifier(random_state=1, criterion='entropy').fit(X_train, y_train)
print('Decision Tree model trained')

## Q8: Visualize Decision Tree

In [None]:
# Draw the fitted tree. Labels are derived from the fitted objects instead of
# being hard-coded:
#   * class_names is decoded from dt.classes_, so each name lines up with the
#     class the model actually learned (and stays right if the labels change);
#   * feature_names is passed as a plain list, because some scikit-learn
#     releases reject a pandas Index for this parameter.
# plt (matplotlib.pyplot) comes from the notebook's import cell.
tree_class_names = encoders['PlayTennis'].inverse_transform(dt.classes_).tolist()

fig, ax = plt.subplots(figsize=(12, 6))
plot_tree(
    dt,
    feature_names=list(X.columns),
    class_names=tree_class_names,
    filled=True,
    ax=ax,
)
plt.show()

## Q9: Predict and Evaluate

In [None]:
# Predict on the held-out rows and report how well the tree did.
y_pred = dt.predict(X_test)

# Pin the metrics to the full set of target classes. With only a handful of
# test rows a class can be absent from y_test/y_pred; without explicit labels
# the confusion matrix would change shape and the report would drop that class.
target_encoder = encoders['PlayTennis']
class_ids = np.arange(len(target_encoder.classes_))
class_labels = [str(name) for name in target_encoder.classes_]

# Side-by-side table of encoded actual vs. predicted labels.
results = pd.DataFrame({'Actual': y_test.values, 'Predicted': y_pred})
# Vectorised comparison instead of a row-wise apply.
results['Status'] = np.where(results['Actual'] == results['Predicted'], 'Correct', 'Wrong')
display(results)

print('\nAccuracy:', accuracy_score(y_test, y_pred))
# Rows are actual classes, columns are predicted classes, ordered as class_ids.
print('\nConfusion Matrix:\n', confusion_matrix(y_test, y_pred, labels=class_ids))
# target_names shows the original class names instead of 0/1; zero_division=0
# reports 0.0 (without a warning) for a class that was never predicted.
print(
    '\nClassification Report:\n',
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=class_labels,
        zero_division=0,
    ),
)

## Q10: Classify a New Sample & Answer Queries

In [None]:
# Example: Outlook=Sunny, Temperature=Mild, Humidity=High, Wind=Weak
sample = pd.DataFrame({
    'Outlook':['Sunny'],
    'Temperature':['Mild'],
    'Humidity':['High'],
    'Wind':['Weak']
})

# Encode the sample with the encoders fitted on the training data.
# Columns are taken in the order of the training features (X.columns), so the
# model always receives them in the order it was fitted on, even if the dict
# above is edited or reordered. A feature missing from `sample` raises KeyError,
# and a category the encoder never saw raises ValueError from `transform`.
encoded_sample = pd.DataFrame(
    {col: encoders[col].transform(sample[col]) for col in X.columns},
    index=sample.index,
)

prediction = dt.predict(encoded_sample)
# Map the predicted integer code back to the original class label.
label = encoders['PlayTennis'].inverse_transform(prediction)
print('Prediction for sample:', label[0])

---
### ✔ End of Assignment-6
Write your conclusion based on accuracy and tree interpretation.