In [5]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text


In [6]:
# 1. Create the dataset (14 rows)
data = {
    'Age': ['<21','<21','21-35','>35','>35','>35','21-35','<21','<21','>35','<21','21-35','21-35','>35',],
    'Income': ['High','High','High','Medium','Low','Low','Low','Medium','Low','Medium','Medium','Medium','High','Medium'],
    'Gender': ['Male','Male','Male','Male','Female','Female','Female','Male','Female','Female','Female','Male','Female','Male'],
    'MaritalStatus': ['Single','Married','Single','Single','Single','Married','Married','Single','Married','Single','Married','Married','Single','Married'],
    'Buys': ['No','No','Yes','Yes','Yes','No','Yes','No','Yes','Yes','Yes','Yes','Yes','No']
}


In [7]:
df = pd.DataFrame(data)

In [8]:
df

Unnamed: 0,Age,Income,Gender,MaritalStatus,Buys
0,<21,High,Male,Single,No
1,<21,High,Male,Married,No
2,21-35,High,Male,Single,Yes
3,>35,Medium,Male,Single,Yes
4,>35,Low,Female,Single,Yes
5,>35,Low,Female,Married,No
6,21-35,Low,Female,Married,Yes
7,<21,Medium,Male,Single,No
8,<21,Low,Female,Married,Yes
9,>35,Medium,Female,Single,Yes


In [9]:
# 2. Encode categorical variables into numbers
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
for col in df.columns:
    df[col] = le.fit_transform(df[col])




In [10]:
# 3. Features (X) and Target (y)
X = df.drop('Buys', axis=1)
y = df['Buys']


In [11]:
# 4. Train Decision Tree Classifier
clf = DecisionTreeClassifier(criterion='entropy')
clf.fit(X, y)



In [12]:
# 5. Show tree rules
tree_rules = export_text(clf, feature_names=list(X.columns))
print(tree_rules)


|--- Age <= 0.50
|   |--- class: 1
|--- Age >  0.50
|   |--- Gender <= 0.50
|   |   |--- MaritalStatus <= 0.50
|   |   |   |--- Age <= 1.50
|   |   |   |   |--- class: 1
|   |   |   |--- Age >  1.50
|   |   |   |   |--- class: 0
|   |   |--- MaritalStatus >  0.50
|   |   |   |--- class: 1
|   |--- Gender >  0.50
|   |   |--- Age <= 1.50
|   |   |   |--- class: 0
|   |   |--- Age >  1.50
|   |   |   |--- MaritalStatus <= 0.50
|   |   |   |   |--- class: 0
|   |   |   |--- MaritalStatus >  0.50
|   |   |   |   |--- class: 1



In [13]:
# 6. Test Data: [Age <21, Income=Low, Gender=Female, MaritalStatus=Married]
# Encode with same LabelEncoder logic
test_data = pd.DataFrame([['>35','Medium','Male','Married']], columns=['Age','Income','Gender','MaritalStatus'])


In [14]:
# Apply label encoding column-wise
for col in test_data.columns:
    test_data[col] = le.fit_transform(test_data[col])  #  Better way: use separate encoders per column


In [15]:
# Predict
prediction = clf.predict(test_data)
print("Prediction for test data:", "Yes (Buys)" if prediction[0]==1 else "No (Does not buy)")


Prediction for test data: Yes (Buys)


In [29]:
df.to_csv("data.csv", index=False)

In [30]:
#import packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [32]:
#reading Dataset
dataset=pd.read_csv("data.csv")
X=dataset.iloc[:,:-1]
# Target (last column)
y = dataset.iloc[:, -1].values

In [33]:
# Perform Label encoding
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X = X.apply(LabelEncoder().fit_transform)


In [34]:
print("Features after encoding:\n", X)

Features after encoding:
     Age  Income  Gender  MaritalStatus
0     1       0       1              1
1     1       0       1              0
2     0       0       1              1
3     2       2       1              1
4     2       1       0              1
5     2       1       0              0
6     0       1       0              0
7     1       2       1              1
8     1       1       0              0
9     2       2       0              1
10    1       2       0              0
11    0       2       1              0
12    0       0       0              1
13    2       2       1              0


In [35]:
# Train Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
regressor = DecisionTreeClassifier()
regressor.fit(X, y)


In [36]:
# Predict value for a new input
X_in = np.array([1,1,0,0])   # sample input
y_pred = regressor.predict([X_in])
print("Prediction:", y_pred)


Prediction: [1]




In [41]:
# Visualize tree
from io import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

In [40]:
!pip install pydotplus


Collecting pydotplus
  Downloading pydotplus-2.0.2.tar.gz (278 kB)
     ---------------------------------------- 0.0/278.7 kB ? eta -:--:--
     -- ---------------------------------- 20.5/278.7 kB 330.3 kB/s eta 0:00:01
     -- ---------------------------------- 20.5/278.7 kB 330.3 kB/s eta 0:00:01
     -- ---------------------------------- 20.5/278.7 kB 330.3 kB/s eta 0:00:01
     ---- -------------------------------- 30.7/278.7 kB 119.1 kB/s eta 0:00:03
     --------- --------------------------- 71.7/278.7 kB 281.8 kB/s eta 0:00:01
     --------- --------------------------- 71.7/278.7 kB 281.8 kB/s eta 0:00:01
     --------- --------------------------- 71.7/278.7 kB 281.8 kB/s eta 0:00:01
     --------- --------------------------- 71.7/278.7 kB 281.8 kB/s eta 0:00:01
     ------------------ ----------------- 143.4/278.7 kB 327.9 kB/s eta 0:00:01
     ------------------- ---------------- 153.6/278.7 kB 339.7 kB/s eta 0:00:01
     ------------------- ---------------- 153.6/278.7 kB 339

In [38]:

dot_data = StringIO()
export_graphviz(regressor, out_file=dot_data, feature_names=X.columns,
                filled=True, rounded=True, special_characters=True)


In [42]:
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('Decision_Tree.png')
Image(graph.create_png())

InvocationException: GraphViz's executables not found