In [2]:
# Import necessary Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [3]:
# Weather observations, one record per day, in the field order
# (Outlook, Temperature, Humidity, Wind, Play).
# Wind is stored as the strings 'False'/'True', not as booleans.
_observations = [
    ('Rainy',    'Hot',  'High',   'False', 'No'),
    ('Rainy',    'Hot',  'High',   'True',  'No'),
    ('Overcast', 'Hot',  'High',   'False', 'Yes'),
    ('Sunny',    'Mild', 'High',   'False', 'Yes'),
    ('Sunny',    'Cool', 'Normal', 'False', 'Yes'),
    ('Sunny',    'Cool', 'Normal', 'True',  'No'),
    ('Overcast', 'Cool', 'Normal', 'True',  'Yes'),
    ('Rainy',    'Mild', 'High',   'False', 'No'),
    ('Rainy',    'Cool', 'Normal', 'False', 'Yes'),
    ('Sunny',    'Mild', 'Normal', 'False', 'Yes'),
    ('Rainy',    'Mild', 'Normal', 'True',  'Yes'),
    ('Overcast', 'Mild', 'High',   'True',  'Yes'),
    ('Overcast', 'Hot',  'Normal', 'False', 'Yes'),
    ('Sunny',    'Mild', 'High',   'True',  'No'),
]

# Transpose the records into one list per column. The first four lists are
# the predictor variables; Play is the class label.
Outlook, Temperature, Humidity, Wind, Play = (list(column) for column in zip(*_observations))

In [7]:
# Assemble the five parallel lists into a single table with one row per
# observation and one named column per variable.
column_names = ["Outlook", "Temperature", "Humidity", "Wind", "Play"]
dataset_list = [*zip(Outlook, Temperature, Humidity, Wind, Play)]
dataset = pd.DataFrame(data=dataset_list, columns=column_names)

# Report the table size, then preview the first rows as plain text.
print("Number of Examples and Features:", dataset.shape)
print(dataset.head())

# Summary of each column; as the cell's last expression it is displayed
# as the cell output.
dataset.describe()

Number of Examples and Features: (14, 5)
    Outlook Temperature Humidity   Wind Play
0     Rainy         Hot     High  False   No
1     Rainy         Hot     High   True   No
2  Overcast         Hot     High  False  Yes
3     Sunny        Mild     High  False  Yes
4     Sunny        Cool   Normal  False  Yes


Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
count,14,14,14,14,14
unique,3,3,2,2,2
top,Rainy,Mild,Normal,False,Yes
freq,5,6,7,8,9


In [8]:
# One-hot encode every predictor column (all columns except the last one,
# "Play"). dtype=int pins the indicator columns to 0/1 integers: without it,
# pandas >= 2.0 returns boolean (True/False) columns, which would no longer
# match the 0/1 table shown below or the integer query vectors built later.
dummy = pd.get_dummies(dataset.iloc[:, :-1], dtype=int)

# Names of the original (un-encoded) predictor columns: everything but the label.
cols = dataset.columns.tolist()
cols.remove("Play")

# Swap the raw predictors for their encoded counterparts. The encoded columns
# are placed first so that "Play" stays the last column, which later cells
# rely on when they slice the features with iloc[:, :-1].
dataset = dataset.drop(cols, axis = 1)
dataset = pd.concat([dummy, dataset], axis = 1)

print("\nFinal dataset :\n")
dataset.head()


Final dataset :



Unnamed: 0,Outlook_Overcast,Outlook_Rainy,Outlook_Sunny,Temperature_Cool,Temperature_Hot,Temperature_Mild,Humidity_High,Humidity_Normal,Wind_False,Wind_True,Play
0,0,1,0,0,1,0,1,0,1,0,No
1,0,1,0,0,1,0,1,0,0,1,No
2,1,0,0,0,1,0,1,0,1,0,Yes
3,0,0,1,0,0,1,1,0,1,0,Yes
4,0,0,1,1,0,0,0,1,1,0,Yes


In [9]:
# Pull the class-label column ("Play") out of the encoded table and show it.
print("\nLabel")
labels = dataset.loc[:, 'Play']
labels


Label


0      No
1      No
2     Yes
3     Yes
4     Yes
5      No
6     Yes
7      No
8     Yes
9     Yes
10    Yes
11    Yes
12    Yes
13     No
Name: Play, dtype: object

In [10]:
# Split the data into training and test sets.
# Predictors are every column except the last one (the "Play" label);
# 25% of the rows are held out for testing, with a fixed random_state.
predictors = dataset.iloc[:, :-1]
data_train, data_test, target_train, target_test = train_test_split(
    predictors,
    labels,
    test_size=0.25,
    random_state=93,
)

In [12]:
# Decision tree limited to at most 85 leaf nodes, with a fixed random_state.
# Both values are set to 85 (Roll no : 85).
DT = DecisionTreeClassifier(max_leaf_nodes=85, random_state=85)

# Train the tree on the training split.
final_model = DT.fit(X=data_train, y=target_train)

# Predict the labels of the held-out test rows.
target_predict = DT.predict(X=data_test)

# Fraction of test rows whose predicted label matches the true label.
Accuracy = accuracy_score(y_true=target_test, y_pred=target_predict)
print("Accuracy:", Accuracy)

# Confusion matrix: rows are the true classes, columns the predicted classes
# (scikit-learn's convention). Shown as the cell output.
c = confusion_matrix(y_true=target_test, y_pred=target_predict)
print("\nConfusion Matrix:\n")
c

Accuracy: 0.75

Confusion Matrix:



array([[1, 0],
       [1, 2]])

In [13]:
# Per-class precision and recall on the test split. average=None asks for one
# score per class instead of a single averaged number.
precision = precision_score(y_true=target_test, y_pred=target_predict, average=None)
recall = recall_score(y_true=target_test, y_pred=target_predict, average=None)

# Report precision first, then recall, each followed by a blank line.
precision_report = "Precision: {}\n".format(precision)
print(precision_report)

recall_report = "Recall: {}\n".format(recall)
print(recall_report)

Precision: [0.5 1. ]

Recall: [1.         0.66666667]



In [14]:
# Two unseen days to classify, hand-encoded as one-hot vectors. The positions
# follow the feature-column order of the encoded table:
#   Overcast Rainy Sunny | Cool Hot Mild | High Normal | False True

# (1) What will be the value of Play, if Outlook is 'Rainy', Temperature is 'Mild',
#     Humidity = 'Normal', and Wind = 'False'?
q1 = [0, 1, 0,  0, 0, 1,  0, 1,  1, 0]

# (2) What will be the value of Play, if Outlook is 'Sunny', Temperature is 'Cool',
#     Humidity = 'High', and Wind = 'True'?
q2 = [0, 0, 1,  1, 0, 0,  1, 0,  0, 1]

# Feature names are every column of the encoded table except the label.
features = list(dataset.columns)
features.remove("Play")

# One row per query, labelled with the same column names used for training.
df = pd.DataFrame(data=[q1, q2], columns=features)
df.head()

Unnamed: 0,Outlook_Overcast,Outlook_Rainy,Outlook_Sunny,Temperature_Cool,Temperature_Hot,Temperature_Mild,Humidity_High,Humidity_Normal,Wind_False,Wind_True
0,0,1,0,0,0,1,0,1,1,0
1,0,0,1,1,0,0,1,0,0,1


In [15]:
# Classify both query rows with the trained tree and report each answer.
res = DT.predict(df)
q1_answer, q2_answer = res[0], res[1]
print("Prediction:\nq1: {}\nq2: {}".format(q1_answer, q2_answer))

Prediction:
q1: Yes
q2: No
