**Aim: Implement a Decision Tree classifier**


- Implement a Decision Tree classifier using the scikit-learn library
- Test the classifier on the Weather dataset

    Step 1: Import necessary libraries.

In [1]:
from sklearn import preprocessing
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np

    Step 2: Prepare dataset.

In [2]:
# Weather observations, one tuple per day:
# (Outlook, Temperature, Humidity, Wind, Play)
records = [
    ('Rainy',    'Hot',  'High',   'False', 'No'),
    ('Rainy',    'Hot',  'High',   'True',  'No'),
    ('Overcast', 'Hot',  'High',   'False', 'Yes'),
    ('Sunny',    'Mild', 'High',   'False', 'Yes'),
    ('Sunny',    'Cool', 'Normal', 'False', 'Yes'),
    ('Sunny',    'Cool', 'Normal', 'True',  'No'),
    ('Overcast', 'Cool', 'Normal', 'True',  'Yes'),
    ('Rainy',    'Mild', 'High',   'False', 'No'),
    ('Rainy',    'Cool', 'Normal', 'False', 'Yes'),
    ('Sunny',    'Mild', 'Normal', 'False', 'Yes'),
    ('Rainy',    'Mild', 'Normal', 'True',  'Yes'),
    ('Overcast', 'Mild', 'High',   'True',  'Yes'),
    ('Overcast', 'Hot',  'Normal', 'False', 'Yes'),
    ('Sunny',    'Mild', 'High',   'True',  'No'),
]

# Transpose the rows into per-column lists: four predictors, then the class label.
Outlook, Temperature, Humidity, Wind, Play = (list(column) for column in zip(*records))

data = dict(
    Outlook=Outlook,
    Temperature=Temperature,
    Humidity=Humidity,
    Wind=Wind,
    Play=Play,
)
dataset = pd.DataFrame(data)
dataset

Unnamed: 0,Outlook,Temperature,Humidity,Wind,Play
0,Rainy,Hot,High,False,No
1,Rainy,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Sunny,Mild,High,False,Yes
4,Sunny,Cool,Normal,False,Yes
5,Sunny,Cool,Normal,True,No
6,Overcast,Cool,Normal,True,Yes
7,Rainy,Mild,High,False,No
8,Rainy,Cool,Normal,False,Yes
9,Sunny,Mild,Normal,False,Yes


    Step 3: Digitize the data set using encoding

In [3]:
# One-hot encode every categorical predictor and label-encode the target.
from sklearn.preprocessing import OneHotEncoder,LabelEncoder

# The dense-output flag was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old spelling was removed in 1.4. Try the new
# name first and fall back so the cell runs on old and new installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
le = LabelEncoder()


def _one_hot(frame, feature):
    """Return a DataFrame of indicator columns for ``frame[feature]``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding the categorical column.
    feature : str
        Name of the column to encode.

    Returns
    -------
    pandas.DataFrame
        One column per category, named ``<feature>_<category>``.

    The column names are built from ``encoder.categories_`` rather than
    ``encoder.get_feature_names`` because that method was removed in
    scikit-learn 1.2; the resulting names are the same.
    """
    indicators = encoder.fit_transform(frame[[feature]])
    names = [feature + "_" + str(category) for category in encoder.categories_[0]]
    return pd.DataFrame(indicators, columns=names)


Outlook_encoded = _one_hot(dataset, 'Outlook')
Temperature_encoded = _one_hot(dataset, 'Temperature')
Humidity_encoded = _one_hot(dataset, 'Humidity')
Wind_encoded = _one_hot(dataset, 'Wind')

# LabelEncoder numbers the sorted class names, so 'No' -> 0 and 'Yes' -> 1.
Play_encoded = pd.DataFrame (le.fit_transform(Play))
# Overwrites the string labels in `dataset`; the merge step reads this column.
dataset['Play'] = Play_encoded

    Step 4: Merge different features to prepare dataset

In [4]:
# Assemble the model table: one-hot predictor columns followed by the encoded label.
encoded_parts = [
    Outlook_encoded,
    Temperature_encoded,
    Humidity_encoded,
    Wind_encoded,
    dataset['Play'],
]
dataset = pd.concat(encoded_parts, axis=1)
print(dataset.describe())

from sklearn.model_selection import train_test_split

# The last column is the class label; every column before it is a feature.
matrix = dataset.values
X, Y = matrix[:, :-1], matrix[:, -1]

# Hold out a test split (test_size = 0.15) using a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state = 124, test_size = 0.15)

       Outlook_Overcast  Outlook_Rainy  ...  Wind_True       Play
count         14.000000      14.000000  ...  14.000000  14.000000
mean           0.285714       0.357143  ...   0.428571   0.642857
std            0.468807       0.497245  ...   0.513553   0.497245
min            0.000000       0.000000  ...   0.000000   0.000000
25%            0.000000       0.000000  ...   0.000000   0.000000
50%            0.000000       0.000000  ...   0.000000   1.000000
75%            0.750000       1.000000  ...   1.000000   1.000000
max            1.000000       1.000000  ...   1.000000   1.000000

[8 rows x 11 columns]


    Step 5: Create and train the DecisionTreeClassifier

In [5]:
# Decision tree that splits on the Gini index, seeded for repeatable results.
tree_params = {
    "criterion": "gini",
    "max_leaf_nodes": 124,
    "random_state": 124,
}
dtc = DecisionTreeClassifier(**tree_params)

# Fit on the training split (features, class labels); as the last expression,
# the fitted estimator is what the cell displays.
dtc.fit(X_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=124,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=124, splitter='best')

    Step 6: Predict Output for new data

In [6]:
# Classify the held-out samples with the trained tree and show the labels.
y_pred = dtc.predict(X_test)
print("Predicted values:", y_pred, sep="\n")

Predicted values:
[0. 0. 0.]


In [7]:
# Evaluate the predictions against the true labels of the test split.
from sklearn import metrics

confusion = metrics.confusion_matrix(y_test, y_pred)
print("Confusion Matrix: ", confusion)

# accuracy_score returns a fraction; scale it to a percentage.
accuracy_pct = metrics.accuracy_score(y_test, y_pred) * 100
print("Accuracy : ", accuracy_pct)

report = metrics.classification_report(y_test, y_pred)
print("Report : ", report)

Confusion Matrix:  [[1 0]
 [2 0]]
Accuracy :  33.33333333333333
Report :                precision    recall  f1-score   support

         0.0       0.33      1.00      0.50         1
         1.0       0.00      0.00      0.00         2

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# Show the encoded column order. The feature matrix X is every column except
# the trailing 'Play', so hand-built inputs to dtc.predict must follow this order.
print(dataset.columns)

Index(['Outlook_Overcast', 'Outlook_Rainy', 'Outlook_Sunny',
       'Temperature_Cool', 'Temperature_Hot', 'Temperature_Mild',
       'Humidity_High', 'Humidity_Normal', 'Wind_False', 'Wind_True', 'Play'],
      dtype='object')


## Exercise Questions

1. What will be the value of Play if Outlook is 'Rainy', Temperature is 'Mild', Humidity is 'Normal', and Wind is 'False'?

In [9]:
# Rainy / Mild / Normal humidity / Wind False, grouped as
# Outlook (Overcast, Rainy, Sunny) | Temperature (Cool, Hot, Mild) |
# Humidity (High, Normal) | Wind (False, True).
rainy_mild_normal_calm = [[0, 1, 0,  0, 0, 1,  0, 1,  1, 0]]
print(dtc.predict(rainy_mild_normal_calm))

[0.]


2. What will be the value of Play if Outlook is 'Sunny', Temperature is 'Cool', Humidity is 'High', and Wind is 'True'?

In [10]:
# Sunny / Cool / High humidity / Wind True, grouped as
# Outlook (Overcast, Rainy, Sunny) | Temperature (Cool, Hot, Mild) |
# Humidity (High, Normal) | Wind (False, True).
# High humidity is the pair (1, 0). The earlier vector used (0, 1), which
# encodes Normal and so answered a different question. Re-run this cell to
# refresh the recorded output.
print(dtc.predict([[0,0,1, 1,0,0, 1,0, 0,1]]))

[0.]
