<a href="https://colab.research.google.com/github/amirkasaei/Decision-Tree-Classifier-with-scikit-learn/blob/main/Decision_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Required Libraries**

**install graphviz and pydotplus**

In [None]:
# Uncomment if the graphviz Python package is missing from this kernel:
# %pip install -q graphviz

In [None]:
# Uncomment if the pydotplus Python package is missing from this kernel:
# %pip install -q pydotplus

**load the required libraries**

In [None]:
# Load libraries
import pandas as pd
import torch
import tensorflow as tf
import numpy as np
from sklearn.tree import DecisionTreeClassifier, plot_tree # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import export_graphviz
from six import StringIO
from IPython.display import Image
import pydotplus

**check that a GPU is available**

In [None]:
# Report which GPU (if any) TensorFlow can see.
# A missing GPU is not treated as an error: the scikit-learn decision trees and
# the Graphviz rendering used in this notebook run on the CPU, so the notebook
# can still be executed end to end on a CPU-only machine.
device_name = tf.test.gpu_device_name()  # empty string when no GPU is visible
if device_name:
  print('Found GPU at: {}'.format(device_name))
else:
  print('GPU device not found; continuing on CPU')

Found GPU at: /device:GPU:0


# **Loading Data**

In [None]:
# Raw-file URL of the connect-4 data file on GitHub; passed to pd.read_csv below.
url = 'https://raw.githubusercontent.com/amirkasaei/Decision-Tree-with-Python/main/Dataset/connect-4.data'

In [None]:
# Feature columns are labelled with the strings '1' through '42';
# the full header is those labels followed by the target column, 'class'.
feature_columns = list(map(str, range(1, 43)))
column_names = [*feature_columns, 'class']

In [None]:
# Download and parse the data file. Passing `names=` supplies the column labels,
# so pandas treats the first line of the file as data rather than as a header.
df = pd.read_csv(url, names=column_names)
# Preview the first five raw rows.
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,34,35,36,37,38,39,40,41,42,class
0,b,b,b,b,b,b,b,b,b,b,...,b,b,b,b,b,b,b,b,b,win
1,b,b,b,b,b,b,b,b,b,b,...,b,b,b,b,b,b,b,b,b,win
2,b,b,b,b,b,b,o,b,b,b,...,b,b,b,b,b,b,b,b,b,win
3,b,b,b,b,b,b,b,b,b,b,...,b,b,b,b,b,b,b,b,b,win
4,o,b,b,b,b,b,b,b,b,b,...,b,b,b,b,b,b,b,b,b,win


# **Feature Selection**

**encode dataset**

In [None]:
# Encoder reserved for the target column: its `classes_` attribute is read
# afterwards to recover the original label strings.
le = LabelEncoder()

# Encode each feature column with its own throwaway encoder. This keeps `le`
# fitted on the target column explicitly, instead of relying on 'class' being
# the last column visited when a single shared encoder is refitted per column.
df[feature_columns] = df[feature_columns].apply(
    lambda column: LabelEncoder().fit_transform(column), axis=0)

# Encode the target with the retained encoder.
df['class'] = le.fit_transform(df['class'])

In [None]:
# `le` was last fitted on the 'class' column, so `classes_` holds the target
# label names; they are passed to export_graphviz as `class_names` further down.
classes = le.classes_
classes

array(['draw', 'loss', 'win'], dtype=object)

In [None]:
# Preview the first five rows again, now that every column holds integer codes.
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,34,35,36,37,38,39,40,41,42,class
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2


**Split the columns into the feature variables (columns `1`–`42`) and the target variable (`class`).**

In [None]:
# Features: every named column except the trailing 'class'.
x = df.loc[:, column_names[:-1]]
# Target: the 'class' column on its own.
y = df.loc[:, 'class']

# Show both shapes as the cell output.
x.shape, y.shape

((67557, 42), (67557,))

# **Splitting Data**

80% training and 20% test

In [None]:
# Hold out 20% of the rows as the test set; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

# **Building Decision Tree Model**

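Create two decision tree models with scikit-learn, one per split criterion (entropy and Gini), train them on the training set, and predict the classes of the test set.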

In [None]:
# Create one Decision Tree classifier per split criterion.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split: without a seed, equally good splits can be chosen differently
# from run to run, so the fitted trees and the accuracies would not be repeatable.
dtc_ent = DecisionTreeClassifier(criterion='entropy', random_state=1)
dtc_gini = DecisionTreeClassifier(criterion='gini', random_state=1)

# Train both Decision Tree classifiers on the training split
# (fit returns the fitted estimator itself, hence the re-assignment).
dtc_ent = dtc_ent.fit(x_train, y_train)
dtc_gini = dtc_gini.fit(x_train, y_train)


# Predict the response for the test dataset with each tree
y_pred_ent = dtc_ent.predict(x_test)
y_pred_gini = dtc_gini.predict(x_test)

# **Evaluating the Model**

In [None]:
# Accuracy = share of test rows whose predicted class equals the true class.
accuracy_ent = metrics.accuracy_score(y_test, y_pred_ent)
accuracy_gini = metrics.accuracy_score(y_test, y_pred_gini)

# Report one line per split criterion.
print("Accuracy (entropy):", accuracy_ent)
print("Accuracy (gini):", accuracy_gini)


Accuracy (entropy): 0.7395648312611013
Accuracy (gini): 0.7374925991711072


# **Visualizing Decision Trees**

In [None]:
# Render the entropy-criterion tree with Graphviz.
# No TensorFlow device scope is used here: export_graphviz and pydotplus do not
# run any TensorFlow ops, so wrapping them in tf.device() has no effect.
dot_data = StringIO()
export_graphviz(dtc_ent, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True,
                feature_names=feature_columns,
                class_names=classes)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

# Render once to disk, then display that file instead of rendering a second time.
graph.write_png('decisionTreeClassifierEntropy.png')

# Must be the last top-level expression of the cell: an expression nested inside
# a `with` block is evaluated but never shown as the cell's output.
Image(filename='decisionTreeClassifierEntropy.png')

In [None]:
# Render the Gini-criterion tree with Graphviz.
dot_data = StringIO()
export_graphviz(dtc_gini, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True,
                feature_names=feature_columns,
                class_names=classes)
gini_graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

# Render once to disk, then display that file instead of rendering a second time.
gini_graph.write_png('decisionTreeClassifierGini.png')

# Show the Gini tree itself; the previous version displayed `graph`, i.e. the
# entropy tree built in the cell above.
Image(filename='decisionTreeClassifierGini.png')