In [4]:
#import all necessary libraries.
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#Read the computer-purchase records from the CSV file into a pandas DataFrame.
Buy_Computer = pd.read_csv(filepath_or_buffer=r'BuyComputerData.csv')

#Preview the first five rows.
#The columns hold the age group, income, student status and credit rating;
#the last column, buys_computer, is the target variable that tells whether a computer is bought.
Buy_Computer.head(n=5)

Unnamed: 0,age,income,student,credit_rating,buys_computer
0,<=30,high,no,fair,no
1,<=30,high,no,excellent,no
2,31-40,high,no,fair,yes
3,>40,medium,no,fair,yes
4,>40,low,yes,fair,yes


In [5]:
#We use the Python library SKLearn to create a model and make predictions.
#SKLearn requires the features to be numerical arrays, so the categorical
#columns in our data have to be converted into numbers first.
#There are multiple ways of doing this; we keep it simple and use a LabelEncoder.
#A LabelEncoder maps the n distinct classes of a column to the integers 0 to n-1,
#numbering the classes in sorted order.
#For example, income has 3 classes - high, low, medium - encoded as 0, 1, 2.
#Note that the order is by character code, not by meaning: digits sort before
#'<' and '>', so an age column holding 31-40, <=30 and >40 is encoded as
#31-40 -> 0, <=30 -> 1, >40 -> 2.
#
#Each column gets its own encoder, kept in `encoders`, so that the class <-> code
#mapping of every column stays available afterwards, e.g.
#encoders['age'].classes_ or encoders['age'].transform(['<=30']).
#(A single encoder that is refitted for every column only remembers the last one.)
#Run this on freshly loaded data: the columns are overwritten with their codes, so
#fitting again on the already encoded frame would only learn the integers.
categorical_columns = ['age', 'income', 'student', 'credit_rating', 'buys_computer']
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    Buy_Computer[column] = encoders[column].fit_transform(Buy_Computer[column])

#Keep the original name `number` bound to the most recently fitted encoder
#(the one for buys_computer), as it was before.
number = encoders['buys_computer']

#Choose the model inputs and the output.
#The four descriptive columns are the features; buys_computer is the target.
features = ["age", "income", "student", "credit_rating"]
target = "buys_computer"

#To validate the performance of our model we hold part of the data back:
#the model is built on the train part and checked on the unseen test part.
#SKLearn's train_test_split reserves 33% of the rows for testing; the fixed
#random_state makes the shuffle, and therefore the split, repeatable.
features_train, features_test, target_train, target_test = train_test_split(
    Buy_Computer[features],
    Buy_Computer[target],
    test_size=0.33,
    random_state=54,
)

#Build the classifier: a decision tree that uses entropy as its split criterion.
#random_state is fixed so that the fitted tree is reproducible.
model = DecisionTreeClassifier(random_state=0, criterion='entropy')

#Train the tree on the training part of the data only.
model.fit(X=features_train, y=target_train)

#Make predictions on the test features.
pred = model.predict(features_test)

#Measure the performance of the model with the accuracy score, i.e. the
#fraction of test rows whose predicted class equals the true class
#(how often is the classifier correct?).
#The score is computed once and the stored value is reused for printing.
accuracy = accuracy_score(target_test, pred)
print("Accuracy:",accuracy)
#The accuracy is in this case about 0.4.
#It comes from a single 33% hold-out split, so treat it as a rough estimate.

#Now suppose we want to predict for the conditions,
#age	income	student	credit_rating
#<=30	medium	yes	fair
#The row passed to the model must use the same codes the LabelEncoder produced.
#LabelEncoder numbers the classes of a column in sorted (character code) order:
#age:           31-40 -> 0, <=30 -> 1, >40 -> 2   (the digit '3' sorts before '<' and '>')
#income:        high -> 0, low -> 1, medium -> 2
#student:       no -> 0, yes -> 1
#credit_rating: excellent -> 0, fair -> 1
#(assuming the columns contain only these classes - check against the full data).
#The conditions above are therefore the row [1, 2, 1, 1]; an age code of 0 would
#ask about the 31-40 group instead of <=30.
#The row is wrapped in a DataFrame with the training column names so that it is
#matched to the features the model was fitted on.
query = pd.DataFrame([[1, 2, 1, 1]], columns=features)
print (model.predict(query))
#The printed value is the encoded buys_computer class: 0 = no, 1 = yes.

Accuracy: 0.4
[1]
