# Decision Tree Algorithm In Machine Learning

In [1]:
import pandas as pd

# Read the dataset from the working directory into a DataFrame.
# NOTE(review): the preview shows an 'Unnamed: 0' column - presumably a row index
# that was written into the CSV when it was saved; confirm before relying on it.
df = pd.read_csv(filepath_or_buffer='mldata.csv')

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,age,height,weight,likeness,gender
0,27,170.688,76.0,Biryani,Male
1,41,165.0,70.0,Biryani,Male
2,29,171.0,80.0,Biryani,Male
3,27,173.0,102.0,Biryani,Male
4,29,164.0,67.0,Biryani,Male


In [2]:
# Encode the text labels of the 'gender' column as integers so the column can be
# used as a model input: "Male" -> 1, "Female" -> 0. Any other value is left unchanged.
df['gender'] = df['gender'].replace({"Male": 1, "Female": 0})

# Preview the first five rows to check the encoding.
df.head(n=5)

Unnamed: 0,age,height,weight,likeness,gender
0,27,170.688,76.0,Biryani,1
1,41,165.0,70.0,Biryani,1
2,29,171.0,80.0,Biryani,1
3,27,173.0,102.0,Biryani,1
4,29,164.0,67.0,Biryani,1


In [3]:
# Choose the model inputs and the target.
# Target (output): the 'likeness' column.
y = df.loc[:, 'likeness']
# Features (input): 'age' and 'gender'.
X = df.loc[:, ['age', 'gender']]

In [4]:
# apply machine learning algorithm
from sklearn.tree import DecisionTreeClassifier

# Create and fit the model on the complete dataset.
# random_state is fixed because scikit-learn permutes the features at every split,
# so equally good splits could otherwise be chosen differently from run to run.
model = DecisionTreeClassifier(random_state=1).fit(X, y)

# Predict for one new sample: age 55, gender 0 ("Female" in the encoding used above).
# The sample is wrapped in a DataFrame that carries the training column names;
# passing a bare list to a model fitted on a DataFrame makes scikit-learn warn
# that "X does not have valid feature names".
new_sample = pd.DataFrame([[55, 0]], columns=X.columns)
model.predict(new_sample)



array(['Samosa'], dtype=object)

# How to measure the accuracy of our model

**1. Split Data Into Train And Test (80/20)**

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Hold out 20% of the rows for testing and train on the remaining 80%.
# Fixing random_state (any integer works, not only 0 or 1) makes the split
# reproducible; without it a different random split is drawn on every run and
# the accuracy changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Create and fit the model on the training part only.
# The classifier is seeded as well: scikit-learn permutes the features at every
# split, so ties between equally good splits could otherwise make the fitted
# tree (and therefore the score) differ between runs even with a fixed split.
model2 = DecisionTreeClassifier(random_state=1).fit(X_train, y_train)

# Predict labels for the held-out rows.
predicted_values = model2.predict(X_test)

# Accuracy = fraction of held-out rows whose predicted label matches the true label.
score = accuracy_score(y_test, predicted_values)
score

0.5952380952380952

In [6]:
# Save the already-fitted model to disk so that it can be reused later
# without being trained again.
import joblib
import pandas as pd
from sklearn.tree import DecisionTreeClassifier


# '.joblib' is the customary extension for files written by joblib.dump.
# The call returns the list of file names it wrote.
joblib.dump(value=model, filename='stored_model.joblib')

['stored_model.joblib']

In [7]:
# Assignment: load a stored model from disk and run it on your data.
# NOTE: joblib files are pickle-based, so only load files that come from a trusted source.
stored_model = joblib.load(filename='stored_model.joblib')

# Mean accuracy of the loaded model on X / y. These are the same rows `model` was
# fitted on before it was saved, so this is training accuracy, not an estimate
# of how the model performs on unseen data.
stored_model.score(X=X, y=y)

0.6923076923076923

In [8]:
# Visualise the fitted tree to look into how it makes its decisions.
from sklearn import tree

# Write the tree of `model` to a Graphviz description file ('.dot' is the usual
# extension for such files). Nodes are drawn with rounded corners and filled
# with colour, and every node is labelled.
tree.export_graphviz(
    decision_tree=model,
    out_file='output_graph.dot',
    # class names are passed in sorted order
    class_names=sorted(y.unique()),
    feature_names=['age', 'gender'],
    filled=True,
    rounded=True,
    label='all',
)