# Day 21 - 06 - Machine Learning - Decision Tree

## Importing Libraries

In [1]:
# Importing Libraries
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


## Importing data set

In [41]:
# Read the survey CSV into a DataFrame and preview its first 20 rows.
# NOTE(review): the preview shows a height of 5.600 among values of roughly
# 150-185 -- possibly mixed units; confirm before using height as a feature.
df = pd.read_csv(filepath_or_buffer='mldata_21.csv')
print(df.head(n=20))

    age   height  weight  gender likeness
0    27  170.688    76.0    Male  Biryani
1    41  165.000    70.0    Male  Biryani
2    29  171.000    80.0    Male  Biryani
3    27  173.000   102.0    Male  Biryani
4    29  164.000    67.0    Male  Biryani
5    28  174.000    46.0  Female  Biryani
6    27  151.000    64.3  Female  Biryani
7    34  176.500    98.0    Male  Biryani
8    32  181.000    87.5    Male  Biryani
9    22  184.500    80.0    Male  Biryani
10   29  163.500    65.0    Male   Pakora
11   29  176.000    78.0    Male   Samosa
12   23  178.000    62.0    Male   Samosa
13   28  168.000    74.0    Male  Biryani
14   25  165.000   102.0    Male  Biryani
15   27  174.000    78.0    Male   Samosa
16   33  162.000    93.0    Male  Biryani
17   28  176.500    68.0    Male   Samosa
18   21    5.600    71.5    Male  Biryani
19   25  185.900   135.0    Male  Biryani


## Converting Gender into Numeric Data

In [7]:
# Encode gender numerically in one pass: 'Male' -> 1, 'Female' -> 0.
# Any other value is left untouched by replace().
df['gender'] = df['gender'].replace({'Male': 1, 'Female': 0})
df['gender']

0      1
1      1
2      1
3      1
4      1
      ..
240    1
241    1
242    1
243    1
244    0
Name: gender, Length: 245, dtype: int64

## Making Descriptive and Target Features

In [9]:
# Input columns for the first model: weight plus the numeric gender code.
descriptive_features = df.loc[:, ['weight', 'gender']]
descriptive_features

Unnamed: 0,weight,gender
0,76.0,1
1,70.0,1
2,80.0,1
3,102.0,1
4,67.0,1
...,...,...
240,60.0,1
241,70.0,1
242,80.0,1
243,65.0,1


In [11]:
# Label column the classifier learns to predict.
target_feature = df.loc[:, 'likeness']
target_feature

0      Biryani
1      Biryani
2      Biryani
3      Biryani
4      Biryani
        ...   
240     Pakora
241    Biryani
242    Biryani
243    Biryani
244     Samosa
Name: likeness, Length: 245, dtype: object

## Importing Decision Tree library


In [12]:
from sklearn.tree import DecisionTreeClassifier

# Build a decision tree with default settings and train it on the selected
# features; fit() hands back the fitted estimator, which is then displayed.
classifier = DecisionTreeClassifier().fit(descriptive_features, target_feature)
classifier

DecisionTreeClassifier()

## Making prediction

In [13]:
# Predict for a female (gender code 0) of weight 23.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same column names and order; a bare list makes scikit-learn warn
# that "X does not have valid feature names".
classifier.predict(pd.DataFrame([[23, 0]], columns=['weight', 'gender']))

array(['Biryani'], dtype=object)

## Splitting data into Training and Test sets to measure accuracy

In [17]:
# Splitting data
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# same rows land in each split on every run; without it the train/test
# membership (and any score computed from it) changes between runs.
x_train, x_test, y_train, y_test = train_test_split(
    descriptive_features,
    target_feature,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Train a fresh tree on the training split only, then predict the held-out
# rows. random_state is fixed because scikit-learn permutes the candidate
# features at every split, so an unseeded tree can differ from run to run.
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(x_train, y_train)
predicted_values = classifier.predict(x_test)
predicted_values

array(['Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Samosa', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Pakora', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani'], dtype=object)

## Measure Accuracy

In [19]:
# Measuring score
from sklearn.metrics import accuracy_score

# Share of held-out rows whose predicted label matches the true label.
score = accuracy_score(y_true=y_test, y_pred=predicted_values)
score

0.6326530612244898

## Train and save your model to avoid training every time


In [30]:
import joblib
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Fit a default decision tree on the current feature/target pair and write the
# fitted estimator to disk so later sessions can load it instead of retraining.
model = DecisionTreeClassifier().fit(descriptive_features, target_feature)
joblib.dump(model, 'foodie_asad.joblib')

['foodie_asad.joblib']

## Import and Run saved Model

In [33]:
# Restore the persisted estimator and run it on the held-out rows.
# joblib.load is pickle-based, so only load files from a trusted source.
new_model = joblib.load(filename='foodie_asad.joblib')
predicted_values_new_model = new_model.predict(X=x_test)
predicted_values_new_model

array(['Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Samosa', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Samosa', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani', 'Biryani',
       'Biryani', 'Biryani', 'Pakora', 'Biryani', 'Biryani', 'Biryani',
       'Biryani'], dtype=object)

## Visualizing Graph

In [35]:
from sklearn import tree

# Export the reloaded tree to a Graphviz DOT file.
# new_model was trained on the ['weight', 'gender'] columns, so the split
# nodes must carry those names; labelling the first feature 'age' would
# misdescribe every weight threshold in the rendered tree.
# Class names are read from the fitted model (classes_ is in the order the
# tree uses) rather than from a global that may be rebound later.
tree.export_graphviz(new_model, out_file='foodie.dot',
                     feature_names=['weight', 'gender'],
                     class_names=list(new_model.classes_),
                     label='all', rounded=True, filled=True)

## Making Descriptive and Target Features for complete Data set

In [36]:
# Widen the feature matrix to every available input column.
# Note: this rebinds descriptive_features, replacing the two-column version.
descriptive_features = df.loc[:, ['age', 'height', 'weight', 'gender']]
descriptive_features

Unnamed: 0,age,height,weight,gender
0,27,170.688,76.0,1
1,41,165.000,70.0,1
2,29,171.000,80.0,1
3,27,173.000,102.0,1
4,29,164.000,67.0,1
...,...,...,...,...
240,31,160.000,60.0,1
241,26,172.000,70.0,1
242,40,178.000,80.0,1
243,25,5.700,65.0,1


In [37]:
# The prediction target stays the same: the 'likeness' label column.
target_feature = df.loc[:, 'likeness']
target_feature

0      Biryani
1      Biryani
2      Biryani
3      Biryani
4      Biryani
        ...   
240     Pakora
241    Biryani
242    Biryani
243    Biryani
244     Samosa
Name: likeness, Length: 245, dtype: object

## Making and Fitting Model


In [38]:

# Fitting Model
# Train a default decision tree on the four-column feature matrix; fit()
# returns the fitted estimator, which is shown as the cell output.
classifier_complete = DecisionTreeClassifier().fit(descriptive_features, target_feature)
classifier_complete

DecisionTreeClassifier()

## Making prediction

In [45]:
# Each query row is [age, height, weight, gender code] with 1 = Male and
# 0 = Female. Rows are wrapped in a DataFrame carrying the training column
# names so scikit-learn does not warn about missing feature names.
complete_columns = ['age', 'height', 'weight', 'gender']
print(classifier_complete.predict(pd.DataFrame([[33, 157, 56, 0]], columns=complete_columns)))  # Female, age 33, height 157, weight 56kg
print(classifier_complete.predict(pd.DataFrame([[29, 163, 65, 1]], columns=complete_columns)))  # Male, age 29, height 163, weight 65kg
print(classifier_complete.predict(pd.DataFrame([[27, 170, 76, 1]], columns=complete_columns)))  # Male, age 27, height 170, weight 76kg

['Samosa']
['Pakora']
['Biryani']
