# Decision Tree vs Random Forest vs AdaBoost Classifier
by Vinodhini Rajamanickam

### import necessary packages

In [50]:
import pandas as pd

from sklearn.tree import DecisionTreeClassifier

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score 

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split


## zomato file

In [5]:
#load and read data
zomato = pd.read_csv("zomato.csv")

In [6]:
#printing the data
zomato.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [7]:
# rows and columns in data
zomato.shape

(51717, 17)

In [10]:
#checking for null values
zomato.isnull().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [11]:
# dropping unnecessary columns

zomato = zomato.drop(["location","rest_type","cuisines","url","address","name","phone","reviews_list","menu_item","listed_in(type)","listed_in(city)","dish_liked","rate"],axis = 1)

In [14]:
# checking for available columns after dropping some the columns

zomato.columns

Index(['online_order', 'book_table', 'votes', 'approx_cost(for two people)'], dtype='object')

In [16]:
#checking for null values

zomato.isnull().sum()

online_order                     0
book_table                       0
votes                            0
approx_cost(for two people)    346
dtype: int64

In [17]:
#filling null values using padding method

zomato = zomato.fillna(method = "pad")

#check again for null values
zomato.isnull().sum()

online_order                   0
book_table                     0
votes                          0
approx_cost(for two people)    0
dtype: int64

In [19]:
#Label Encoding

le = LabelEncoder()

zomato["online_order"] = le.fit_transform(zomato["online_order"])

zomato["book_table"] = le.fit_transform(zomato["book_table"])

zomato["approx_cost(for two people)"] = le.fit_transform(zomato["approx_cost(for two people)"])

In [27]:
# checking datatype after label encoding

print(zomato["online_order"].dtype)

print(zomato["book_table"].dtype)

print(zomato["approx_cost(for two people)"].dtype)

int64
int32
int32


In [28]:
zomato.head()

Unnamed: 0,online_order,book_table,votes,approx_cost(for two people)
0,1,1,775,66
1,1,0,787,66
2,1,0,918,66
3,0,0,88,42
4,0,0,166,60


building model

In [32]:
#defining x and y values

x = zomato.drop(["book_table"],axis = 1)
y = zomato["book_table"]

In [33]:
#train_test_split
# splitting data into train and test data

x_train , x_test, y_train, y_test = train_test_split(x,y, test_size = 0.3)

### Decision Tree Classifier

In [34]:
dtc = DecisionTreeClassifier()

dtc.fit(x_train, y_train)
prediction = dtc.predict(x_train)

In [37]:
#check performance of decison tree for trainig data

print("confusion matrix:")
print(confusion_matrix(y_train, prediction))
print("\n")
print("accuracy score : ")
print(accuracy_score(y_train,prediction))

confusion matrix:
[[31581    67]
 [  264  4289]]


accuracy score : 
0.9908566061711003


In [40]:
#prediction and accuracy for test data
test_predict = dtc.predict(x_test)
print(accuracy_score(y_test, test_predict))

0.9777004382572828


### Random forest classifier

In [41]:
rfc = RandomForestClassifier()

rfc.fit(x_train, y_train)
prediction = rfc.predict(x_train)

In [42]:
#check performance of random forest for trainig data

print("confusion matrix:")
print(confusion_matrix(y_train, prediction))
print("\n")
print("accuracy score : ")
print(accuracy_score(y_train,prediction))

confusion matrix:
[[31569    79]
 [  252  4301]]


accuracy score : 
0.9908566061711003


In [45]:
#prediction and accuracy for test data
test_predict = rfc.predict(x_test)
print(accuracy_score(y_test, test_predict))

0.9793761278680072


### AdaBoost classifier

In [43]:
adb = AdaBoostClassifier()

adb.fit(x_train, y_train)
prediction = adb.predict(x_train)

In [44]:
#check performance of adaboost for trainig data

print("confusion matrix:")
print(confusion_matrix(y_train, prediction))
print("\n")
print("accuracy score : ")
print(accuracy_score(y_train,prediction))

confusion matrix:
[[30554  1094]
 [ 1424  3129]]


accuracy score : 
0.9304439103892158


In [46]:
#prediction and accuracy for test data
test_predict = adb.predict(x_test)
print(accuracy_score(y_test, test_predict))

0.9289765403454499


#### what I learn from this?

Based on the confusion matrices and accuracy scores that we got, it appears that the Decision Tree Classifier and Random Forest Classifier are performing quite well on the  dataset. 
Both models have high accuracy scores, with the Decision Tree Classifier and Random Forest Classifier achieving an accuracy score of 0.9908 on the training data.


The confusion matrices show that both models have very low false positive and false negative rates.
This suggests that the models are able to accurately classify the data and make reliable predictions.

On the other hand, the Adaboost Classifier has a lower accuracy score of 0.9304 on the training data, 
indicating that it may not be performing as well as the Decision Tree Classifier and Random Forest Classifier.

Overall, it seems that the Decision Tree Classifier and Random Forest Classifier are strong models for the dataset, while the Adaboost Classifier may need 
further tuning or exploration to improve its performance