# Comparison of Individual, Bagging and Boosting Algorithms

In [1]:
# import libraries 
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import ML Libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from xgboost import XGBClassifier

In [2]:
# Fetch seaborn's bundled "diamonds" sample dataset and preview its first rows
df = sns.load_dataset(name='diamonds')
df.head(n=5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [3]:
# (number of rows, number of columns) of the loaded frame
df.shape

(53940, 10)

In [14]:
# encode the categorical feature columns BEFORE building X.
# df.drop() returns a new frame, so encoding `df` after X had been created
# (the previous order) never reached X: on a fresh kernel X would still hold
# the raw 'color' / 'clarity' categories and the models could not be fitted.
for col in ('color', 'clarity'):
    df[col] = LabelEncoder().fit_transform(df[col])

# split the data into features and target ('cut' is the class to predict)
X = df.drop('cut',axis=1)
y = df['cut']

# encode the target variable; `le` stays fitted on the target, so
# le.inverse_transform(...) can map predicted codes back to cut names
le = LabelEncoder()
y = le.fit_transform(y)


# split the data into train and test (20% held out; fixed seed keeps the split reproducible)
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

# Decision Tree Classifier

In [7]:
%%time



# call the model
model = DecisionTreeClassifier()
dt = model.fit(x_train,y_train)

# predict the model
y_pred = dt.predict(x_test)

# evaluate the model
print('accuracy_score',accuracy_score(y_test,y_pred))
print('precision_score',precision_score(y_test,y_pred, average='micro'))
print('recall_score',recall_score(y_test,y_pred, average='micro'))
print('f1_score',f1_score(y_test,y_pred, average='micro'))


accuracy_score 0.7131998516870597
precision_score 0.7131998516870597
recall_score 0.7131998516870597
f1_score 0.7131998516870597
CPU times: total: 2.05 s
Wall time: 2.47 s


# Random Forest Classifier

In [8]:
%%time
# call the model
model =RandomForestClassifier()
rf =model.fit(x_train,y_train)

# predict the model
y_pred = rf.predict(x_test)

# evaluate the model
print('accuracy_score',accuracy_score(y_test,y_pred))
print('precision_score',precision_score(y_test,y_pred, average='micro'))
print('recall_score',recall_score(y_test,y_pred, average='micro'))
print('f1_score',f1_score(y_test,y_pred, average='micro'))




accuracy_score 0.7870782350760104
precision_score 0.7870782350760104
recall_score 0.7870782350760104
f1_score 0.7870782350760104
CPU times: total: 48.2 s
Wall time: 57.8 s


# XGBoost Classifier

In [16]:
%%time
# train XGBoost model
model = XGBClassifier()
xgb = model.fit(x_train, y_train)
# predict the model
y_pred = xgb.predict(x_test)

# evaluate the model
print('accuracy_score',accuracy_score(y_test,y_pred))
print('precision_score',precision_score(y_test,y_pred, average='micro'))
print('recall_score',recall_score(y_test,y_pred, average='micro'))
print('f1_score',f1_score(y_test,y_pred, average='micro'))


accuracy_score 0.8056173526140156
precision_score 0.8056173526140156
recall_score 0.8056173526140156
f1_score 0.8056173526140155
CPU times: total: 3.08 s
Wall time: 1.76 s
