## Import PyCaret and supporting libraries

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sys
from pycaret.classification import *


In [None]:
# Read the CSV at ../5_OutputData/OUTPUT.csv into a DataFrame.
data = pd.read_csv("../5_OutputData/OUTPUT.csv")
# Preview eight randomly sampled rows as a quick sanity check.
data.sample(n=8)

## Setting up PyCaret for the Classification Task

In [None]:
# Initialise the PyCaret classification experiment on `data`.
# The preprocessing switches below behave as described in the PyCaret
# `setup` documentation; only their values are chosen here.
clf_setup = setup(
    data=data,
    target="price_range",           # column the models will predict
    train_size=0.8,                 # fraction of rows used for training
    session_id=42,                  # fixed seed so reruns are comparable
    # Preprocessing options
    normalize=True,
    remove_multicollinearity=True,
    fix_imbalance=True,
)

### Check available models


In [None]:

# List the estimators PyCaret offers for this experiment; the IDs shown
# (e.g. 'dt') are the strings accepted by create_model() below.
models()

## Decision Tree Model

In [None]:
# Running list of [model name, mean accuracy] pairs; later cells append to
# it and the comparison chart is built from it.
accList = list()

In [None]:
# Train a baseline decision tree ('dt') with PyCaret's default settings.
dt_model = create_model('dt')
# pull() returns the score grid most recently displayed by PyCaret, so it
# must be called right after create_model() to capture this model's scores.
dt_data = pull()
# Show the fitted estimator and its hyperparameters.
print(dt_model)

In [None]:
# Record the baseline tree's accuracy from the "Mean" row of its score grid.
accList.append(['DecisionTreeClassifier', dt_data.loc['Mean', 'Accuracy']])

In [None]:
# Bar chart of the baseline decision tree's accuracy, one bar per row of the
# score grid.
# NOTE: `sys` is the seaborn alias from the import cell, not the stdlib module.
p = ["#6aa2de", "#9d8df2", "#e1a9e8", "#e8a9c0", "#f29d7e",
     "#deb531", "#a6c716", "#4bc716", "#16c774", "#0edeed"]

# Drop the last row of the grid (assumed to be the "Std" summary row -- TODO
# confirm against the pull() output); the remaining rows, including "Mean",
# are plotted.
dt_fold_scores = dt_data.Accuracy[:-1]

plt.figure(figsize=(16, 5))
plt.title("Comparison of Decision Tree Classifier")
# Pass x and y explicitly. Handing the Series over positionally is interpreted
# differently across seaborn versions (it can collapse into a single bar),
# whereas explicit x/y always draws one bar per row. Labels are stringified
# because the index mixes fold numbers with the "Mean" label.
sys.barplot(
    x=[str(label) for label in dt_fold_scores.index],
    y=dt_fold_scores.to_numpy(),
    palette=p,
)
plt.xlabel("Fold")
plt.ylabel("Accuracy")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Open PyCaret's evaluation view for the baseline decision tree.
evaluate_model(dt_model)

## Fine-tune model parameters

In [None]:
# Tune the baseline tree over a small hand-picked hyperparameter grid,
# optimising for Accuracy.
tuned_dt = tune_model(
    dt_model,
    optimize='Accuracy',
    custom_grid=dict(
        max_depth=[3, 5, 7, 10],
        min_samples_split=[2, 5, 10],
        criterion=['gini', 'entropy'],
    ),
)

# Show the tuned estimator, then capture its score grid for the plots below.
print(tuned_dt)
dt_data_tun = pull()

In [None]:
# Bar chart of the tuned decision tree's accuracy, one bar per row of the
# score grid.
# NOTE: `sys` is the seaborn alias from the import cell, not the stdlib module.
p = ["#6aa2de", "#9d8df2", "#e1a9e8", "#e8a9c0", "#f29d7e",
     "#deb531", "#a6c716", "#4bc716", "#16c774", "#0edeed"]

# Drop the last row of the grid (assumed to be the "Std" summary row -- TODO
# confirm against the pull() output); the remaining rows, including "Mean",
# are plotted.
tuned_fold_scores = dt_data_tun.Accuracy[:-1]

plt.figure(figsize=(16, 5))
plt.title("Comparison of  Decision Tree Classifier with Tuning")
# Pass x and y explicitly. Handing the Series over positionally is interpreted
# differently across seaborn versions (it can collapse into a single bar),
# whereas explicit x/y always draws one bar per row. Labels are stringified
# because the index mixes fold numbers with the "Mean" label.
sys.barplot(
    x=[str(label) for label in tuned_fold_scores.index],
    y=tuned_fold_scores.to_numpy(),
    palette=p,
)
plt.xlabel("Fold")
plt.ylabel("Accuracy")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Record the tuned tree's accuracy from the "Mean" row of its score grid.
accList.append(['Decision Tree Classifier with Tuning', dt_data_tun.loc['Mean', 'Accuracy']])

In [None]:
# Tabulate the recorded accuracies, lowest first.
df = pd.DataFrame(accList, columns=['Model Name', 'Accuracy']).sort_values(by='Accuracy')
# NOTE: a bare expression in the middle of a cell is not rendered by Jupyter;
# only a cell's final expression is displayed.
df

# One colour per recorded model (two entries are expected in accList here).
# `sys` is the seaborn alias from the import cell, not the stdlib module.
p = ["#6aa2de", "#4bc716"]
plt.title("Comparison of Decision Tree Classifier Model and Decision Tree Classifier with Tuning")
ax = sys.barplot(x=df['Model Name'], y=df['Accuracy'], palette=p)
plt.xticks(rotation=90)

# Annotate every bar with its value.
for bar_group in ax.containers:
    ax.bar_label(bar_group, fontsize=10)

In [None]:
# Open PyCaret's evaluation view for the tuned decision tree.
evaluate_model(tuned_dt)


In [None]:
# Score the tuned tree. No `data` argument is given, so PyCaret chooses the
# evaluation set itself (the hold-out split, per its documentation).
predictions = predict_model(tuned_dt)
# Preview the first five rows of the prediction frame.
predictions.head(n=5)

## Compare the performance

In [None]:
# Train and cross-validate the available estimators and keep the top one.
best_model = compare_models()
# pull() must directly follow compare_models() to capture its leaderboard.
# NOTE: despite the `dt_` prefix, this grid covers every compared model,
# not just the decision tree.
dt_results = pull()
# Display the winning estimator.
best_model

In [None]:
# Bar chart of accuracy for every model on the compare_models() leaderboard.
# NOTE: `sys` is the seaborn alias from the import cell, not the stdlib module.
p = ["#6aa2de", "#9d8df2", "#e1a9e8", "#e8a9c0", "#f29d7e",
     "#deb531", "#a6c716", "#4bc716", "#16c774", "#0edeed"]
plt.figure(figsize=(16, 5))
# Single title matching the chart's content. The earlier second call,
# plt.title("Feature Importance"), overwrote it with a label belonging to a
# different plot.
plt.title("Comparison of Classification Models")
# Pass the frame as `data=` so the call does not depend on which parameter
# seaborn places first positionally.
sys.barplot(data=dt_results, x="Model", y="Accuracy", palette=p)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Plot the confusion matrix of the best model found by compare_models().
plot_model(best_model, plot='confusion_matrix')

In [None]:
# Plot feature importance for the best model.
# NOTE(review): presumably this only works when the winning estimator exposes
# importances or coefficients -- confirm for whichever model is selected.
plot_model(best_model, plot='feature')