In [None]:
# run this cell to install pycaret in Google Colab
!pip install pycaret

In [None]:
# Show which PyCaret release is installed, so results can be tied to a version.
from pycaret.utils import version
version()

# 1. Importing Dataset

In [None]:
import random 
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [None]:
# Colab only: mount Google Drive at /content/gdrive so the training CSV stored
# there can be read by later cells.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Load the challenge training set from the mounted Google Drive folder and
# preview its first rows. (An earlier draft used PyCaret's bundled 'juice'
# demo dataset instead of this file.)
train = pd.read_csv(
    '/content/gdrive/My Drive/Piramal DS Hiring Challenge/Train.csv'
)
train.head()

# 2. Setting up Environment

In [None]:
# The star import supplies the PyCaret classification helpers (setup,
# compare_models, create_model, ...) that the following cells call unqualified.
from pycaret.classification import *

# Initialise the experiment on `train` with 'Problem Category' as the label;
# session_id is fixed so repeated runs use the same seed.
clf1 = setup(train, target='Problem Category', session_id=7582)

# 3. Compare Models

In [None]:
# Benchmark the available classifiers with default settings and keep the
# returned model as `best_model`.
best_model = compare_models()

# 4. Create Model

In [None]:
# Train the model registered under PyCaret's 'rf' ID; reused by the tuning cell.
rf = create_model('rf')

In [None]:
# Train the model registered under PyCaret's 'dt' ID; reused by the ensemble
# and decision-boundary cells further down.
dt = create_model('dt')

# 5. Tune Model

In [None]:
# Hyper-parameter search on the `rf` model, selecting by F1 rather than the
# default metric.
tune_best_model = tune_model(rf, optimize='F1')

In [None]:
# Let automl() choose among the models trained so far in this session, using
# its default selection settings, then show the chosen estimator.
best_model_cv = automl()
print(best_model_cv)

In [None]:
# Same selection as the previous automl() call, but with use_holdout=True so the
# choice is based on the hold-out set; print the chosen estimator.
best_model_holdout = automl(use_holdout=True)
print(best_model_holdout)

In [None]:
# Score the model selected on the hold-out set. This cell previously passed
# `sc`, a name that is never defined in this notebook, which raised NameError
# on Restart & Run All.
predict_model(best_model_holdout)

In [None]:
# Show the tuned estimator and its hyper-parameters. `tuned_nb` was never
# defined in this notebook; `tune_best_model` is the tuned model that exists.
print(tune_best_model)

# 6. Ensemble Model

In [None]:
# Ensemble the `dt` model using ensemble_model's default method, with 25 estimators.
bagged_dt = ensemble_model(dt, n_estimators=25)


In [None]:
# Ensemble the `dt` model again, this time with the 'Boosting' method.
boosted_dt = ensemble_model(dt, method='Boosting')


# 7. Blend Models

In [None]:
# Fit the three base learners for the blend, in order; verbose=False is passed
# to keep their per-model output out of the notebook.
lr, lda, gbc = (
    create_model(model_id, verbose=False)
    for model_id in ('lr', 'lda', 'gbc')
)

In [None]:
# Combine the three base learners into one blended model using 'soft' voting.
blender = blend_models(
    estimator_list=[lr, lda, gbc],
    method='soft',
)


In [None]:
# Inspect the `estimators_` attribute of the blended model (presumably the
# fitted base estimators — confirm against the estimator's documentation).
blender.estimators_

# 8. Analyze Model

In [None]:
# No `plot` argument is given, so PyCaret draws its default diagnostic plot
# for the blended model.
plot_model(blender)


In [None]:
# Confusion matrix for the blended model.
plot_model(blender, plot='confusion_matrix')


In [None]:
# Threshold plot for the blended model.
# NOTE(review): this plot type may only be supported for binary targets —
# confirm that 'Problem Category' has exactly two classes.
plot_model(blender, plot='threshold')


In [None]:
# 'pr' plot (precision-recall) for the blended model.
plot_model(blender, plot='pr')


In [None]:
# 'vc' plot for the tuned model. `tuned_dt` was never defined in this notebook
# (only `dt` and the tuned `tune_best_model` exist), so the tuned model that
# is available is plotted instead.
plot_model(tune_best_model, plot='vc')


In [None]:
# Decision boundary of the untuned `dt` model.
plot_model(dt, plot='boundary')


In [None]:
# Decision boundary of the tuned model, for comparison with the `dt` and
# blender boundaries. `tuned_nb` was never defined in this notebook, so the
# existing `tune_best_model` is used.
plot_model(tune_best_model, plot='boundary')


In [None]:
# Decision boundary of the blended model.
plot_model(blender, plot='boundary')


In [None]:
# Evaluate the tuned model with evaluate_model. `tuned_nb` was never defined
# in this notebook, so the existing `tune_best_model` is passed instead.
evaluate_model(tune_best_model)


# 9. Interpret Model

In [None]:
# Train the model registered under PyCaret's 'xgboost' ID; it is the model
# passed to the interpret, deploy and save cells below.
xgboost = create_model('xgboost')


In [None]:
!pip install shap

In [None]:
# No `plot` argument is given, so PyCaret draws its default interpretation
# plot for the xgboost model.
interpret_model(xgboost)


In [None]:
# 'correlation' interpretation plot for the xgboost model.
interpret_model(xgboost, plot='correlation')


In [None]:
# 'reason' plot for a single observation (observation=1).
interpret_model(xgboost, plot='reason', observation=1)


In [None]:
# 'reason' plot without selecting a specific observation.
interpret_model(xgboost, plot='reason')


# 10. Deploy Model

In [None]:
# Deploy the xgboost model under the name 'xgboost-for-aws' to the
# 'pycaret-test' bucket.
# NOTE(review): this presumably needs cloud credentials configured in the
# runtime and will fail on Run All without them — confirm before sharing.
deploy_model(
    xgboost,
    model_name='xgboost-for-aws',
    authentication={'bucket': 'pycaret-test'},
)


In [None]:
# Save the xgboost model under the name 'abc'; the next cell loads it back.
save_model(xgboost, 'abc')


In [None]:
# Load back the model saved as 'abc' in the previous cell.
# NOTE(review): the name `l` is easy to misread as `1`; consider a descriptive
# name if no later cell depends on it.
l = load_model('abc')


In [None]:
# Re-create the 'lr' model, this time without verbose=False. This rebinds the
# `lr` name first created in the blending section.
lr = create_model('lr')


In [None]:
# Generate predictions with the `lr` model; no `data` argument is passed, so
# PyCaret's default evaluation data is used.
predict_model(lr)
