# Build Machine Learning Model dengan Pycaret

## Setup Requirement

### Set Log level

In [None]:
# Only enable critical logging (Optional)
import os
os.environ["PYCARET_CUSTOM_LOGGING_LEVEL"] = "CRITICAL"

### Install Library

In [None]:
!pip install pip install pycaret[full] -q
#!pip install mlflow -q
#!pip install pycaret-ts-alpha

### Check Installed Library

In [None]:
def what_is_installed() -> None:
    """Call ``pycaret.show_versions()`` to report the installed versions.

    PyCaret is imported inside the function body, so a missing installation
    surfaces as ``ModuleNotFoundError`` when the function is called rather
    than when it is defined.
    """
    from pycaret import show_versions
    show_versions()

# Try to report versions; if PyCaret is not importable, install it and retry once.
try:
    what_is_installed()
except ModuleNotFoundError:
    !pip install pycaret
    what_is_installed()

### Import Library

In [None]:
import time
import numpy as np
import pandas as pd

from pycaret.datasets import get_data
from pycaret.time_series import TSForecastingExperiment

In [None]:
# show the docstring of get_data
help(get_data)

# Regresi

## Import dataset dari Pycaret Repository

In [None]:
from pycaret.datasets import get_data
data_reg = get_data('diamond')

In [None]:
data_reg

## Exploratory Data Analysis (EDA)

### Mencari informasi tentang data tersebut

In [None]:
data_reg.describe()

In [None]:
data_reg.info()

In [None]:
data_reg.isnull().sum()

### Memvisualisasi Data yang ada

In [None]:
data_reg.plot()

In [None]:
# plot scatter carat_weight dan Price
import plotly.express as px
fig = px.scatter(x=data_reg['Carat Weight'], y=data_reg['Price'], facet_col = data_reg['Cut'],
                 template = 'plotly_dark', opacity = 0.25, trendline='ols', trendline_color_override = 'red',
                 title = 'DATA DIAMOND - CASE STUDY')
fig.show()

In [None]:
# plot histogram
fig = px.histogram(data_reg, x=["Price"], template = 'plotly_dark', title = 'Histogram dari Price')
fig.show()

In [None]:
import numpy as np

# create a copy of data
data_copy = data_reg.copy()

# create a new feature Log_Price
data_copy['Log_Price'] = np.log(data_reg['Price'])

# plot histogram
fig = px.histogram(data_copy, x=["Log_Price"], title = 'Histgram dari Log Price', template = 'plotly_dark')
fig.show()

## Data Preprocessing

In [None]:
# initialize setup untuk data transformation
from pycaret.regression import *

esp_reg = setup(data_reg, target = 'Price', transform_target = True, log_experiment='mlflow', experiment_name = 'diamond',use_gpu = False)

In [None]:
# check statistical tests on original data
esp_reg.check_stats()

## Data Modeling

In [None]:
# check models yang tersedia
esp_reg.models()

In [None]:
# compare semua model
best_reg = esp_reg.compare_models(sort = 'MAE')

## Evaluasi Model

In [None]:
evaluate_model(best_reg)

In [None]:
# check the final params of the best model returned by compare_models
best_reg.get_params()

In [None]:
# check all available config of the regression experiment
esp_reg.get_config()


In [None]:
# plot error
plot_model(best_reg, plot = 'error')

# check the residuals of trained model
plot_model(best_reg, plot = 'residuals')

# check feature importance
plot_model(best_reg, plot ='feature')

In [None]:
# interpret summary model
interpret_model(best_reg, plot = 'summary')

## Simpan Model dan Experiment

In [None]:
save_model(best_reg, 'best_reg_model')

In [None]:
save_experiment('best_reg_experiment')

In [None]:
!ls

## Load Model dan Experiment

In [None]:
# load experiment from disk
exp_from_disk = load_experiment('best_reg_experiment', data=data_reg)


In [None]:
# load the regression pipeline that was saved above as 'best_reg_model'
from pycaret.regression import load_model

pipeline = load_model('best_reg_model')
# print pipeline
print(pipeline)

In [None]:
# Predict on the full dataset (data_reg still contains the target column 'Price')
predictions = exp_from_disk.predict_model(pipeline, data = data_reg)
predictions

In [None]:
# Build the "unseen" dataset: a new frame with the same rows as data_reg,
# minus the target column (data_reg itself is left untouched)
data_unseen = data_reg.drop(columns = 'Price')

# Predict on data that has no target variable
predictions = predict_model(pipeline, data = data_unseen)
predictions

# Klasifikasi

## Import dataset dari Pycaret Repository

In [None]:
from pycaret.datasets import get_data
data_cf = get_data('diabetes')
data_cf

## Data Preprocessing

In [None]:
# Membagi data menjadi training dan testing
from sklearn.model_selection import train_test_split
train, test = train_test_split(data_cf, test_size=0.1, random_state = 42)

In [None]:
# initialize setup untuk data transformation
from pycaret.classification import *
exp_cf = setup(train, target = 'Class variable', log_experiment = True, experiment_name = 'diabetes')

## Data Modeling

In [None]:
# compare semua model
best_cf = exp_cf.compare_models()

### Tuning hyperparameter

In [None]:
# Pilih algoritma
et = exp_cf.create_model('et')

In [None]:
# Automatically tune the hyperparameters of the model
et_tuned = exp_cf.tune_model(et)

### Ensemble Model

In [None]:
# Membuat model random forest reguler
rf = exp_cf.create_model('rf')

In [None]:
# Membuat ensembling model random forest reguler
rf_bagged = exp_cf.ensemble_model(rf)

In [None]:
# AUC plot
plot_model(rf, plot = 'auc')
# Decision Boundary
plot_model(rf, plot = 'boundary')
# Precision Recall Curve
plot_model(rf, plot = 'pr')
# Validation Curve
plot_model(rf, plot = 'vc')

## Evaluasi Model

In [None]:
evaluate_model(best_cf)

In [None]:
# Predict without passing `data`
# NOTE(review): presumably this scores the hold-out split created by setup, not the training data — confirm
holdout_pred = predict_model(best_cf)
holdout_pred

In [None]:
# Prediksi dari test data
predictions = predict_model(best_cf, data = test)
predictions

## Simpan Model

In [None]:
# saving model
save_model(best_cf, model_name = 'best_cf_model')

In [None]:
save_experiment('best_cf_experiment')

In [None]:
!ls

# Forecasting

## Import dataset dari Pycaret Repository

In [None]:
data_fc = get_data('airline')
data_fc

## Data Preprocessing

In [None]:
# We want to forecast the next 12 months of data and we will use 3 fold cross-validation to test the models.
fh = 12 # or alternately fh = np.arange(1,13)
fold = 3

In [None]:
# Global figure settings for the notebook ----
# Depending on whether you are using Jupyter Notebook, JupyterLab or Google Colab,
# you may have to set the renderer appropriately (e.g. "notebook").
# NOTE: A static renderer ("png") is used here so that the saved notebook size is reduced.
fig_kwargs = {
    "renderer": "png",
    "width": 1000,
    "height": 400,
}

In [None]:
from pycaret.time_series import TSForecastingExperiment

exp = TSForecastingExperiment()
exp.setup(data=data_fc, fh=fh, fold=fold, fig_kwargs=fig_kwargs, session_id=42, verbose=False)
exp.models()

In [None]:
exp_fc = TSForecastingExperiment()

# We can see that specifying a value for point_alpha enables `Enforce Prediction Interval` in the grid (and limits the models).
exp_fc.setup(data=data_fc, fh=fh, fold=fold, fig_kwargs=fig_kwargs, point_alpha=0.5, ignore_seasonality_test=True, seasonal_period=12,sp_detection='auto')
exp_fc.models()

## Data Modeling

In [None]:
best_fc = exp_fc.compare_models()

# # To enable slower models such as prophet, BATS and TBATS, add turbo=False
# best_model = exp.compare_models(turbo=False)

### Using Arima

In [None]:
# Create an ARIMA model with an explicit order and seasonal_order.
# NOTE(review): seasonal_order ends in 24, while the exp_fc setup was given seasonal_period=12 — confirm which is intended
model_arima = exp_fc.create_model("arima",order=(0,1,0), seasonal_order=(0,1,0,24))

In [None]:
# Fixed Grid Search
tuned_model_arima = exp_fc.tune_model(model_arima, search_algorithm="grid",n_iter=5)
print(model_arima)
print(tuned_model_arima)

## Evaluasi Model

In [None]:
exp_fc.plot_model([model_arima, tuned_model_arima], data_kwargs={"labels": ["Original", "Tuned"]})

In [None]:
# Regular Plot
exp_fc.plot_model(best_fc)

In [None]:
exp_fc.setup(data=data_fc, fh=fh, fold=fold, fig_kwargs=fig_kwargs, fold_strategy='sliding', verbose=False)
exp_fc.plot_model(best_fc,plot='cv')

In [None]:
# Modified Plot (zoom into the plot to see differences between the 2 plots)
# NOTE(review): model_arima was created with exp_fc but is plotted through exp here — confirm this is intended
exp.plot_model(model_arima, data_kwargs={"alpha": 0.7, "coverage": 0.8})

In [None]:
exp_fc.finalize_model(model_arima)

## Simpan Model

In [None]:
# With Prediction Interval (default coverage = 0.9)
exp.predict_model(model_arima, return_pred_int=True, coverage=0.8)

In [None]:
# With Custom Point Estimate (alpha = 0.7)
# The point estimate is now higher than before since we are asking for the
# 70th percentile as the point estimate, vs. mean/median before.
exp.predict_model(model_arima, alpha=0.7)

In [None]:
# Increased forecast horizon to 2 years instead of the original 1 year
exp.predict_model(model_arima, fh=24)


## Enable MLFlow dan Expose ke internet

In [None]:
!pkill -f gunicorn

In [None]:
!mlflow ui &>/content/logs.txt &

In [None]:
cat /content/logs.txt &

In [None]:
#LocalTunnel
#!npm install localtunnel -q
!npx localtunnel --port 5000 &>/content/logs-localtunnel.txt &

In [None]:
!cat /content/logs-localtunnel.txt

In [None]:
!echo 'Put this IP Public to localtunnel:' & curl ipv4.icanhazip.com;