<a href="https://colab.research.google.com/github/bayhaqy/Low-Code-MachineLearning/blob/main/Penerapan_Machine_Learning_hingga_Tahapan_Deployment_dengan_Pycaret.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Penerapan Machine Learning hingga Tahapan Deployment dengan Pycaret.ipynb

## Setup Requirement

### Set Log level

In [None]:
# Only enable critical logging (Optional)
import os
os.environ["PYCARET_CUSTOM_LOGGING_LEVEL"] = "CRITICAL"

### Install Library

In [None]:
!pip install pycaret[full] -q
#!pip install mlflow -q
#!pip install pycaret-ts-alpha
!pip install deep_translator -q
!pip install langdetect -q
!pip install vaderSentiment -q

### Check Installed Library

In [None]:
def what_is_installed():
    """Print the versions of PyCaret and of the libraries it reports on.

    PyCaret is imported inside the function, so a missing installation raises
    ``ModuleNotFoundError`` when the function is called, not when it is defined.
    """
    import pycaret
    pycaret.show_versions()

try:
    what_is_installed()
except ModuleNotFoundError:
    !pip install pycaret
    what_is_installed()

### Import Library

In [None]:
import time
import numpy as np
import pandas as pd
import duckdb

In [None]:
# Print the built-in help text of the pandas package (the output is very long)
help(pd)

# Sales Prediction

## Import dataset dari GitHub Repository

In [None]:
import pandas as pd

# Load data with specified encoding
sales_data = pd.read_csv('https://github.com/bayhaqy/Datasets/raw/main/Sales/Superstore.csv', encoding='latin1')

In [None]:
pd.set_option('display.max_columns', None)

## Exploratory Data Analysis (EDA)

### Mencari informasi tentang data tersebut

In [None]:
# Melihat 5 data teratas
sales_data.head(5)

In [None]:
sales_data.columns

In [None]:
sales_data['Order Date'] = pd.to_datetime(sales_data['Order Date'], dayfirst=True, errors='coerce').dt.date
sales_data['Ship Date'] = pd.to_datetime(sales_data['Ship Date'], dayfirst=True, errors='coerce').dt.date

In [None]:
# Melihat informasi data
sales_data.info()

In [None]:
# Melihat statistik deskriptif
sales_data.describe()

In [None]:
# Melihat missing value
sales_data.isnull().sum().sort_values(ascending = False)

In [None]:
# Find Duplicate
sales_data.duplicated().any()

In [None]:
duckdb.query("SELECT Country, City, COUNT(*) as Total FROM sales_data group by Country, City order by Total Desc LIMIT 5").df()

In [None]:
duckdb.query("SELECT Category, COUNT(*) as Total FROM sales_data group by Category order by Total Desc LIMIT 5").df() # returns a result dataframe

In [None]:
print('Dataset diambil dari',sales_data['Order Date'].min(),'Sampai',sales_data['Order Date'].max())

In [None]:
# Distribusi nilai dalam kolom kategorikal
print(sales_data['Segment'].value_counts())
print('')
print(sales_data['Sub-Category'].value_counts())
print('')
print(sales_data['Category'].value_counts())
print('')
print(sales_data['Region'].value_counts())
print('')
print(sales_data['Ship Mode'].value_counts())

### Memvisualisasi Data yang ada

In [None]:
from scipy.stats import probplot
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

# Columns flagged as skewed during exploration.
# NOTE(review): this list is not read by the loop below, which inspects every numeric column — confirm intent.
skewed_cols = ['Quantity','Discount','Profit']

# For every numeric column: report skewness and kurtosis, then draw three
# diagnostics side by side (distribution, box plot, normal Q-Q plot).
for col in sales_data.select_dtypes(np.number).columns:
    print(f"Skewness of {col}:",sales_data[col].skew())
    print(f"Kurtosis of {col}:",sales_data[col].kurt())

    # One row with three axes, one per diagnostic, so no plot is drawn on top of another
    fig, (ax_dist, ax_box, ax_qq) = plt.subplots(nrows=1, ncols=3, figsize=(14, 4))

    # Density histogram with a KDE curve
    sns.histplot(sales_data[col], kde=True, stat='density', ax=ax_dist)
    # Passing the series as x keeps the box plot horizontal
    sns.boxplot(x=sales_data[col], ax=ax_box)
    # probplot draws onto any object exposing plot() and text(), such as an Axes
    probplot(sales_data[col], plot=ax_qq, dist='norm', rvalue=True)

    plt.show()
    # Release the figure so a long column list does not accumulate open figures
    plt.close(fig)

In [None]:
# Share of rows per product category
category_counts = sales_data['Category'].value_counts()
labels, values = category_counts.keys(), category_counts.values

plt.figure(figsize=(10,5))
plt.pie(values,labels=labels,autopct='%1.2f%%',shadow=True)
plt.show()


In [None]:
plt.figure(figsize=(10,5))
sns.countplot(x='Region',data=sales_data)
plt.show()

In [None]:
# Share of rows per customer segment; the first (largest) slice is pulled out
segment_counts = sales_data['Segment'].value_counts()
labels, values = segment_counts.keys(), segment_counts.values

plt.figure(figsize=(10,5))
plt.pie(
    values,
    labels=labels,
    autopct='%1.2f%%',
    shadow=True,
    explode=[0.1,0,0],
    startangle=90,
    textprops={'fontsize': 14},
    colors=['crimson','lawngreen','sienna'],
)
plt.show()

In [None]:
# DataFrame.query() requires an expression string; example filter: rows sold at a loss
sales_data.query("Profit < 0").head()

In [None]:
# Share of rows per shipping mode; the three largest slices are pulled out
ship_mode_counts = sales_data['Ship Mode'].value_counts()
labels, values = ship_mode_counts.keys(), ship_mode_counts.values

plt.figure(figsize=(10,5))
plt.pie(
    values,
    labels=labels,
    autopct='%1.2f%%',
    shadow=True,
    explode=[0.1,0.1,0.1,0],
    startangle=90,
    textprops={'fontsize': 14},
    colors=['crimson','lawngreen','sienna','cyan'],
)
plt.show()

## Data Preprocessing

In [None]:
# Derived financial features, added as new columns on sales_data.
# NOTE(review): 'Profit Margin', 'Discount Percentage' and 'Operating Expenses'
# are computed from 'Sales', which is the target given to setup() further down;
# together with 'Profit' they determine 'Sales' exactly (target leakage) and
# cannot be computed for new data where 'Sales' is unknown.
sales_data['Profit Margin'] = sales_data['Profit']/sales_data['Sales']
sales_data['Discounted Profit'] = sales_data['Profit']-sales_data['Profit']*sales_data['Discount']
sales_data['Discount Percentage'] = sales_data['Discount']/sales_data['Sales']*100
sales_data['Operating Expenses'] = sales_data['Sales'] - sales_data['Profit']
# NOTE(review): subtracts 'Discount' directly from 'Profit' — confirm both are in the same unit.
sales_data['Net Profit'] = sales_data['Profit'] - sales_data['Discount']

In [None]:
# Expand both date columns into year / month / day / weekday parts,
# then drop the raw date columns.
date_parts = {'Year': 'year', 'Month': 'month', 'Day': 'day', 'Weekday': 'dayofweek'}

# Unparseable values become NaT instead of raising
for prefix in ('Order', 'Ship'):
    sales_data[f'{prefix} Date'] = pd.to_datetime(sales_data[f'{prefix} Date'], errors='coerce')

# Order parts are added first, then Ship parts
for prefix in ('Order', 'Ship'):
    for suffix, accessor in date_parts.items():
        sales_data[f'{prefix} {suffix}'] = getattr(sales_data[f'{prefix} Date'].dt, accessor)

sales_data.drop(columns=['Order Date', 'Ship Date'], inplace=True)

In [None]:
# sales_data = sales_data.drop_duplicates()

# sales_data.drop('Country', axis=1, inplace=True)

# sales_data.drop(['Order ID','Customer ID','Customer Name','Product ID'],axis=1,inplace=True)

# # Menangani missing values
# sales_data.dropna(inplace=True)

# final_selected_features = ['Profit',
#                            'Discount Percentage',
#                            'Postal Code',
#                            'Discount',
#                            'Order Day',
#                            'Order Month',
#                            'Quantity',
#                            'Operating Expenses',
#                            'Ship Day',
#                            'Order Weekday',
#                            'Sales']

# sales_data = sales_data[final_selected_features]
# sales_data.head(5)

In [None]:
# from sklearn.model_selection import train_test_split

# X = sales_data.drop('Sales',axis=1)
# y = sales_data['Sales']

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Initialise the PyCaret regression experiment for the sales data
from pycaret.regression import *

# Columns computed from the target ('Sales'); they are excluded so the model
# cannot read the answer from its own inputs.
LEAKY_SALES_FEATURES = ['Profit Margin', 'Discount Percentage', 'Operating Expenses']

sales_set = setup(
    sales_data,
    target = 'Sales',
    # Only list columns that actually exist, so the cell also runs if the feature cell was skipped
    ignore_features = [col for col in LEAKY_SALES_FEATURES if col in sales_data.columns],
    transform_target = True,
    log_experiment = 'mlflow',
    # Experiment name for the MLflow log (this section models sales, not the diamond data)
    experiment_name = 'sales',
    use_gpu = False,
    train_size = 0.8,
    # Fixed seed so the split and model results are reproducible across runs
    session_id = 42,
)

## Data Modeling

In [None]:
# check models yang tersedia
sales_set.models()

In [None]:
# compare semua model
best_sales_model = sales_set.compare_models(sort = 'MAE')

In [None]:
# Pilih algoritma
sales_model = sales_set.create_model('xgboost')

In [None]:
# Melakukan tunning hyperparameter pada model secara otomatis
best_sales_model = sales_set.tune_model(sales_model)

## Evaluasi Model

In [None]:
best_sales_model

In [None]:
evaluate_model(best_sales_model)

In [None]:
# List the configuration variables available from the experiment
# (get_config belongs to the experiment object, not to the trained estimator)
sales_set.get_config()


In [None]:
# Draw, in order: prediction-error plot, residuals plot, feature-importance plot
for plot_kind in ('error', 'residuals', 'feature'):
    plot_model(best_sales_model, plot = plot_kind)

In [None]:
# interpret summary model
interpret_model(best_sales_model, plot = 'summary')

## Simpan Model dan Experiment

In [None]:
save_model(best_sales_model, 'best_sales_model')

In [None]:
save_experiment('best_sales_experiment')

## Load Model dan Experiment

In [None]:
# load experiment dan dataframe yang sebelumnya di simpan
exp_from_disk = load_experiment('best_sales_experiment', data=sales_data)


In [None]:
# load model
from pycaret.regression import load_model

pipeline = load_model('best_sales_model')

# print pipeline
print(pipeline)

In [None]:
# Predict on the full dataset (the target column 'Sales' is still present in sales_data here)
predictions = exp_from_disk.predict_model(pipeline, data = sales_data)
predictions.head(5)

In [None]:
# Copy data dan hapus variable target
data_unseen = sales_data.copy()
data_unseen.drop('Sales', axis = 1, inplace = True)

# Prediksi dari data yang tidak ada variable target
predictions = predict_model(pipeline, data = data_unseen)
predictions.head(5)

## Buat API dan Docker dan Apps

### Buat Apps

In [None]:
# create app
create_app(best_sales_model)

### Buat API

In [None]:
exp_from_disk.create_api(best_sales_model, 'best_sales_model')

### Buat Docker

In [None]:
# Buat dockerfile
create_docker('best_sales_model')

### Check Files

In [None]:
!ls

In [None]:
!cat best_sales_model.py

In [None]:
!cat requirements.txt

In [None]:
!cat Dockerfile

# Regresi

## Import dataset dari Pycaret Repository

In [None]:
from pycaret.datasets import get_data
from pycaret.time_series import TSForecastingExperiment

data_reg = get_data('diamond')

In [None]:
# Show every column when a DataFrame is displayed
# (set_option is a pandas-level function; DataFrames have no such method)
pd.set_option('display.max_columns', None)

In [None]:
data_reg

## Exploratory Data Analysis (EDA)

### Mencari informasi tentang data tersebut

In [None]:
data_reg.describe()

In [None]:
data_reg.info()

In [None]:
data_reg.isnull().sum()

### Memvisualisasi Data yang ada

In [None]:
data_reg.plot()

In [None]:
# plot scatter carat_weight dan Price
import plotly.express as px
fig = px.scatter(x=data_reg['Carat Weight'], y=data_reg['Price'], facet_col = data_reg['Cut'],
                 template = 'plotly_dark', opacity = 0.25, trendline='ols', trendline_color_override = 'red',
                 title = 'DATA DIAMOND - CASE STUDY')
fig.show()

In [None]:
# plot histogram
fig = px.histogram(data_reg, x=["Price"], template = 'plotly_dark', title = 'Histogram dari Price')
fig.show()

In [None]:
import numpy as np

# create a copy of data
data_copy = data_reg.copy()

# create a new feature Log_Price
data_copy['Log_Price'] = np.log(data_reg['Price'])

# plot histogram
fig = px.histogram(data_copy, x=["Log_Price"], title = 'Histgram dari Log Price', template = 'plotly_dark')
fig.show()

## Data Preprocessing

In [None]:
# initialize setup untuk data transformation
from pycaret.regression import *

esp_reg = setup(data_reg, target = 'Price', transform_target = True, log_experiment='mlflow', experiment_name = 'diamond',use_gpu = True)

## Data Modeling

In [None]:
# check models yang tersedia
esp_reg.models()

In [None]:
# compare semua model
best_reg = esp_reg.compare_models(sort = 'MAE')

## Evaluasi Model

In [None]:
best_reg

In [None]:
evaluate_model(best_reg)

In [None]:
# check all available config
esp_reg.get_config()


In [None]:
# Draw, in order: prediction-error plot, residuals plot, feature-importance plot
for plot_kind in ('error', 'residuals', 'feature'):
    plot_model(best_reg, plot = plot_kind)

In [None]:
# interpret summary model
interpret_model(best_reg, plot = 'summary')

## Simpan Model dan Experiment

In [None]:
save_model(best_reg, 'best_reg_model')

In [None]:
save_experiment('best_reg_experiment')

## Load Model dan Experiment

In [None]:
# load experiment from disk
exp_from_disk = load_experiment('best_reg_experiment', data=data_reg)


In [None]:
# load model
from pycaret.regression import load_model

pipeline = load_model('best_reg_model')
# print pipeline
print(pipeline)

In [None]:
# Predict on the full dataset (the target column 'Price' is still present in data_reg here)
predictions = exp_from_disk.predict_model(pipeline, data = data_reg)
predictions

In [None]:
# Copy data dan hapus variable target
data_unseen = data_reg.copy()
data_unseen.drop('Price', axis = 1, inplace = True)

# Prediksi dari data yang tidak ada variable target
predictions = predict_model(pipeline, data = data_unseen)
predictions

## Buat API dan Docker dan Apps

### Buat Apps

In [None]:
# create app
create_app(best_reg)

### Buat API

In [None]:
create_api(best_reg, 'best_reg_api')

### Buat Docker

In [None]:
# create docker
create_docker('best_reg_api')

### Check Files

In [None]:
!ls

In [None]:
!cat best_reg_api.py

In [None]:
!cat requirements.txt

In [None]:
!cat Dockerfile

# Klasifikasi

## Import dataset dari Pycaret Repository

In [None]:
from pycaret.datasets import get_data
data_cf = get_data('diabetes')
data_cf

## Data Preprocessing

In [None]:
# Membagi data menjadi training dan testing
from sklearn.model_selection import train_test_split
train, test = train_test_split(data_cf, test_size=0.1, random_state = 42)

In [None]:
# initialize setup untuk data transformation
from pycaret.classification import *
exp_cf = setup(train, target = 'Class variable', log_experiment = True, experiment_name = 'diabetes')

## Data Modeling

In [None]:
# compare semua model
best_cf = exp_cf.compare_models()

### Tuning hyperparameter

In [None]:
# Pilih algoritma
et = exp_cf.create_model('et')

In [None]:
# Melakukan tunning hyperparameter pada model secara otomatis
et_tuned = exp_cf.tune_model(et)

### Ensemble Model

In [None]:
# Membuat model random forest reguler
rf = exp_cf.create_model('rf')

In [None]:
# Membuat ensembling model random forest reguler
rf_bagged = exp_cf.ensemble_model(rf)

In [None]:
# Draw, in order: AUC plot, decision boundary, precision-recall curve, validation curve
for plot_kind in ('auc', 'boundary', 'pr', 'vc'):
    plot_model(rf, plot = plot_kind)

## Evaluasi Model

In [None]:
evaluate_model(best_cf)

In [None]:
# Predict without passing data; per PyCaret's predict_model documentation this scores
# the hold-out set that setup() kept aside, not the training rows
holdout_pred = predict_model(best_cf)
holdout_pred

In [None]:
# Prediksi dari test data
predictions = predict_model(best_cf, data = test)
predictions

## Simpan Model

In [None]:
# saving model
save_model(best_cf, model_name = 'best_cf_model')

In [None]:
save_experiment('best_cf_experiment')

## Buat API dan Docker dan Apps

### Buat Apps

In [None]:
# create app
create_app(best_cf)

### Buat API

In [None]:
create_api(best_reg, 'best_reg_api')

### Buat Docker

In [None]:
# create docker
create_docker('best_reg_api')

### Check Files

In [None]:
!ls

In [None]:
!cat best_reg_api.py

In [None]:
!cat requirements.txt

In [None]:
!cat Dockerfile

# Forecasting

## Import dataset dari Pycaret Repository

In [None]:
data_fc = get_data('airline')
data_fc

## Data Preprocessing

In [None]:
# We want to forecast the next 12 months of data and we will use 3 fold cross-validation to test the models.
fh = 12 # or alternately fh = np.arange(1,13)
fold = 3

In [None]:
# Global Figure Settings for notebook ----
# Depending on whether you are using jupyter notebook, jupyter lab, Google Colab, you may have to set the renderer appropriately
# NOTE: Setting to a static renderer here so that the notebook saved size is reduced.
fig_kwargs = {
    # Static PNG output; use "notebook" instead for interactive figures.
    # (A dict keeps only the last value of a repeated key, so a single entry is listed.)
    "renderer": "png",
    "width": 1000,
    "height": 400,
}

In [None]:
from pycaret.time_series import TSForecastingExperiment

exp = TSForecastingExperiment()
exp.setup(data=data_fc, fh=fh, fold=fold, fig_kwargs=fig_kwargs, session_id=42, verbose=False)
exp.models()

In [None]:
exp_fc = TSForecastingExperiment()

# We can see that specifying a value for point_alpha enables `Enforce Prediction Interval` in the grid (and limits the models).
exp_fc.setup(data=data_fc, fh=fh, fold=fold, fig_kwargs=fig_kwargs, point_alpha=0.5, ignore_seasonality_test=True, seasonal_period=12,sp_detection='auto')
exp_fc.models()

## Data Modeling

In [None]:
best_fc = exp_fc.compare_models()

# # To enable slower models such as prophet, BATS and TBATS, add turbo=False
# best_model = exp.compare_models(turbo=False)

### Using Arima

In [None]:
model_arima = exp_fc.create_model("arima",order=(0,1,0), seasonal_order=(0,1,0,24))

In [None]:
# Fixed Grid Search
tuned_model_arima = exp_fc.tune_model(model_arima, search_algorithm="grid",n_iter=5)
print(model_arima)
print(tuned_model_arima)

## Evaluasi Model

In [None]:
exp_fc.plot_model([model_arima, tuned_model_arima], data_kwargs={"labels": ["Original", "Tuned"]})

In [None]:
# Regular Plot
exp_fc.plot_model(best_fc)

In [None]:
exp_fc.setup(data=data_fc, fh=fh, fold=fold, fig_kwargs=fig_kwargs, fold_strategy='sliding', verbose=False)
exp_fc.plot_model(best_fc,plot='cv')

In [None]:
# Modified Plot (zoom into the plot to see differences between the 2 plots)
exp.plot_model(model_arima, data_kwargs={"alpha": 0.7, "coverage": 0.8})

In [None]:
exp_fc.finalize_model(model_arima)

## Prediksi dengan Model

In [None]:
# With Prediction Interval (default coverage = 0.9)
exp.predict_model(model_arima, return_pred_int=True, coverage=0.8)

In [None]:
# With Custom Point Estimate (alpha = 0.7)
# The point estimate is now higher than before since we are asking for the
# 70% percentile as the point estimate), vs. mean/median before.
exp.predict_model(model_arima, alpha=0.7)

In [None]:
# Increased forecast horizon to 2 years instead of the original 1 year
exp.predict_model(model_arima, fh=24)

# Analisis Sentimen

## Import dataset dari Pycaret Repository

In [None]:
from pycaret.datasets import get_data
data_as = get_data('tweets')

In [None]:
# Keep one row per distinct tweet text, then discard rows whose 'tweet' is missing
data_as = (
    data_as
    .drop_duplicates(subset='tweet')
    .dropna(subset=['tweet'])
)

In [None]:
# Remove column id
data_as = data_as.drop(columns=['id'])
data_as

## Exploratory Data Analysis (EDA)

In [None]:
data_as.info()

In [None]:
data_as.describe()

## Data Preprocessing

### Translate ke English

In [None]:
from deep_translator import GoogleTranslator
from langdetect import detect
import regex as re

# Translation helper built on GoogleTranslator
def translate_text(text, source='auto', target='en'):
    """Translate ``text`` from ``source`` to ``target``.

    Args:
        text: The text to translate.
        source: Source language code passed to GoogleTranslator.
        target: Target language code passed to GoogleTranslator.

    Returns:
        The translated text. ``text`` is returned unchanged when ``source``
        equals ``target`` or when the translation raises; in the latter case
        the error message is printed.
    """
    try:
        if source == target:
            # Nothing to translate
            return text
        return GoogleTranslator(source=source, target=target).translate(text)
    except Exception as err:
        print(f"Translation error: {str(err)}")
        return text

# Language detection with a retry and a safe fallback
def detect_language(text, fallback='auto'):
    """Return the language code that langdetect reports for ``text``.

    Detection is attempted on the raw text first. If that raises, it is retried
    after removing every character that is neither a word character nor
    whitespace (this strips emojis and punctuation).

    Args:
        text: The text to inspect.
        fallback: Value returned when both attempts fail, for example for
            empty, symbol-only or non-string input. The default ``'auto'`` is
            the same value ``translate_text`` uses as its default source.

    Returns:
        The detected language code, or ``fallback`` if detection is impossible.
    """
    try:
        return detect(text)
    except Exception:
        # Fall through to the retry on cleaned text
        pass

    try:
        return detect(re.sub(r'[^\w\s]', '', text))
    except Exception:
        # Both attempts failed; return the fallback instead of aborting
        # a whole DataFrame.apply over one undetectable row.
        return fallback


In [None]:
# Add a new column for detected language
data_as['Detect Language'] = data_as['tweet'].apply(lambda tweet: detect_language(tweet))

In [None]:
# Conditionally apply translation function to the 'Translation' column
data_as['Tweet_Translation'] = data_as.apply(lambda row: row['tweet'] if row['Detect Language'] == 'en' else translate_text(row['tweet'], source=row['Detect Language'], target='en'),axis=1)

In [None]:
data_as.head()

### Labeling Data menggunakan VADER dan TextBlob

In [None]:
!pip install vaderSentiment -q

In [None]:
# Sentiment Analysis
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

# Function for sentiment analysis using VADER
_VADER_ANALYZER = None  # created on first use and shared by later calls


def analyze_sentiment_vader(text, analyzer=None):
    """Label ``text`` as 'Positive', 'Negative' or 'Neutral' from its VADER compound score.

    Args:
        text: The text to score.
        analyzer: Optional object exposing ``polarity_scores(text)`` that returns
            a mapping with a ``'compound'`` entry. When omitted, one shared
            ``SentimentIntensityAnalyzer`` is created on first use and reused,
            instead of constructing a new analyzer for every row.

    Returns:
        'Positive' if compound >= 0.05, 'Negative' if compound <= -0.05,
        otherwise 'Neutral'.
    """
    global _VADER_ANALYZER
    if analyzer is None:
        if _VADER_ANALYZER is None:
            _VADER_ANALYZER = SentimentIntensityAnalyzer()
        analyzer = _VADER_ANALYZER

    compound_score = analyzer.polarity_scores(text)['compound']
    if compound_score >= 0.05:
        return 'Positive'
    if compound_score <= -0.05:
        return 'Negative'
    return 'Neutral'

# Function for sentiment analysis using TextBlob
def analyze_sentiment_textblob(text):
    """Label ``text`` by the sign of its TextBlob polarity.

    Returns:
        'Positive' for polarity above zero, 'Negative' for polarity below
        zero, and 'Neutral' otherwise.
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'

In [None]:
# Conditionally apply VADER sentiment analysis to the 'Translation' column
data_as['Sentiment_VADER'] = data_as['Tweet_Translation'].apply(analyze_sentiment_vader)

# Conditionally apply TextBlob sentiment analysis to the 'Translation' column
data_as['Sentiment_TextBlob'] = data_as['Tweet_Translation'].apply(analyze_sentiment_textblob)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Show the label distribution produced by each tool: VADER first, then TextBlob
for label_column, chart_title in (
    ('Sentiment_VADER', 'Sentiment VADER Distribution'),
    ('Sentiment_TextBlob', 'Sentiment TextBlob Distribution'),
):
    sns.countplot(x=label_column, data=data_as)
    plt.title(chart_title, fontsize=16)
    plt.ylabel('Frekuensi', fontsize=16)
    plt.xticks(rotation='horizontal')
    plt.show()


### Split Data

In [None]:
# Menyimpan DataFrame ke file CSV
data_as.to_csv('data_as.csv', index=False)

In [None]:
data_as.info()

In [None]:
data_as_final = data_as.filter(['tweet','Sentiment_VADER'], axis=1)
data_as_final = data_as_final.rename(columns = {'Sentiment_VADER':'Sentiment'})

### Setup untuk modeling

In [None]:
!pip install pycaret[full]==2.3.10 -q

In [None]:
# initialize setup untuk modeling
from pycaret.nlp import *

exp_as = setup(data = data_as_final, target = 'Sentiment', log_experiment = True, experiment_name = 'SentimentAnalysis',use_gpu = True, session_id = 123,
               text_features_method = 'tf-idf', train_size = 0.8, fix_imbalance = True, fix_imbalance_method = 'SMOTE', outliers_threshold = '0.05',profile = True)

## Data Modeling

In [None]:
# compare semua model
best_as = exp_as.compare_models()

### Tuning hyperparameter

In [None]:
# Pilih algoritma
et = exp_as.create_model('et')

In [None]:
# Melakukan tunning hyperparameter pada model secara otomatis
et_tuned = exp_as.tune_model(et)

### Ensemble Model

In [None]:
# Membuat model random forest reguler
rf = exp_as.create_model('rf')

In [None]:
# Membuat ensembling model random forest reguler
rf_bagged = exp_as.ensemble_model(rf)

In [None]:
# Draw, in order: AUC plot, decision boundary, precision-recall curve, validation curve
for plot_kind in ('auc', 'boundary', 'pr', 'vc'):
    plot_model(rf, plot = plot_kind)

## Evaluasi Model

In [None]:
evaluate_model(best_as)

In [None]:
# Predict without passing data; per PyCaret's predict_model documentation this scores
# the hold-out set kept aside by setup(), not the training rows
holdout_pred = predict_model(best_as)
holdout_pred

## Simpan Model

In [None]:
# Save the sentiment-analysis model under its own name, so the diabetes
# classifier file 'best_cf_model' is not overwritten
save_model(best_as, model_name = 'best_as_model')

In [None]:
# Save the sentiment-analysis experiment under its own name
save_experiment('best_as_experiment')

In [None]:
!ls


# Enable MLFlow dan Expose ke internet

In [None]:
!pkill -f gunicorn

In [None]:
!mlflow ui &>/content/logs.txt &

In [None]:
cat /content/logs.txt &

In [None]:
#LocalTunnel
#!npm install localtunnel -q
!npx localtunnel --port 5000 &>/content/logs-localtunnel.txt &

In [None]:
!cat /content/logs-localtunnel.txt

In [None]:
!echo 'Put this IP Public to localtunnel:' & curl ipv4.icanhazip.com;

# Running API

In [None]:
!python best_sales_model.py  &>/content/logs.txt &

In [None]:
!cat best_sales_model.py

In [None]:
%%writefile best_sales_model.py
# Import necessary libraries
import pandas as pd
from pycaret.regression import load_model, predict_model
from fastapi import FastAPI
import uvicorn
from pydantic import BaseModel
from fastapi import HTTPException

# Create the FastAPI app
app = FastAPI()

# Load the trained model
model = load_model("best_sales_model")

# Define the input and output Pydantic models
class InputModel(BaseModel):
    Row_ID: int
    Order_ID: str
    Order_Date: str
    Ship_Date: str
    Ship_Mode: str
    Customer_ID: str
    Customer_Name: str
    Segment: str
    Country: str
    City: str
    State: str
    Postal_Code: int
    Region: str
    Product_ID: str
    Category: str
    Sub_Category: str
    Product_Name: str
    Quantity: int
    Discount: float
    Profit: float

class OutputModel(BaseModel):
    prediction: float

# Request fields whose training column name is not simply "_" replaced by " "
_SPECIAL_COLUMN_NAMES = {"Sub_Category": "Sub-Category"}


def _to_training_frame(payload: dict) -> pd.DataFrame:
    """Build a one-row frame using the column names and engineered features of the training notebook."""
    column_names = {
        field: _SPECIAL_COLUMN_NAMES.get(field, field.replace("_", " "))
        for field in payload
    }
    df = pd.DataFrame([payload]).rename(columns=column_names)

    # Derived features that do not need 'Sales' (the value being predicted).
    # NOTE(review): features the notebook derives from 'Sales' ('Profit Margin',
    # 'Discount Percentage', 'Operating Expenses') cannot be rebuilt here; a model
    # trained on them cannot be served by this endpoint.
    df["Discounted Profit"] = df["Profit"] - df["Profit"] * df["Discount"]
    df["Net Profit"] = df["Profit"] - df["Discount"]

    # Same calendar parts as the notebook; dates are day-first, unparseable values become NaT
    for prefix in ("Order", "Ship"):
        stamp = pd.to_datetime(df[f"{prefix} Date"], dayfirst=True, errors="coerce")
        df[f"{prefix} Year"] = stamp.dt.year
        df[f"{prefix} Month"] = stamp.dt.month
        df[f"{prefix} Day"] = stamp.dt.day
        df[f"{prefix} Weekday"] = stamp.dt.dayofweek

    return df.drop(columns=["Order Date", "Ship Date"])


# Define the predict function
@app.post("/predict", response_model=OutputModel)
def predict(data: InputModel):
    """Predict 'Sales' for one order.

    Args:
        data: The validated request body.

    Returns:
        A dict with the single key ``"prediction"`` holding the predicted value.

    Raises:
        HTTPException: With status 500 and the error text when anything in
            feature preparation or prediction fails.
    """
    try:
        # Convert the request into the column layout the model was trained on
        features = _to_training_frame(data.dict())

        # Make predictions using the loaded model
        predictions = predict_model(model, data=features)

        # PyCaret 3.x names the output column 'prediction_label'; 2.x used 'Label'
        label_column = "prediction_label" if "prediction_label" in predictions.columns else "Label"

        # Cast to a plain float so the response model does not receive a NumPy scalar
        return {"prediction": float(predictions[label_column].iloc[0])}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Run the FastAPI server
if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)

In [None]:
!cat /content/logs.txt &

In [None]:
!curl -X POST http://127.0.0.1:8000/predict -H "Content-Type: application/json" -d '{"Row_ID": 210, "Order_ID": "CA-2014-135860", "Order_Date": "02-12-2014", "Ship_Date": "08-12-2014", "Ship_Mode": "Standard Class", "Customer_ID": "JH-15985", "Customer_Name": "Joseph Holt", "Segment": "Consumer", "Country": "United States", "City": "Saginaw", "State": "Michigan", "Postal_Code": 48601, "Region": "Central", "Product_ID": "OFF-FA-10000134", "Category": "Office Supplies", "Sub_Category": "Fasteners", "Product_Name": "Advantus Push Pins, Aluminum Head", "Quantity": 9, "Discount": 0.0, "Profit": 16.20989990234375}'

In [None]:
#LocalTunnel
#!npm install localtunnel -q
!npx localtunnel --port 8000 &>/content/logs-localtunnel.txt &

In [None]:
!cat /content/logs-localtunnel.txt

In [None]:
!echo 'Put this IP Public to localtunnel:' & curl ipv4.icanhazip.com;

In [None]:
!sudo lsof -i :8000

In [None]:
!pkill -f "python best_sales_api.py"
!pkill -f "npx localtunnel --port 8000"

In [None]:
!ps aux | grep "python best_sales_api.py"
!ps aux | grep "npx localtunnel --port 8000"

In [None]:
!kill  34137