# Multi-objective Optimization Streamlit Application
- Author: senkin.zhan@datarobot.com, yifu.gu@datarobot.com

## Summary
This accelerator introduces an approach to building a Streamlit application that uses DataRobot deployments for multi-objective optimization.

This notebook outlines how to:

1. Create multiple projects
2. Build a deployment from the best model of each project
3. Build Streamlit application connecting to DataRobot


## Setup

### Install and import libraries

In [10]:
%pip install drapps pathspec -q

In [None]:
import datetime as dt
import os
import pickle
import time

from dask import compute, delayed
import datarobot as dr
from datarobot.models.dataset import Dataset
import numpy as np
import pandas as pd

if not os.getenv("DATAROBOT_NOTEBOOK_IMAGE"):
    print("not running in DataRobot Notebook")
    from dotenv import load_dotenv
    load_dotenv()

### Bind variables

In [None]:
# Application path: folder that contains the application source code.
input_path = "./multi-objective-optimization-files/"

# Partitioning: set this to the name of a group column (e.g. "Group_ID") to
# use group k-fold cross-validation, or leave it as False to use random k-fold.
group_col = False

# Dataset file name. The default is the Japanese version; to use the English
# version, point this at the steel_strength_en file instead.
file_name = "steel_strength_jp.csv"

# Target columns, one per objective. The default is the Japanese version; for
# the English version use ['Yield strength', 'Tensile strength', 'Average strength'].
targets = ["降伏強度", "引張強度", "平均強度"]

# Optimization direction for each target, in the same order as `targets`.
# Each entry is either "minimize" or "maximize"; at most 30 targets are supported.
directions = ["maximize", "maximize", "maximize"]

# Name of the Streamlit app uploaded to DataRobot as a custom application.
# It cannot duplicate the name of another app in the same organization.
app_name = "multi-objective-optimization-demo"

### Run Autopilot on DataRobot

In [15]:
%%time


def __run_autopilot(target, df, group_col):
    """Create a DataRobot project for one target and start Autopilot in quick mode.

    Args:
        target: Name of the target column in ``df``; also used as the project
            name.
        df: Training data holding the feature columns and the target column.
        group_col: Group column for group cross-validation, given either as a
            column name or as a list of column names. Any falsy value
            (``False``, ``None``, ``""``, ``[]``) selects random
            cross-validation instead.

    Returns:
        The project created by ``dr.Project.create`` after
        ``analyze_and_model`` has been called on it.

    Raises:
        ValueError: If ``group_col`` is ``True``; a column name is required to
            build the group partition.
    """
    if group_col is True:
        # `True` carries no column name, so it cannot describe a group partition.
        raise ValueError(
            "group_col must be a column name (or False for random partitioning), not True"
        )

    project = dr.Project.create(df, project_name=target)

    if group_col:
        # Accept both "Group_ID" and ["Group_ID"] without nesting the list.
        if isinstance(group_col, str):
            key_cols = [group_col]
        else:
            key_cols = list(group_col)
        partitioning = dr.GroupCV(holdout_pct=0, partition_key_cols=key_cols, reps=5)
    else:
        partitioning = dr.RandomCV(holdout_pct=0, reps=5)

    project.analyze_and_model(
        worker_count=-1,
        target=target,
        mode="quick",
        partitioning_method=partitioning,
        advanced_options=dr.AdvancedOptions(),
        max_wait=10000,
    )

    return project


def __wait_for_cv(projects):
    """Wait until every job of every project has completed.

    A status line is printed per project. The label for each line comes from
    the module-level ``targets`` list, which must be in the same order as
    ``projects``. A failure in one project is reported and does not stop the
    remaining projects from being processed.

    Args:
        projects: Projects to wait for, one per entry of ``targets``.
    """
    for i, queued_project in enumerate(projects):
        # Pair project i with target i; the previous `projects[i - 1]` lookup
        # shifted the pairing by one and started from the last project.
        target = targets[i]
        try:
            # Re-fetch the project by id before listing its jobs.
            project = dr.Project.get(project_id=queued_project.id)
            for job in project.get_all_jobs():
                job.wait_for_completion(max_wait=60000)
            print("Project " + target + " completed running autopilot")
        except Exception as exc:
            # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupts
            # working during the long waits, and the cause is shown to the user.
            print("Project " + target + " occured error" + f": {exc!r}")


# Load the dataset and write a copy of it into the app folder as feature.csv.
df = pd.read_csv(f"{input_path}{file_name}")
df.to_csv(f"{input_path}feature.csv", index=False)

# Feature-only frame: every column except the targets.
df_feature = df.drop(columns=targets)

# Queue one Autopilot run per target. Each run gets the shared features plus
# only its own target column.
delayed_dr_projects = []
for target_name in targets:
    training_frame = df_feature.copy()
    training_frame[target_name] = df[target_name]
    delayed_dr_projects.append(
        delayed(__run_autopilot)(target_name, training_frame, group_col)
    )

# Evaluate the queued runs; the first element of the result holds the projects.
projects = compute(delayed_dr_projects)[0]

# Block until the jobs of every project have finished.
__wait_for_cv(projects)

### Run deployment on DataRobot

In [16]:
%%time


def __run_deployment(project):
    """Register a model of ``project`` and deploy it.

    The model used is the first one returned by
    ``project.get_model_records()`` (presumably the top leaderboard entry;
    confirm the default sort order). The project's target name is used both
    as the registered model version name and as the deployment label, and the
    first server from ``dr.PredictionServer.list()`` becomes the default
    prediction server.

    Args:
        project: Project whose model should be deployed.

    Returns:
        The deployment created from the registered model version.
    """
    top_model = project.get_model_records()[0]
    target_name = project.target
    server = dr.PredictionServer.list()[0]

    version = dr.RegisteredModelVersion.create_for_leaderboard_item(
        model_id=top_model.id, name=target_name
    )

    return dr.Deployment.create_from_registered_model_version(
        version.id,
        label=target_name,
        default_prediction_server_id=server.id,
    )


# Queue one deployment task per project, then evaluate them with compute();
# the first element of the result holds the deployments.
delayed_dr_projects = [delayed(__run_deployment)(proj) for proj in projects]

deployments = compute(delayed_dr_projects)[0]

### Save deployment ids

In [17]:
# Collect the id of every deployment, keeping the original order.
deployment_ids = [deployment.id for deployment in deployments]

# Show each deployment next to the target and direction at the same position.
for idx, deployment in enumerate(deployments):
    print(deployment, targets[idx], deployment.id, directions[idx])

# Write the target / deployment id / direction table to config.csv in the app folder.
config_columns = {
    "Target Name": targets,
    "Deployment ID": deployment_ids,
    "Optimization Direction": directions,
}
df_config = pd.DataFrame(config_columns)
df_config.to_csv(f"{input_path}config.csv", index=False)

### Build the Streamlit app on DataRobot

In [23]:
%%time

# If you are not running inside DataRobot, use the commented-out command below
# instead, which passes your DataRobot API token with -t $DATAROBOT_API_TOKEN.
# !drapps create -t $DATAROBOT_API_TOKEN -e '[DataRobot] Python 3.12 Applications Base' -p $input_path $app_name
!drapps create -e '[DataRobot] Python 3.12 Applications Base' -p $input_path $app_name

### Clean up
Delete the resources that are subject to usage limits (the custom application and the deployments) once you no longer need them.

In [None]:
# !drapps terminate $app_name
# for deployment_id in deployment_ids:
#     deployment = dr.Deployment.get(deployment_id)
#     deployment.delete()