In [1]:
!pip install scikit-learn~=1.0.0



## Data Loading

In [2]:
from azureml.core import Workspace, Dataset

In [3]:
# Coordinates of the Azure ML workspace this notebook talks to.
subscription_id = '93e27594-4e5d-4e68-8b58-7fe0468e93f4'
resource_group = 'azure-hackathon-2022'
workspace_name = 'evangelion01'

# Workspace handle; later cells use it for datasets, MLflow tracking and
# deployment.
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

In [4]:
# Fetch each registered dataset by name, then materialise it as a pandas frame.
kelulusan_dataset = Dataset.get_by_name(workspace, name='sn_kelulusan_all')
sn_kelulusan_all = kelulusan_dataset.to_pandas_dataframe()

nilai_dataset = Dataset.get_by_name(workspace, name='sn_nilai_all')
sn_nilai_all = nilai_dataset.to_pandas_dataframe()

In [5]:
# Reshape the long score table to one row per `siswa_id` with one column per
# `mapel`; pivot_table applies its default aggregation to duplicate entries.
df_pivot = (
    sn_nilai_all
    .pivot_table(index="siswa_id", columns=["mapel"], values="nilai")
    .reset_index()
)

# Attach the admission rows by matching `siswa_id` against `no_urut`.
df_combined = df_pivot.merge(
    sn_kelulusan_all,
    left_on="siswa_id",
    right_on="no_urut",
)

# Drop both join keys and replace every missing value with 0.
df_cleaned = df_combined.drop(columns=["siswa_id", "no_urut"]).fillna(0)

In [6]:
# Preview the first five rows of the cleaned table.
df_cleaned.head(n=5)

Unnamed: 0,ANTR,BIO,EKO,FIS,GEO,IND,ING,JAP,JER,KIM,...,PKN,PKR,SBK,SEJ,SI,SIND,SING,SOS,SUN,masuk
0,0.0,84.4,82.2,83.0,0.0,85.4,90.2,0.0,0.0,82.4,...,89.6,81.2,85.0,0.0,81.4,0.0,0.0,0.0,83.8,LOLOS
1,0.0,84.8,86.4,83.8,0.0,89.4,87.0,0.0,0.0,82.4,...,90.8,85.0,85.4,0.0,87.4,0.0,0.0,0.0,86.0,LOLOS
2,0.0,84.4,85.2,82.2,0.0,86.6,82.2,0.0,0.0,79.0,...,89.0,84.6,85.0,0.0,83.0,0.0,0.0,0.0,86.8,TIDAK
3,0.0,85.2,85.2,82.6,0.0,86.8,84.8,0.0,0.0,79.8,...,87.8,82.8,83.0,0.0,83.4,0.0,0.0,0.0,84.6,LOLOS
4,0.0,79.2,80.6,80.0,0.0,83.2,85.8,0.0,0.0,77.6,...,86.2,84.4,82.8,0.0,81.8,0.0,0.0,0.0,82.4,TIDAK


In [7]:
# Fetch the workspace's default datastore handle.
# NOTE(review): `datastore` is not referenced by any of the cells visible here
# -- confirm it is still needed.
datastore = workspace.get_default_datastore()

In [17]:
# List the column labels of the cleaned table; the model cells below take the
# last column as the label and all preceding columns as features.
df_cleaned.columns

Index(['ANTR', 'BIO', 'EKO', 'FIS', 'GEO', 'IND', 'ING', 'JAP', 'JER', 'KIM',
       'MTK', 'MTK_P', 'PAI', 'PJK', 'PKN', 'PKR', 'SBK', 'SEJ', 'SI', 'SIND',
       'SING', 'SOS', 'SUN', 'masuk'],
      dtype='object')

## Model Building

In [8]:
import mlflow
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [9]:
# Name of the MLflow experiment that groups the runs from this notebook.
experiment_name = "klasifikasi_snmptn_sklearn"

# Point MLflow at the workspace's tracking URI, make the experiment active,
# and switch on autologging. No run is started here.
tracking_uri = workspace.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)
mlflow.autolog()

2022/06/17 07:20:41 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2022/06/17 07:20:41 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.
2022/06/17 07:20:41 INFO mlflow.pyspark.ml: No SparkSession detected. Autologging will log pyspark.ml models contained in the default allowlist. To specify a custom allowlist, initialize a SparkSession prior to calling mlflow.pyspark.ml.autolog() and specify the path to your allowlist file via the spark.mlflow.pysparkml.autolog.logModelAllowlistFile conf.
2022/06/17 07:20:41 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.ml.


In [10]:
# Features are every column except the last; the last column is the label.
feature_frame = df_cleaned.iloc[:, :-1]
label_column = df_cleaned.iloc[:, -1]
X = feature_frame.values
y = label_column.values

# Hold out 33% of the rows for evaluation, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [11]:
# Seed the classifier so repeated runs fit the same trees; 42 matches the seed
# already used for the train/test split.
clf = GradientBoostingClassifier(random_state=42)

# Fit inside an MLflow run so training and evaluation are recorded together.
with mlflow.start_run() as run:
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split and log the metrics with a "val_" prefix.
    mlflow.sklearn.eval_and_log_metrics(clf, X_test, y_test, prefix="val_")

    # Register the "model" artifact of this run in the model registry.
    # NOTE(review): the artifact is presumably written by autologging during
    # fit -- confirm autolog is enabled before this cell runs.
    model_uri = "runs:/{}/model".format(run.info.run_id)
    model = mlflow.register_model(model_uri, "klasifikasi_snmptn_sklearn_model")


Registered model 'klasifikasi_snmptn_sklearn_model' already exists. Creating a new version of this model...
2022/06/17 07:20:54 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: klasifikasi_snmptn_sklearn_model, version 4
Created version '4' of model 'klasifikasi_snmptn_sklearn_model'.


In [12]:
# Predict on the held-out split and print the per-class report.
y_pred = clf.predict(X_test)
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

       LOLOS       0.51      0.53      0.52        36
       TIDAK       0.78      0.78      0.78        80

    accuracy                           0.70       116
   macro avg       0.65      0.65      0.65       116
weighted avg       0.70      0.70      0.70       116



## Deploy

In [13]:
# imports needed to define the deployment environment and the ACI service configuration
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.webservice import AciWebservice

In [14]:
# Fetch version 1 of the named environment from the workspace and select the
# 'latest' inferencing stack on it.
env = Environment.get(
    workspace=workspace,
    name="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu",
    version=1,
)
env.inferencing_stack_version = 'latest'

# Compute resources and metadata for the ACI web service.
service_tags = {"data": "sn_cleaned", "method": "sklearn"}
service_description = "Model klasifikasi kelulusan SNMPTN menggunakan Gradient Boosting"
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=0.5,
    memory_gb=0.5,
    tags=service_tags,
    description=service_description,
)

In [15]:
import uuid
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model

In [16]:
# Look up the registered model by name (no version is pinned here).
model = Model(workspace, "klasifikasi_snmptn_sklearn_model")

# Pair the scoring script with the environment it runs in.
inference_config = InferenceConfig(
    entry_script="serve_snmptn.py",
    environment=env,
)

# Append the first four hex characters of a random UUID to the service name.
service_suffix = uuid.uuid4().hex[:4]
service_name = "klasifikasi-snmptn-sklearn-" + service_suffix

# Submit the deployment with the ACI configuration.
service = Model.deploy(
    workspace=workspace,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)

# Wait for the deployment to finish, echoing its progress.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-06-17 07:20:56+00:00 Creating Container Registry if not exists.
2022-06-17 07:20:56+00:00 Registering the environment.
2022-06-17 07:20:56+00:00 Use the existing image.
2022-06-17 07:20:56+00:00 Generating deployment configuration.
2022-06-17 07:20:58+00:00 Submitting deployment to compute.
2022-06-17 07:21:02+00:00 Checking the status of deployment klasifikasi-snmptn-sklearn-af92..
2022-06-17 07:24:42+00:00 Checking the status of inference endpoint klasifikasi-snmptn-sklearn-af92.
Succeeded
ACI service creation operation finished, operation "Succeeded"
