In [1]:
pip install azureml-opendatasets

Note: you may need to restart the kernel to use updated packages.


In [25]:
from azureml.core import Workspace
ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep='\\n')


Workspace name: mlops-pipeline\nAzure region: eastus\nSubscription id: <your subscription id>\nResource group: rg-acr-2025-111


In [26]:
from azureml.core import Experiment
experiment = Experiment(workspace=ws, name="diabetes-experiment")


In [27]:
# 데이터셋 준비 

from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

x_df = Diabetes.get_tabular_dataset().to_pandas_dataframe().dropna()
y_df = x_df.pop("Y")

X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=66)

print(X_train)


{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [28]:
# 모델 학습/로깅, 모델파일 업로드
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
import joblib
import math

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

for alpha in alphas:
    run = experiment.start_logging()
    run.log("alpha_value", alpha)

    model = Ridge(alpha=alpha)
    model.fit(X=X_train, y=y_train)

    y_pred = model.predict(X=X_test)
    
    rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
    run.log("rmse", rmse)

    model_name = "model_alpha_" + str(alpha) + ".pkl"
    filename = "outputs/" + model_name

    joblib.dump(value=model, filename=filename)
    run.upload_file(name=model_name, path_or_stream=filename)
    run.complete()

    print(f"{alpha} exp completed")


0.1 exp completed
0.2 exp completed
0.3 exp completed
0.4 exp completed
0.5 exp completed
0.6 exp completed
0.7 exp completed
0.8 exp completed
0.9 exp completed
1.0 exp completed


In [29]:
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,mlops-pipeline,Link to Azure Machine Learning studio,Link to Documentation


In [30]:
# Best model 탐색 후 다운로드

minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]
    
    if minimum_rmse is None:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id
    else:
        if run_rmse < minimum_rmse:
            minimum_rmse = run_rmse
            minimum_rmse_runid = run_id

print("Best run_id: " + minimum_rmse_runid)
print("Best run_id rmse: " + str(minimum_rmse))
from azureml.core import Run
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())
best_run.download_file(name=str(best_run.get_file_names()[0]))


Best run_id: bbee1ab1-25ec-47c1-bf1d-5ea8ab0212ba
Best run_id rmse: 56.60520331339142
['model_alpha_0.1.pkl', 'outputs/model_alpha_0.1.pkl']


In [31]:
# DataStore 에 Input/Output 데이터셋 등록

import numpy as np
from azureml.core import Dataset

np.savetxt('features.csv', X_train, delimiter=',')
np.savetxt('labels.csv', y_train, delimiter=',')

datastore = ws.get_default_datastore()
datastore.upload_files(files=['./features.csv', './labels.csv'],
                       target_path='diabetes-experiment/',
                       overwrite=True)

input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/features.csv')])
output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'diabetes-experiment/labels.csv')])


Uploading an estimated of 2 files
Uploading ./features.csv
Uploaded ./features.csv, 1 files out of an estimated total of 2
Uploading ./labels.csv
Uploaded ./labels.csv, 2 files out of an estimated total of 2
Uploaded 2 files


In [32]:
# Best model 등록

import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

model = Model.register(workspace=ws,
                       model_name='diabetes-experiment-model',
                       model_path=f"./{str(best_run.get_file_names()[0])}", 
                       model_framework=Model.Framework.SCIKITLEARN,  
                       model_framework_version=sklearn.__version__,  
                       sample_input_dataset=input_dataset,
                       sample_output_dataset=output_dataset,
                       resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),
                       description='Ridge regression model to predict diabetes progression.',
                       tags={'area': 'diabetes', 'type': 'regression'})

print('Name:', model.name)
print('Version:', model.version)


Registering model diabetes-experiment-model
Name: diabetes-experiment-model
Version: 4


In [42]:
# 모델 배포

from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment

# Environment 설정
env = Environment.get(workspace=ws, name="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu")

inference_config = InferenceConfig(
    entry_script="src/score.py",
    environment=env
)

# ACI 설정
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service_name = 'diabetes-service'

from azureml.core.webservice import Webservice
# 기존 서비스 삭제
try:
    service = Webservice(workspace=ws, name=service_name)
    service.delete()
    print("기존 서비스 삭제 완료")
except Exception as e:
    print("기존 서비스가 없거나 이미 삭제됨:", e)


# Deploy Model
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config
)

service.wait_for_deployment(show_output=True)

# Azure ML ACR(Azure Container Registry) Access Error => ACR Access key: Enable Admin User 


Running
2025-03-31 12:29:16+00:00 Check and wait for operation (bd7dbafb-3296-46eb-8e02-dbb704bc1e09) to finish.Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2025-03-31 12:29:30+00:00 Registering the environment.
2025-03-31 12:29:30+00:00 Use the existing image.
2025-03-31 12:29:33+00:00 Submitting deployment to compute.
2025-03-31 12:29:41+00:00 Checking the status of deployment diabetes-service..
2025-03-31 12:31:20+00:00 Checking the status of inference endpoint diabetes-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration 
  service = Model.deploy(


In [46]:
# 배포 서비스 테스트 : 노트북
print("\n===== 모델 예측 테스트 =====")

import json

test_samples = X_test[0:2].values.tolist()
print(f"입력 데이터: {test_samples}")

input_payload = json.dumps({
    'data': test_samples
})

# 예측 실행
print("예측 요청 중...")
output = service.run(input_payload)

print("예측 결과:")
print(output)


===== 모델 예측 테스트 =====
입력 데이터: [[54.0, 2.0, 27.3, 100.0, 200.0, 144.0, 33.0, 6.0, 4.7449, 76.0], [64.0, 2.0, 27.3, 109.0, 186.0, 107.6, 38.0, 5.0, 5.3083, 99.0]]
예측 요청 중...
예측 결과:
[162.52293958544317, 208.21988024472398]


In [44]:
# 10. 등록된 모델 목록 확인
print("\n===== 등록된 모델 목록 =====")

registered_models = ml_client.models.list()
for model in registered_models:
    print(f"Name: {model.name}, Version: {model.version}, Created: {model.creation_context.created_at}")


===== 등록된 모델 목록 =====
Name: model, Version: None, Created: 2025-03-31 09:38:24.225932+00:00
Name: scaler, Version: None, Created: 2025-03-31 09:38:27.510159+00:00
Name: diabetes-experiment-model, Version: None, Created: 2025-03-31 10:18:03.105315+00:00
Name: diabetes-model-v2, Version: None, Created: 2025-03-31 11:28:21.526966+00:00


In [None]:
# 11. 리소스 정리 (선택 사항)
# print("\n===== 리소스 정리 =====")
# print(f"엔드포인트 {endpoint_name} 삭제 중...")
# ml_client.online_endpoints.begin_delete(name=endpoint_name)
# print("엔드포인트 삭제 완료")