# カスタムモデルの作成

In [1]:

import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

# Stub iterator method; it is bound to the object-storage response body further down
# when that object has no __iter__ of its own, so pandas accepts it as file-like.
def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.

# S3-compatible client for IBM Cloud Object Storage, authenticated with an IAM API key.
# NOTE(review): the API key value below looks like a redacted placeholder - replace it before running.
cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='xxxxxxxxxxxxxxxxxx',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/identity/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.direct.jp-tok.cloud-object-storage.appdomain.cloud')

# Bucket and object key of the CSV file to read.
bucket = '241018wsitz19tok-donotdelete-pr-uv1uk0jqn3ouvz'
object_key = 'sentiment.csv'

# Fetch the object and keep only its body.
body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

# Parse the CSV into a DataFrame and display its first 10 rows as the cell output.
df_1 = pd.read_csv(body)
df_1.head(10)


Unnamed: 0,labels,text
0,ポジティブ,パフォーマンスは素晴らしいです。アプリの起動もスムーズで、マルチタスクを行っても遅延がありません。
1,ネガティブ,ストレージ容量が少ないため、大量のデータを保存する場合には注意が必要です。常に容量不足の警告...
2,ポジティブ,セキュリティ機能は優れています。指紋認証や顔認証など、侵入防止のためのオプションが豊富で、プ...
3,ネガティブ,指紋認証の精度がまだ改善の余地があります。指が湿っていると認識しづらいことがあります。
4,ポジティブ,音質は素晴らしいです。スピーカーからの音楽や通話音はクリアで、外部の騒音にも負けません。
5,ネガティブ,イヤホンジャックが不安定で、接触が悪くなることがあります。音楽を聴いている最中に音が途切れる...
6,ポジティブ,価格は手頃でありながら、性能が高いです。高価な競合製品と比べても、十分な機能を提供しています。
7,ネガティブ,カメラの性能が他の機種に比べると劣っています。特に暗所での撮影時には、ノイズが目立ちます。
8,ポジティブ,バッテリー寿命は驚くほど長く、一日中使用しても安心です。急速充電機能も備えており、急な外出で...
9,ネガティブ,充電器の耐久性が低いです。コネクタがすぐに緩んでしまい、充電が不安定になることがあります。


In [2]:
# 学習データをJSONファイルとして保存
def prepare_data(df):
    """Build the two-column training frame from the raw sentiment data.

    Only the ``text`` and ``labels`` columns are kept (in that order), the
    rows are renumbered from zero, and every label is wrapped in a
    one-element list because the labels column has to hold lists even when
    each row has a single label.

    Args:
        df: DataFrame containing at least the columns ``text`` and ``labels``.

    Returns:
        A new DataFrame with the columns ``text`` and ``labels``; the input
        frame is left untouched.
    """
    selected = df.loc[:, ['text', 'labels']].reset_index(drop=True)
    wrapped_labels = selected['labels'].map(lambda value: [value])
    return selected.assign(labels=wrapped_labels)
    
# Convert the loaded sentiment data to the training layout and write it to a
# local JSON file, one record (object) per row.
train_df = prepare_data(df_1)
train_json_file = './train_data.json'
train_df.to_json(train_json_file, orient='records')

In [3]:
# Slateのカスタム学習モデルを作成
import watson_nlp

from watson_nlp.blocks.classification.transformer import Transformer
from watson_core.data_model.streams.resolver import DataStreamResolver


# Build a data stream from the training data file; each record is expected to
# carry a 'text' string and a 'labels' list.
data_stream_resolver = DataStreamResolver(target_stream_type=list, expected_keys={'text': str, 'labels': list})
train_stream = data_stream_resolver.as_data_stream(train_json_file)

# Load the pretrained Slate model
pretrained_model_resource = watson_nlp.load('pretrained-model_slate.153m.distilled_many_transformer_multilingual_uncased')

# Train the custom Slate model. The number of training epochs is set with num_train_epochs.
classification_model = Transformer.train(train_stream, pretrained_model_resource, num_train_epochs=25)


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at /opt/ibm/nlpmodels/pretrained-model_slate.153m.distilled_many_transformer_multilingual_uncased/artifacts and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
***** Running training *****
  Num examples = 200
  Num Epochs = 25
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 625


Step,Training Loss,Validation Loss


Saving model checkpoint to /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-25
Configuration saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-25/config.json
Model weights saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-25/pytorch_model.bin
tokenizer config file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-25/tokenizer_config.json
Special tokens file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-25/special_tokens_map.json
Saving model checkpoint to /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-50
Configuration saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-50/config.json
Model weights saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-50/pytorch_model.bin
tokenizer config file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-50/tokenizer_config.json
Special tokens file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-50/special_tokens_map.json
Saving model checkpoint to /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoi

Model weights saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-400/pytorch_model.bin
tokenizer config file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-400/tokenizer_config.json
Special tokens file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-400/special_tokens_map.json
Deleting older checkpoint [/tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-275] due to args.save_total_limit
Saving model checkpoint to /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-425
Configuration saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-425/config.json
Model weights saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-425/pytorch_model.bin
tokenizer config file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-425/tokenizer_config.json
Special tokens file saved in /tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-425/special_tokens_map.json
Deleting older checkpoint [/tmp/wsuser/tmpsdsz26hltr-txtclsf-wd/checkpoint-300] due to args.save_total_limit
Saving mod

***** train metrics *****
  epoch                    =       25.0
  total_flos               =     8115GF
  train_loss               =     0.2109
  train_runtime            = 0:14:20.94
  train_samples            =        200
  train_samples_per_second =      5.808
  train_steps_per_second   =      0.726


In [4]:
# Test run of the custom model on a single sentence

text = 'ディスプレイは色鮮やかで、映画やゲームを楽しむのに最適です。'
slate_preds = classification_model.run(text)
# Last expression of the cell: displays the prediction
slate_preds

{
  "classes": [
    {
      "class_name": "ポジティブ",
      "confidence": 0.9916883111000061
    },
    {
      "class_name": "ネガティブ",
      "confidence": 0.008311744779348373
    }
  ],
  "producer_id": {
    "name": "Transformer-based Text Classifier",
    "version": "0.0.1"
  }
}

# WMLへのデプロイ

In [5]:
# Connection settings used by the client cells below. The values are placeholders:
# fill them in before running and do not share the notebook with real values in it.
api_key = 'PASTE YOUR PLATFORM API KEY HERE'
url = 'PASTE YOUR ENDPOINT HERE'
space_id = 'PASTE YOUR SPACE ID HERE'



In [6]:
from ibm_watsonx_ai import APIClient
from ibm_watsonx_ai import Credentials
# Credentials object built from the API key and endpoint URL defined in the settings cell
credentials = Credentials(
    api_key = api_key,
    url= url
)

# Client created without a space id
client = APIClient(credentials)

In [7]:

# Second client, created with the deployment space id
client_space = APIClient(credentials, space_id = space_id)

In [8]:
# Serialize the custom model to a local pickle file
import pickle

modelpickle = 'classification_model.pickle'

with open(modelpickle, mode='wb') as pickle_file:
    pickle.dump(classification_model, pickle_file)


In [9]:
# List the working directory (all entries, long format, oldest first)
!ls -altr

total 608608
-rw-rw---- 1 wsuser wscommon     49552 Oct 22 01:45 train_data.json
drwxrwx--- 1 wsuser wscommon      4096 Oct 22 01:45 ..
drwxrwx--- 2 wsuser wscommon      4096 Oct 22 02:00 .
-rw-rw---- 1 wsuser wscommon 623146254 Oct 22 02:00 classification_model.pickle


In [10]:
# Save the pickle file to the deployment space as a data asset named after the file
input_assest_detail=client_space.data_assets.create(name=modelpickle,file_path=modelpickle)

Creating data asset...
SUCCESS


In [11]:
# Get the asset id from the details returned when the asset was created
print(input_assest_detail['metadata']['asset_id'])

3f49dedd-b679-4d00-ba6e-9f0ca9fa64ca


In [12]:
# Look the asset id up by name: filter the asset listing on NAME and show the ASSET_ID column
client_space.data_assets.list().query('NAME== @modelpickle')['ASSET_ID']

0    3f49dedd-b679-4d00-ba6e-9f0ca9fa64ca
Name: ASSET_ID, dtype: object

# デプロイするファンクションの定義

In [13]:
def classifyContext():
    """Restore the pickled classifier from the deployment space and return a scorer.

    Connects to the deployment space, finds the data asset named
    ``classification_model.pickle``, downloads it into the working directory,
    unpickles it and returns the nested ``score`` function.

    Imports and connection settings are kept inside the function body.
    NOTE(review): presumably required because the stored function runs outside
    this notebook and cannot see notebook globals - confirm against the
    watsonx.ai documentation for deployable Python functions.

    Returns:
        The ``score`` callable. It takes a payload shaped like
        ``{"input_data": [{"values": [[text]]}]}`` and returns
        ``{"predictions": [<classification result as a dict>]}``.

    Raises:
        KeyError: no data asset with the expected name exists in the space.
        FileNotFoundError: the asset file is missing after the download call.
    """
    # watson_nlp is not referenced by name below.
    # NOTE(review): presumably imported so the library is loaded before the
    # model is unpickled - confirm before removing.
    import watson_nlp
    import os
    import pickle

    api_key = 'PASTE YOUR PLATFORM API KEY HERE'
    url = 'PASTE YOUR ENDPOINT HERE'
    space_id = 'PASTE YOUR SPACE ID HERE'

    modelpickle = 'classification_model.pickle'

    # Connect the watsonx client to the deployment space
    from ibm_watsonx_ai import APIClient
    from ibm_watsonx_ai import Credentials
    credentials = Credentials(
        api_key=api_key,
        url=url
    )
    client_space = APIClient(credentials, space_id=space_id)

    # Resolve the asset id of the model pickle from its name.
    # The filtered result keeps the row labels of the full listing, so the first
    # match must be taken by position (.iloc[0]); a label lookup with [0] only
    # works when the asset happens to be the first row of the listing.
    matching_ids = client_space.data_assets.list().query('NAME== @modelpickle')['ASSET_ID']
    if matching_ids.empty:
        raise KeyError(f"data asset '{modelpickle}' not found in space '{space_id}'")
    modelpickle_asset_id = matching_ids.iloc[0]

    # Download the asset from the deployment space
    client_space.data_assets.download(modelpickle_asset_id, modelpickle)

    # Verify the download; fail here with a clear message instead of at open()
    if os.path.isfile(modelpickle):
        print('Download OK')
    else:
        print('Download Failed')
        raise FileNotFoundError(f"'{modelpickle}' is missing after the download")

    # Restore the model from the pickle.
    # SECURITY: pickle.load executes code embedded in the file; only load assets
    # that were written to this deployment space by a trusted party.
    with open(modelpickle, 'rb') as f:
        classified_model = pickle.load(f)

    # Scoring function
    def score(payload):
        """Classify the first text of the first input row with the restored model."""
        c_prediction = classified_model.run(payload['input_data'][0]["values"][0][0])
        return {'predictions': [c_prediction.to_dict()]}

    return score

In [14]:
# Local test of the function: build the scorer and call it with a one-text payload
response = classifyContext()({
    "input_data": [{
        "values" :[["ディスプレイは色鮮やかで、映画やゲームを楽しむのに最適です。"]]
    }]
})

Successfully saved data asset content to file: 'classification_model.pickle'
Download OK


In [15]:
import pprint
# Pretty-print the predictions returned by the local function test
pprint.pprint(response["predictions"])

[{'classes': [{'class_name': 'ポジティブ', 'confidence': 0.9943910241127014},
              {'class_name': 'ネガティブ', 'confidence': 0.005608959123492241}],
  'producer_id': {'name': 'Transformer-based Text Classifier',
                  'version': '0.0.1'}}]


In [16]:
# Look up the id of the software specification by its name
sw_spec_uid = client.software_specifications.get_id_by_name("runtime-23.1-py3.10")
# Alternative specification name, left disabled:
#sw_spec_uid = client.software_specifications.get_id_by_name("runtime-24.1-py3.11")

In [17]:
# Store the function in the deployment space repository
meta_props = {
    client.repository.FunctionMetaNames.NAME: "Slate Test Simple",
    client.repository.FunctionMetaNames.SOFTWARE_SPEC_UID: sw_spec_uid
}

function_details = client_space.repository.store_function(meta_props=meta_props, function=classifyContext)
# Id of the stored function, taken from the returned details
function_id = client_space.repository.get_function_id(function_details)

In [18]:
# Deploy the stored function as an online deployment on hardware spec 'S'
metadata = {
    client_space.deployments.ConfigurationMetaNames.NAME: "Deployment of function Slate Test Simple",
    client_space.deployments.ConfigurationMetaNames.HARDWARE_SPEC: { 'name': 'S'},  
    client_space.deployments.ConfigurationMetaNames.ONLINE: {}
}

function_deployment_details  = client_space.deployments.create(function_id, meta_props=metadata)



######################################################################################

Synchronous deployment creation for id: 'ea7a2e50-4cf2-4f4e-984f-1bf3b0708db8' started

######################################################################################


initializing
Note: online_url and serving_urls are deprecated and will be removed in a future release. Use inference instead.
......
ready


-----------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_id='7a766bc7-4f8d-4e94-a754-9b453c2009d8'
-----------------------------------------------------------------------------------------------




# テストスコアリング実行

In [19]:
# Get the deployment id from the details returned by the deployment creation
deployment_id=function_deployment_details['metadata']['id']
print(deployment_id)

7a766bc7-4f8d-4e94-a754-9b453c2009d8


In [20]:
# Payload for testing the deployed function: one input row holding one text
scoring_payload = {
    "input_data": [{
        "values" :[["ディスプレイは色鮮やかで、映画やゲームを楽しむのに最適です。"]]
    }]
}

In [21]:
# Score the payload through the deployment and print what the deployment returned.
# The result of this call is `predictions`; printing `response` would only repeat
# the output of the earlier local function test.
predictions = client_space.deployments.score(deployment_id, scoring_payload)
import pprint
pprint.pprint(predictions["predictions"])

[{'classes': [{'class_name': 'ポジティブ', 'confidence': 0.9943910241127014},
              {'class_name': 'ネガティブ', 'confidence': 0.005608959123492241}],
  'producer_id': {'name': 'Transformer-based Text Classifier',
                  'version': '0.0.1'}}]
