# Train reward model with human feedback

In [2]:
import psutil

# Snapshot of the machine's memory as reported by psutil; echoed so the instance size is visible in the cell output.
notebook_memory = psutil.virtual_memory()
print(notebook_memory)

# The notebook expects at least 32 GB (decimal) of total RAM.
if notebook_memory.total >= 32_000_000_000:
    # The flag is only defined when the memory check passes.
    correct_instance_type = True
else:
    print(
        '*******************************************',
        'YOU ARE NOT USING THE CORRECT INSTANCE TYPE',
        'PLEASE CHANGE INSTANCE TYPE TO  m5.2xlarge ',
        '*******************************************',
        sep='\n',
    )

svmem(total=802916929536, available=796064526336, percent=0.9, used=2468024320, free=761958158336, active=2660704256, inactive=36107153408, buffers=0, cached=38490746880, shared=1355776, slab=1071583232)


In [3]:
%pip install --disable-pip-version-check -q \
    transformers==4.26.1 \
    datasets==2.9.0 \
    accelerate==0.17.0 \
    bitsandbytes==0.37.0 \
    promptsource==0.2.3 \
    trl==0.4.1 \
    evaluate==0.4.0

[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
import boto3
import sagemaker
import pandas as pd

# SageMaker SDK session; used below to look up the default S3 bucket.
sess = sagemaker.Session()
# Default bucket of the session; later cells read the human-loop results from S3 using this name.
bucket = sess.default_bucket()
# Execution role as returned by the SageMaker SDK for this environment.
role = sagemaker.get_execution_role()
# Region of the default boto3 session; passed to the boto3 clients created in the next cell.
region = boto3.Session().region_name

In [5]:
import io
import json
import uuid
import time
import boto3
import botocore

# AWS SDK (boto3) clients.
# NOTE(review): `sagemaker` is rebound here from the imported `sagemaker` module to a
# boto3 client, so from this point on `sagemaker.<attr>` refers to the client.
sagemaker = boto3.client(service_name="sagemaker", region_name=region)
# The A2I runtime client is created without an explicit region (default session region).
a2i = boto3.client(service_name="sagemaker-a2i-runtime")
s3 = boto3.client(service_name="s3", region_name=region)

In [6]:
import os
import glob
import numpy as np
import argparse
import pprint
from collections import defaultdict

import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
import torch.utils.data.distributed
from torch.utils.data import Dataset, DataLoader

from transformers import AutoConfig, AutoModelForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup

  from .autonotebook import tqdm as notebook_tqdm


# Retrieve the `human_loops_started`

In [7]:
%store -r human_loops_started

In [8]:
# `human_loops_started` is expected to have been restored by `%store -r` above;
# remind the user to run the previous notebook when it is missing.
if "human_loops_started" not in globals():
    print("*** PLEASE RUN PREVIOUS NOTEBOOK BEFORE CONTINUING ***")

In [9]:
print(human_loops_started)

['94503827-403b-4554-8d95-48099c9a37a9', '43b29d80-3662-42eb-845a-833e644aea70', 'fccd9e3b-3cf3-4544-8823-a5a321bf5148', '0519da28-3dfe-45bc-b88b-2ddf0100846d']


# Verify the Human Loops are Completed

In [10]:
import time

# Statuses after which a human loop no longer changes. Only "Completed" loops have
# results to collect; a "Failed" or "Stopped" loop never becomes "Completed", so
# waiting specifically for "Completed" would poll forever.
terminal_human_loop_statuses = ("Completed", "Failed", "Stopped")

# Responses of `describe_human_loop` for every loop that finished successfully.
completed_human_loops = []
for human_loop_name in human_loops_started:
    resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)
    print(f"HumanLoop Name: {human_loop_name}")
    print(f'HumanLoop Status: {resp["HumanLoopStatus"]}')
    # `HumanLoopOutput` may be absent while the loop has produced no output yet,
    # so read it with .get() instead of indexing (which would raise KeyError).
    print(f'HumanLoop Output Destination: {resp.get("HumanLoopOutput")}')
    print("")

    # Poll every 10 seconds until the loop reaches a terminal status.
    while resp["HumanLoopStatus"] not in terminal_human_loop_statuses:
        print("Waiting for HumanLoop to complete.")
        time.sleep(10)
        resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)

    if resp["HumanLoopStatus"] == "Completed":
        completed_human_loops.append(resp)
        print("Completed!")
        print("")
    else:
        # Failed / stopped loops have no usable labels; report and move on.
        print(f'HumanLoop ended with status {resp["HumanLoopStatus"]}; skipping it.')
        print("")

HumanLoop Name: 94503827-403b-4554-8d95-48099c9a37a9
HumanLoop Status: Completed
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-dsoaws-star-rating-9e22832c-50fa-42c4-aa9f-507a31473ec2/2023/03/26/21/17/40/94503827-403b-4554-8d95-48099c9a37a9/output.json'}

Completed!

HumanLoop Name: 43b29d80-3662-42eb-845a-833e644aea70
HumanLoop Status: Completed
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-dsoaws-star-rating-9e22832c-50fa-42c4-aa9f-507a31473ec2/2023/03/26/21/17/40/43b29d80-3662-42eb-845a-833e644aea70/output.json'}

Completed!

HumanLoop Name: fccd9e3b-3cf3-4544-8823-a5a321bf5148
HumanLoop Status: Completed
HumanLoop Output Destination: {'OutputS3Uri': 's3://sagemaker-us-east-1-079002598131/ground-truth-star-rating-results/fd-dsoaws-star-rating-9e22832c-50fa-42c4-aa9f-507a31473ec2/2023/03/26/21/17/40/fccd9e3b-3cf3-4544-8823-a5a321bf5148/o

# View Human Labels  

Once the work is complete, Amazon SageMaker Ground Truth stores the results in the specified S3 bucket and sends a CloudWatch event.  Here is a sample item labeled with Ground Truth, in JSON format:
```
{
 "inputContent": {"taskObject": {
                         "prompt": "Sometimes it works but usually not",
                         "responses": [2, 3]}
                 },
 "humanAnswers": [{"answerContent": {
                        "response_1_ranking": "1", # ranking for 1st response (1 is High)
                        "response_2_ranking": "2"  # ranking for 2nd response (2 is Low)
                 }}]
}
```

# Prepare human-labeled data for RL/PPO training
Retrieve the labels from Ground Truth and convert every ranking into a binary reward (0 or 1) as follows:

From this:
```
prompt                                response    ranking

Sometimes it works but usually not    2           1   # High
Sometimes it works but usually not    3           2   # Low
```

To this:
```
prompt                                response    ranking

Sometimes it works but usually not    2           1   # Ranked highest
Sometimes it works but usually not    3           0   # Not ranked highest
```

To this (`turn_into_text_classification_format()` below):
```
prompt                                response    highest_ranked_response_index

Sometimes it works but usually not    [2,3]       0   # 0th item in the response list is ranked the highest

```

# _Note:  If nothing is showing up below, you need to return to finish the previous notebook by labeling the data in Ground Truth!!_

In [11]:
import re
from pprint import pprint  # NOTE(review): rebinds `pprint` from the module imported earlier to the function

# Collected (prompt, response, ranking) tuples: two per completed human loop.
human_feedback_items = []

for resp in completed_human_loops:
    human_feedback_s3_uri = resp["HumanLoopOutput"]["OutputS3Uri"]
    if not human_feedback_s3_uri.startswith("s3://"):
        raise ValueError(f"Unexpected HumanLoop output location: {human_feedback_s3_uri}")

    # Take bucket and key from the URI itself instead of regex-splitting on the
    # session's default bucket: the bucket name is not a safe regex pattern, and
    # the output is not guaranteed to live in the default bucket.
    output_bucket, key = human_feedback_s3_uri[len("s3://"):].split("/", 1)

    response = s3.get_object(Bucket=output_bucket, Key=key)
    content = response["Body"].read().decode("utf-8")
    json_output = json.loads(content)
    print(json_output)

    task_object = json_output["inputContent"]['taskObject']
    prompt = task_object['prompt']
    responses = task_object['responses']

    # Only the first human answer of each loop is used.
    answer_content = json_output["humanAnswers"][0]["answerContent"]
    response_1_ranking = answer_content['response_1_ranking']
    response_2_ranking = answer_content['response_2_ranking']

    # One item per response, pairing it with the ranking the worker gave it.
    human_feedback_item_1 = (prompt, responses[0], response_1_ranking)
    human_feedback_items.append(human_feedback_item_1)
    human_feedback_item_2 = (prompt, responses[1], response_2_ranking)
    human_feedback_items.append(human_feedback_item_2)

{'flowDefinitionArn': 'arn:aws:sagemaker:us-east-1:079002598131:flow-definition/fd-dsoaws-star-rating-9e22832c-50fa-42c4-aa9f-507a31473ec2', 'humanAnswers': [{'acceptanceTime': '2023-03-26T21:18:14.324Z', 'answerContent': {'response_1_ranking': '2', 'response_2_ranking': '1'}, 'submissionTime': '2023-03-26T21:18:39.397Z', 'timeSpentInSeconds': 25.073, 'workerId': 'e7232bf5ab67e176', 'workerMetadata': {'identityData': {'identityProviderType': 'Cognito', 'issuer': 'https://cognito-idp.us-east-1.amazonaws.com/us-east-1_WEvHYVrSh', 'sub': '06e39925-66bd-45b1-b495-9081ad730e85'}}}], 'humanLoopName': '94503827-403b-4554-8d95-48099c9a37a9', 'inputContent': {'taskObject': {'prompt': 'I enjoy this product', 'responses': [4, 5]}}}
{'flowDefinitionArn': 'arn:aws:sagemaker:us-east-1:079002598131:flow-definition/fd-dsoaws-star-rating-9e22832c-50fa-42c4-aa9f-507a31473ec2', 'humanAnswers': [{'acceptanceTime': '2023-03-26T21:18:46.813Z', 'answerContent': {'response_1_ranking': '1', 'response_2_ranking

In [12]:
# One row per (prompt, response, ranking) tuple collected from the human loops.
feedback_columns = ['prompt', 'response', 'ranking']
df_human_feedback_items = pd.DataFrame(data=human_feedback_items, columns=feedback_columns)
df_human_feedback_items.head(10)

Unnamed: 0,prompt,response,ranking
0,I enjoy this product,4,2
1,I enjoy this product,5,1
2,I am unhappy with this product,1,1
3,I am unhappy with this product,2,2
4,It is okay,3,1
5,It is okay,4,2
6,Sometimes it works but usually not,2,1
7,Sometimes it works but usually not,3,2


# Convert ranking into 0 or 1 reward

In [13]:
# Number of responses ranked per prompt. Kept for reference; the reward below no
# longer depends on it.
num_rankings = 2

# Responses are stored as strings so they can be comma-joined per prompt later.
df_human_feedback_items['response'] = df_human_feedback_items['response'].astype(str)

# Binary reward: '1' for the response ranked highest (ranking 1), '0' otherwise.
# For two rankings this equals the previous `abs(ranking - num_rankings)` result,
# but it stays binary for more than two rankings and is idempotent: re-running
# this cell on already-converted values leaves them unchanged.
df_human_feedback_items['ranking'] = df_human_feedback_items['ranking'].apply(
    lambda ranking: '1' if int(ranking) == 1 else '0'
)
df_human_feedback_items.head(10)

Unnamed: 0,prompt,response,ranking
0,I enjoy this product,4,0
1,I enjoy this product,5,1
2,I am unhappy with this product,1,1
3,I am unhappy with this product,2,0
4,It is okay,3,1
5,It is okay,4,0
6,Sometimes it works but usually not,2,1
7,Sometimes it works but usually not,3,0


In [14]:
# Collapse the per-response rows into one row per prompt: the (string) responses and
# rankings of each prompt are comma-joined in row order; the next cell splits them
# back into integer lists.
df_human_feedback_items_grouped_by_prompt = df_human_feedback_items.groupby('prompt', as_index=False).agg({'prompt' : 'first', 'response' : ','.join, 'ranking' : ','.join})
df_human_feedback_items_grouped_by_prompt



Unnamed: 0,prompt,response,ranking
0,I am unhappy with this product,12,10
1,I enjoy this product,45,1
2,It is okay,34,10
3,Sometimes it works but usually not,23,10


In [15]:
# Turn the comma-joined strings of each prompt back into lists of integers,
# first for the responses and then for the rankings.
for column in ('response', 'ranking'):
    df_human_feedback_items_grouped_by_prompt[column] = df_human_feedback_items_grouped_by_prompt[column].apply(
        lambda joined: list(map(int, joined.split(',')))
    )
df_human_feedback_items_grouped_by_prompt



Unnamed: 0,prompt,response,ranking
0,I am unhappy with this product,"[1, 2]","[1, 0]"
1,I enjoy this product,"[4, 5]","[0, 1]"
2,It is okay,"[3, 4]","[1, 0]"
3,Sometimes it works but usually not,"[2, 3]","[1, 0]"


In [16]:
# NOTE(review): this rebinds `Dataset`, which was imported from `torch.utils.data`
# earlier in this file; from here on `Dataset` is the Hugging Face `datasets` class.
from datasets import Dataset

# Create a `datasets.Dataset` from the grouped pandas DataFrame (one row per prompt)
human_feedback_dataset = Dataset.from_pandas(df_human_feedback_items_grouped_by_prompt)
human_feedback_dataset

Dataset({
    features: ['prompt', 'response', 'ranking'],
    num_rows: 4
})

# Train a reward model with human preference and alignment data
This is typically a language model initialized from the supervised-fine-tuned (SFT) model (trained in a previous notebook), but with an additional binary-classification layer placed on top.  This reward model is used to train the reinforcement-learning model in the next step.  The reinforcement-learning model is what is deployed into production to serve applications.

# TODO:  This should be bloomz (not bloom) at this point in the flow

In [17]:
%store -r model_checkpoint

In [18]:
# Hard-coded checkpoint name; overrides whatever `%store -r model_checkpoint` restored in the previous cell.
model_checkpoint = 'bigscience/bloomz-560m'

In [19]:
# Print an error banner when `model_checkpoint` has not been defined.
if "model_checkpoint" not in globals():
    print(
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++",
        "[ERROR] Please run the notebooks in the previous section before you continue.",
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++",
        sep="\n",
    )

In [20]:
print(model_checkpoint)

bigscience/bloomz-560m


In [21]:
%store -r supervised_fine_tuned_model_path

In [22]:
# Print an error banner when `supervised_fine_tuned_model_path` was not restored by `%store -r`.
if "supervised_fine_tuned_model_path" not in globals():
    print(
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++",
        "[ERROR] Please run the notebooks in the previous section before you continue.",
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++",
        sep="\n",
    )

In [23]:
print(supervised_fine_tuned_model_path)

./tmp_models/bigscience/bloom-560m/


In [25]:
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

import evaluate
import numpy as np
import torch.nn as nn
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    HfArgumentParser,
    PreTrainedTokenizerBase,
    Trainer,
    TrainingArguments,
)
from transformers.utils import PaddingStrategy

In [26]:
# Simulated (cooking show) output from SFT (Step 1) for Amazon Customer Review generative-classification task
#model_name = "bigscience/bloomz-560m" 

# Load the human comparisons dataset for tuning the reward model.
# Simulated output from HF Step 2 to be used to train reward for "helpfulness"
#ds = load_dataset("openai/summarize_from_feedback", name="comparisons") 

# Load the tokenizer that belongs to `model_checkpoint` (only the tokenizer is loaded here;
# the model itself is loaded in a later cell).
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [27]:
# Turn the dataset into pairs of texts, where text_j is built from the preferred response and text_k from the other one.
def turn_into_text_classification_format(examples):
    """Build preferred / non-preferred text pairs from a batch of ranked responses.

    Args:
        examples: batch mapping with parallel lists under "prompt", "response" and
            "ranking". Every "response" entry must hold exactly two responses, and
            the matching "ranking" entry must hold exactly one 0 and one 1, where
            1 marks the preferred response.

    Returns:
        Dict with the lists "text_j" (preferred response) and "text_k" (other
        response), one entry per example, each formatted as
        "<response> <bos_token> <prompt>".

    Raises:
        ValueError: if an example does not have exactly two responses ranked with
            exactly one 0 and one 1.
    """
    new_examples = {"text_j": [], "text_k": []}
    for prompt, response, ranking in zip(examples["prompt"], examples["response"], examples["ranking"]):
        # Require exactly one 0 and one 1. This rejects [1, 1] (which would otherwise
        # silently pick the first response) and [0, 0] (which would otherwise fail
        # with an unrelated "1 is not in list" error).
        if len(response) != 2 or sorted(ranking) != [0, 1]:
            raise ValueError(
                "There should be two responses ranked with exactly one 0 and one 1. "
                f"Received {len(response)} responses and rankings {list(ranking)}."
            )

        highest_ranked_response_index = list(ranking).index(1)
        other_response_index = 1 - highest_ranked_response_index

        new_examples["text_j"].append(
            str(response[highest_ranked_response_index]) + " " + tokenizer.bos_token + " " + prompt
        )
        new_examples["text_k"].append(
            str(response[other_response_index]) + " " + tokenizer.bos_token + " " + prompt
        )

    return new_examples

# Tokenize the dataset.
def preprocess_function(examples):
    """Tokenize the "text_j" and "text_k" columns of a batch.

    Returns a dict with `input_ids_<side>` and `attention_mask_<side>` for both
    sides ("j" first, then "k"), taken from the tokenizer output of each column.
    """
    encoded = {}
    for side in ("j", "k"):
        tokens = tokenizer(examples[f"text_{side}"], truncation=True)
        encoded[f"input_ids_{side}"] = tokens["input_ids"]
        encoded[f"attention_mask_{side}"] = tokens["attention_mask"]
    return encoded


In [28]:
# Requested number of worker processes for `Dataset.map`. Can be raised on machines with
# more processors; the cell output shows `datasets` lowering it for a dataset this small.
num_proc = 8
original_columns = human_feedback_dataset.column_names
print(original_columns)

# Step 1: replace the raw columns with the (text_j, text_k) preference pairs.
human_feedback_binary_classification_dataset = human_feedback_dataset.map(
    turn_into_text_classification_format,
    batched=True,
    num_proc=num_proc,
    remove_columns=original_columns,
)

# Step 2: tokenize both texts and drop the text columns afterwards.
human_feedback_tokenized_dataset = human_feedback_binary_classification_dataset.map(
    preprocess_function,
    batched=True,
    num_proc=num_proc,
    remove_columns=["text_j", "text_k"],
)

print(human_feedback_tokenized_dataset)


num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4.


['prompt', 'response', 'ranking']


#0:   0%|          | 0/1 [00:00<?, ?ba/s]
#1:   0%|          | 0/1 [00:00<?, ?ba/s][A

#2:   0%|          | 0/1 [00:00<?, ?ba/s][A[A


#0: 100%|██████████| 1/1 [00:00<00:00, 93.81ba/s]A
#1: 100%|██████████| 1/1 [00:00<00:00, 114.21ba/s]
#2: 100%|██████████| 1/1 [00:00<00:00, 127.15ba/s]
#3: 100%|██████████| 1/1 [00:00<00:00, 159.15ba/s]
num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4.
#0:   0%|          | 0/1 [00:00<?, ?ba/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
#2:   0%|          | 0/1 [00:00<?, ?ba/s][A[A
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
#0: 100%|██████████| 1/1 [00:00<00:00, 148.19ba/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.




#2: 100%|██████████| 

Dataset({
    features: ['input_ids_j', 'attention_mask_j', 'input_ids_k', 'attention_mask_k'],
    num_rows: 4
})


In [29]:
# Load the checkpoint as a sequence-classification model with a single label; the one
# score it produces per input is what RewardTrainer below treats as the reward.
# (The load log reports the `score.weight` head as newly initialized, i.e. untrained.)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=1)

# # Need to do this for gpt2, because it doesn't have an official pad token.
# tokenizer.pad_token = tokenizer.eos_token
# model.config.pad_token_id = tokenizer.eos_token_id

Some weights of BloomForSequenceClassification were not initialized from the model checkpoint at bigscience/bloomz-560m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [30]:
# Define the metric that we'll use for validation.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute how often the preferred text receives the higher reward.

    `eval_pred` is unpacked into (predictions, labels); the labels are ignored.
    NOTE(review): presumably `predictions` stacks rewards_j and rewards_k along
    axis 0 (matching the outputs returned by `RewardTrainer.compute_loss`) —
    confirm the exact shape against the Trainer's evaluation loop.
    """
    predictions, _ = eval_pred
    # Here, predictions is rewards_j and rewards_k.
    # We want to see how much of the time rewards_j > rewards_k.
    # argmax over axis 0 yields 0 wherever the first entry (rewards_j) is the larger one.
    predictions = np.argmax(predictions, axis=0)
    # The reference is all zeros: index 0 (rewards_j) is always the expected winner.
    labels = np.zeros(predictions.shape)
    return accuracy.compute(predictions=predictions, references=labels)

In [31]:
# We need to define a special data collator that batches the data in our j vs k format.
@dataclass
class RewardDataCollatorWithPadding:
    """Pad the "j" and "k" sides of every feature separately and merge them into one batch.

    The dataclass fields are forwarded unchanged to `tokenizer.pad`.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    return_tensors: str = "pt"

    def _pad_side(self, features: List[Dict[str, Any]], side: str) -> Dict[str, Any]:
        """Collect `input_ids_<side>` / `attention_mask_<side>` of all features and pad them together."""
        side_features = [
            {
                "input_ids": feature[f"input_ids_{side}"],
                "attention_mask": feature[f"attention_mask_{side}"],
            }
            for feature in features
        ]
        return self.tokenizer.pad(
            side_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors=self.return_tensors,
        )

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return the padded j/k inputs plus a `return_loss` flag as a single batch dict."""
        batch_j = self._pad_side(features, "j")
        batch_k = self._pad_side(features, "k")
        return {
            "input_ids_j": batch_j["input_ids"],
            "attention_mask_j": batch_j["attention_mask"],
            "input_ids_k": batch_k["input_ids"],
            "attention_mask_k": batch_k["attention_mask"],
            "return_loss": True,
        }

In [32]:
class RewardTrainer(Trainer):
    """Trainer that optimizes a pairwise loss over (j, k) input pairs."""

    # Define how to compute the reward loss.
    def compute_loss(self, model, inputs, return_outputs=False):
        """Return -logsigmoid(rewards_j - rewards_k) averaged over the batch.

        When `return_outputs` is true, the loss is returned together with a dict
        holding both reward tensors.
        """
        def score(side):
            # First element of the model output for the given side of the pair.
            outputs = model(
                input_ids=inputs[f"input_ids_{side}"],
                attention_mask=inputs[f"attention_mask_{side}"],
            )
            return outputs[0]

        rewards_j = score("j")
        rewards_k = score("k")
        # The loss shrinks as the preferred text (j) scores higher than the other one (k).
        loss = -nn.functional.logsigmoid(rewards_j - rewards_k).mean()
        if not return_outputs:
            return loss
        return loss, {"rewards_j": rewards_j, "rewards_k": rewards_k}

# Training hyperparameters.
local_rank = 0  # only referenced by the commented-out `local_rank=` argument below
resume_from_checkpoint = False
deepspeed = None  # only referenced by the commented-out `deepspeed=` argument below
per_device_train_batch_size = 16
per_device_eval_batch_size = 16
gradient_accumulation_steps = 4
learning_rate = 2e-5
weight_decay = 0.001
bf16 = False
num_train_epochs = 1

# Define the training args. Needs to be done before the model is loaded if you are using deepspeed.
training_args = TrainingArguments(
    output_dir=f"{model_checkpoint.replace('/', '_')}_reward_model",
    learning_rate=learning_rate,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
#    evaluation_strategy="epoch",
    save_strategy="epoch",
    gradient_accumulation_steps=gradient_accumulation_steps,
#    deepspeed=deepspeed,
#    local_rank=local_rank,
    # Wire the `bf16` knob through; previously it was declared above but never
    # passed, so changing it had no effect. With bf16=False nothing changes.
    bf16=bf16,
    # Keep the custom `*_j` / `*_k` columns: the data collator reads them by name.
    remove_unused_columns=False,
    label_names=[],
)

# Train the model, woohoo.
trainer = RewardTrainer(
    model=model,
    args=training_args,
    train_dataset=human_feedback_tokenized_dataset, #["train"],
#    eval_dataset=tokenized_ds["validation"],
    compute_metrics=compute_metrics,
    data_collator=RewardDataCollatorWithPadding(tokenizer=tokenizer),
)

# Pass by keyword so the intent does not depend on the positional order of `train()`.
trainer.train(resume_from_checkpoint=resume_from_checkpoint)

***** Running training *****
  Num examples = 4
  Num Epochs = 1
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 512
  Gradient Accumulation steps = 4
  Total optimization steps = 1
  Number of trainable parameters = 559215616


[2023-03-27 00:09:33.749: W smdistributed/modelparallel/torch/nn/predefined_hooks.py:78] Found unsupported HuggingFace version 4.26.1 for automated tensor parallelism. HuggingFace modules will not be automatically distributed. You can use smp.tp_register_with_module API to register desired modules for tensor parallelism, or directly instantiate an smp.nn.DistributedModule. Supported HuggingFace transformers versions for automated tensor parallelism: ['4.17.0', '4.20.1', '4.21.0']
[2023-03-27 00:09:33.786 pytorch-1-12-gpu--ml-p3dn-24xlarge-307ebad80d11874f5dcc2ce687db:7284 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2023-03-27 00:09:33.913 pytorch-1-12-gpu--ml-p3dn-24xlarge-307ebad80d11874f5dcc2ce687db:7284 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.


You're using a BloomTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Could not estimate the number of tokens of the input, floating-point operations will not be computed


Step,Training Loss


Saving model checkpoint to bigscience_bloomz-560m_summarization_reward_model/checkpoint-1
Configuration saved in bigscience_bloomz-560m_summarization_reward_model/checkpoint-1/config.json
Model weights saved in bigscience_bloomz-560m_summarization_reward_model/checkpoint-1/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Saving model checkpoint to ./tmp_models/reward_model/
Configuration saved in ./tmp_models/reward_model/config.json
Model weights saved in ./tmp_models/reward_model/pytorch_model.bin


In [None]:
# Directory the trained reward model is written to.
reward_model_path = './tmp_models/reward_model/'
trainer.save_model(output_dir=reward_model_path)

In [40]:
%store reward_model_path

Stored 'reward_model_path' (str)


In [39]:
# Load the reward model back from `reward_model_path`, again with a single output label.
reward_model = AutoModelForSequenceClassification.from_pretrained(reward_model_path, num_labels=1)

loading configuration file ./tmp_models/reward_model/config.json
Model config BloomConfig {
  "_name_or_path": "./tmp_models/reward_model/",
  "apply_residual_connection_post_layernorm": false,
  "architectures": [
    "BloomForSequenceClassification"
  ],
  "attention_dropout": 0.0,
  "attention_softmax_in_fp32": true,
  "bias_dropout_fusion": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_dropout": 0.0,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_epsilon": 1e-05,
  "masked_softmax_fusion": true,
  "model_type": "bloom",
  "n_head": 16,
  "n_inner": null,
  "n_layer": 24,
  "offset_alibi": 100,
  "pad_token_id": 2,
  "pretraining_tp": 1,
  "seq_length": 2048,
  "skip_bias_add": true,
  "skip_bias_add_qkv": false,
  "slow_but_exact": false,
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",
  "unk_token_id": 0,
  "use_cache": true,
  "vocab_size": 250880
}

loadin

In [33]:
# def create_list_input_files(path):
#     input_files = glob.glob('{}/*.parquet'.format(path))
#     print(input_files)
#     return input_files

# def save_transformer_model(model, model_dir):
#     path = os.path.join(model_dir, 'transformer')
#     os.makedirs(path, exist_ok=True)                              
#     print('Saving Transformer model to {}'.format(path))
#     model.save_pretrained(path)

# def save_pytorch_model(model, model_checkpoint, model_dir):
#     os.makedirs(model_dir, exist_ok=True) 
#     print('Saving PyTorch model to {}'.format(model_dir))
#     save_path = os.path.join(model_dir, model_checkpoint.replace('/', '-'))
#     torch.save(model.state_dict(), save_path)

# PyTorch Dataset and DataLoader 

In [34]:
# # PyTorch dataset retrieves the dataset’s features and labels one sample at a time
# # Create a custom Dataset class for the reviews
# class ReviewDataset(Dataset):
    
#     def __init__(self, input_ids_list, label_id_list):
#         self.input_ids_list = input_ids_list
#         self.label_id_list = label_id_list

#     def __len__(self):
#         return len(self.input_ids_list)

#     def __getitem__(self, item):
#         # convert list of token_ids into an array of PyTorch LongTensors
#         input_ids = json.loads(self.input_ids_list[item]) 
#         label_id = self.label_id_list[item]

#         input_ids_tensor = torch.LongTensor(input_ids)
#         label_id_tensor = torch.tensor(label_id, dtype=torch.long)

#         return input_ids_tensor, label_id_tensor

    
# # PyTorch DataLoader helps to to organise the input training data in “minibatches” and reshuffle the data at every epoch
# # It takes Dataset as an input
# def create_data_loader(path, batch_size): 
#     print("Get data loader")

#     df = pd.DataFrame(columns=['input_ids', 'label_id'])
    
#     input_files = create_list_input_files(path)

#     for file in input_files:
#         # df_temp = pd.read_csv(file, 
#         #                       sep='\t', 
#         #                       usecols=['input_ids', 'label_id'])
#         df_temp = pd.read_parquet(file)
#         df = df.append(df_temp)
#         print('adding df_temp: {}'.format(df_temp))
        
#     ds = ReviewDataset(
#         input_ids_list=df.input_ids.to_numpy(),
#         label_id_list=df.label_id.to_numpy(),
#     )
    
#     return DataLoader(
#         ds,
#         batch_size=batch_size,
#         shuffle=True,
#         drop_last=True,
#     ), df



# Configure the model

In [35]:
# # TODO:  Change this to binary classification
# #        where 1 is assigned to the human-selected (presumably-correct) label
# #        and 0 is assigned to all of other labels

# def get_model_config():
#     classes = [1, 2, 3, 4, 5]

#     config = AutoConfig.from_pretrained(
#         supervised_fine_tuned_model_path,        
#         num_labels=len(classes),
#         id2label={
#             0: 1, 
#             1: 2, 
#             2: 3, 
#             3: 4, 
#             4: 5            
#         },
#         label2id={
#             1: 0,
#             2: 1,
#             3: 2,
#             4: 3,
#             5: 4
#         }
#     )
    
#     config.output_attentions=True

#     return config

# Train the reward model

In [36]:
# def train_model(model,
#                 train_data_loader,
#                 df_train,
#                 val_data_loader, 
#                 df_val,
#                 args):
    
#     loss_function = nn.CrossEntropyLoss()    
#     optimizer = optim.Adam(params=model.parameters(), lr=args.learning_rate)
    
#     if args.freeze_base_layers:
#         print('Freezing base layers...')
#         for name, param in model.named_parameters():
#             if 'classifier' not in name:  # classifier layer
#                 param.requires_grad = False
#         print('Set classifier layers to `param.requires_grad=False`.')        
    
#     train_correct = 0
#     train_total = 0

#     for epoch in range(args.epochs):
#         print('EPOCH -- {}'.format(epoch))

#         for i, (sent, label) in enumerate(train_data_loader):
#             print('i: ' + i)
#             print('sent: ' + sent)
#             print('label: ' + label)            
#             if i < args.train_steps_per_epoch:
#                 model.train()
#                 optimizer.zero_grad()
#                 sent = sent.squeeze(0)
#                 if torch.cuda.is_available():
#                     sent = sent.cuda()
#                     label = label.cuda()
#                 output = model(sent)[0]
#                 _, predicted = torch.max(output, 1)

#                 loss = loss_function(output, label)
#                 loss.backward()
#                 optimizer.step()
            
#                 if args.run_validation and i % args.validation_steps_per_epoch == 0:
#                     print('RUNNING VALIDATION:')
#                     correct = 0
#                     total = 0
#                     model.eval()

#                     for sent, label in val_data_loader:
#                         sent = sent.squeeze(0)
#                         if torch.cuda.is_available():
#                             sent = sent.cuda()
#                             label = label.cuda()
#                         output = model(sent)[0]
#                         _, predicted = torch.max(output.data, 1)

#                         total += label.size(0)
#                         correct += (predicted.cpu() ==label.cpu()).sum()

#                     accuracy = 100.00 * correct.numpy() / total
#                     print('[epoch/step: {0}/{1}] val_loss: {2:.2f} - val_acc: {3:.2f}%'.format(epoch, i, loss.item(), accuracy))
#             else:
#                 break           

#     print('TRAINING COMPLETED.')
#     return model

In [37]:
# from pprint import pprint
# import random

# #if __name__ == '__main__':
    
# # Parse args


# os.environ['SM_HOSTS'] = '{"hosts": ["algo-1"]}'
# os.environ['SM_CURRENT_HOST'] = 'algo-1'
# os.environ['SM_NUM_GPUS'] = '0'
# os.environ['SM_MODEL_DIR'] = './model/reward_model/'
# os.environ['SM_CHANNEL_TRAIN'] = './data/train'
# os.environ['SM_CHANNEL_VALIDATION'] = './data/validation'
# os.environ['SM_OUTPUT_DIR'] = './model_output/'


# parser = argparse.ArgumentParser()

# # CLI args

# parser.add_argument('--train_batch_size', 
#                     type=int, 
#                     default=64)

# parser.add_argument('--train_steps_per_epoch',
#                     type=int,
#                     default=64)

# parser.add_argument('--validation_batch_size', 
#                     type=int, 
#                     default=64)

# parser.add_argument('--validation_steps_per_epoch',
#                     type=int,
#                     default=64)

# parser.add_argument('--epochs', 
#                     type=int, 
#                     default=10)

# parser.add_argument('--freeze_base_layers', 
#                     type=eval, 
#                     default=False)

# parser.add_argument('--learning_rate', 
#                     type=float, 
#                     default=0.01)

# parser.add_argument('--momentum', 
#                     type=float, 
#                     default=0.5)

# parser.add_argument('--seed', 
#                     type=int, 
#                     default=42)

# parser.add_argument('--log_interval', 
#                     type=int, 
#                     default=100)

# parser.add_argument('--backend', 
#                     type=str, 
#                     default=None)

# parser.add_argument('--run_validation', 
#                     type=eval,
#                     default=False)

# parser.add_argument('--model-checkpoint', 
#                     type=str,
#                     default=model_checkpoint)


# # Container environment  

# parser.add_argument('--hosts', 
#                     type=list, 
#                     default=json.loads(os.environ['SM_HOSTS']))

# parser.add_argument('--current_host', 
#                     type=str, 
#                     default=os.environ['SM_CURRENT_HOST'])

# parser.add_argument('--model_dir', 
#                     type=str, 
#                     default=os.environ['SM_MODEL_DIR'])

# parser.add_argument('--train_data', 
#                     type=str, 
#                     default=os.environ['SM_CHANNEL_TRAIN'])

# parser.add_argument('--validation_data', 
#                     type=str, 
#                     default=os.environ['SM_CHANNEL_VALIDATION'])

# parser.add_argument('--output_dir', 
#                     type=str, 
#                     default=os.environ['SM_OUTPUT_DIR'])

# parser.add_argument('--num_gpus', 
#                     type=int, 
#                     default=os.environ['SM_NUM_GPUS'])

# # Debugger args

# parser.add_argument("--save-frequency", 
#                     type=int, 
#                     default=10, 
#                     help="frequency with which to save steps")

# parser.add_argument("--smdebug_path",
#                     type=str,
#                     help="output directory to save data in",
#                     default="/opt/ml/output/tensors",)

# parser.add_argument("--hook-type",
#                     type=str,
#                     choices=["saveall", "module-input-output", "weights-bias-gradients"],
#                     default="saveall",)

# args, _ = parser.parse_known_args()


# print('Loaded arguments:')
# print(args)

# # Get environment variables

# env_var = os.environ 
# print('Environment variables:')
# pprint(dict(env_var), width = 1) 

# # Check if distributed training

# is_distributed = len(args.hosts) > 1 and args.backend is not None

# print("Distributed training - {}".format(is_distributed))
# use_cuda = args.num_gpus > 0
# print("Number of gpus available - {}".format(args.num_gpus))
# kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# device = torch.device('cuda' if use_cuda else 'cpu')

# # Initialize the distributed environment.

# if is_distributed:
#     world_size = len(args.hosts)
#     os.environ['WORLD_SIZE'] = str(world_size)
#     host_rank = args.hosts.index(args.current_host)
#     os.environ['RANK'] = str(host_rank)
#     dist.init_process_group(backend=args.backend, rank=host_rank, world_size=world_size)
#     print('Initialized the distributed environment: \'{}\' backend on {} nodes. '.format(
#         args.backend, dist.get_world_size()) + 'Current host rank is {}. Number of gpus: {}'.format(
#         dist.get_rank(), args.num_gpus))

# # Set the seed for generating random numbers

# torch.manual_seed(args.seed)
# if use_cuda:
#     torch.cuda.manual_seed(args.seed) 

# # Instantiate model

# config = None
# model = None

# successful_download = False
# retries = 0

# while (retries < 5 and not successful_download):
#     try:
#         # Setup model
#         config = get_model_config()
#         model = AutoModelForSequenceClassification.from_pretrained(
#             supervised_fine_tuned_model_path,
#             config=config
#         )

#         model.to(device)
#         successful_download = True
#         print('Successfully downloaded after {} retries.'.format(retries))

#     except:
#         retries = retries + 1
#         random_sleep = random.randint(1, 30)
#         print('Retry #{}.  Sleeping for {} seconds'.format(retries, random_sleep))
#         time.sleep(random_sleep)

# if not model:
#      print('Not properly initialized...')

# # Create data loaders

# train_data_loader, df_train = create_data_loader(args.train_data, args.train_batch_size)
# val_data_loader, df_val = create_data_loader(args.validation_data, args.validation_batch_size)

# print("Processes {}/{} ({:.0f}%) of train data".format(
#     len(train_data_loader.sampler), len(train_data_loader.dataset),
#     100. * len(train_data_loader.sampler) / len(train_data_loader.dataset)
# ))

# print("Processes {}/{} ({:.0f}%) of validation data".format(
#     len(val_data_loader.sampler), len(val_data_loader.dataset),
#     100. * len(val_data_loader.sampler) / len(val_data_loader.dataset)
# )) 

# print('model_dir: {}'.format(args.model_dir))    
# #print('model summary: {}'.format(model))

# callbacks = []
# initial_epoch_number = 0

# # Start training

# model = train_model(
#     model,
#     train_data_loader,
#     df_train,
#     val_data_loader, 
#     df_val,
#     args
# )

In [38]:
# Hand the model to the two save helpers, both targeting args.model_dir.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'save_transformer_model' is not defined". The helper is not
# defined in the live kernel, and `args` / `model` are only assigned inside
# commented-out code in this notebook -- TODO confirm where
# `save_transformer_model`, `save_pytorch_model`, `model` and `args` are meant
# to come from before re-running this cell.
save_transformer_model(model, args.model_dir)
save_pytorch_model(model, args.model_checkpoint, args.model_dir)

NameError: name 'save_transformer_model' is not defined

In [None]:
from transformers import TextClassificationPipeline
from transformers import pipeline

# Location of the reward model to load: the 'transformer' sub-directory of args.model_dir.
# This must be a plain string path; writing it with a trailing comma would create a
# one-element tuple, which `pipeline` cannot resolve to a model.
# NOTE(review): `args` and `use_cuda` must already exist in the kernel; in this notebook
# they are only assigned inside commented-out code -- TODO confirm.
reward_model_path = os.path.join(args.model_dir, 'transformer')

# Choose the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if use_cuda else 'cpu')

# Build the text-classification pipeline from the saved model and place it on
# the selected device (without `device=` the selection above has no effect).
inference_pipeline = pipeline("text-classification",
                              model=reward_model_path,
                              device=device,
                             )

In [None]:
# %%html

# <p><b>Shutting down your kernel for this notebook to release resources.</b></p>
# <button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>
        
# <script>
# try {
#     els = document.getElementsByClassName("sm-command-button");
#     els[0].click();
# }
# catch(err) {
#     // NoOp
# }    
# </script>