In [1]:
import os
import re
import sys
import json
import yaml
import time
import glob
import logging
import pandas as pd
from typing import Dict
from pathlib import Path
from tokenizer_utils import count_tokens
from bedrock_utils import get_bedrock_client
from utils import  (
    get_rouge_l_score,
    get_cosine_similarity,
    parse_model_response,
    is_amazon_model)


In [2]:
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)


In [3]:
# global constants
CONFIG_FILE_PATH = "config.yaml"


In [4]:
# read the config yaml file
fpath = CONFIG_FILE_PATH
with open(fpath, 'r') as yaml_in:
    config = yaml.safe_load(yaml_in)
logger.info(f"config read from {fpath} -> {json.dumps(config, indent=2)}")


[2024-01-12 13:56:22,179] p31920 {2625127137.py:5} INFO - config read from config.yaml -> {
  "app_name": "contact-center-transcript-summarization",
  "aws": {
    "region": "us-east-1",
    "sagemaker_execution_role": "Admin"
  },
  "dir": {
    "data": "data",
    "raw": "data/raw",
    "golden": "data/raw/golden",
    "prompts": "data/prompts",
    "models": "data/models",
    "metrics": "data/metrics",
    "completions": "data/completions",
    "async_completions": "data/async_completions"
  },
  "data": {
    "raw_data_file": "data.csv",
    "golden_transcript": "data/raw/golden/transcript.txt",
    "golden_transcript_summary": "data/raw/golden/summary.txt"
  },
  "prompt": {
    "very_large_prompt": {
      "sleep_time": 180,
      "threshold": 70000
    },
    "normal_prompt": {
      "sleep_time": 60
    }
  },
  "max_retries": 3,
  "desired_word_count_for_summary": 80,
  "experiments": [
    {
      "name": "single-line-reason",
      "prompt_template": null,
      "reps": 3,


In [5]:
bedrock_client = get_bedrock_client()


[2024-01-12 13:56:23,043] p31920 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Create new client
  Using region: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)


In [6]:

boto3_bedrock = get_bedrock_client(runtime=False)
fm_list_response = boto3_bedrock.list_foundation_models()
fm_list = fm_list_response['modelSummaries']
df_fm = pd.DataFrame(fm_list)
display(df_fm)


[2024-01-12 13:56:23,670] p31920 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Create new client
  Using region: None
boto3 Bedrock client successfully created!
bedrock(https://bedrock.us-east-1.amazonaws.com)


Unnamed: 0,modelArn,modelId,modelName,providerName,inputModalities,outputModalities,responseStreamingSupported,customizationsSupported,inferenceTypesSupported,modelLifecycle
0,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-tg1-large,Titan Text Large,Amazon,[TEXT],[TEXT],True,[],[ON_DEMAND],{'status': 'ACTIVE'}
1,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-e1t-medium,Titan Text Embeddings,Amazon,[TEXT],[EMBEDDING],,[],[ON_DEMAND],{'status': 'LEGACY'}
2,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-image-generator-v1:0,Titan Image Generator G1,Amazon,"[TEXT, IMAGE]",[IMAGE],,[FINE_TUNING],"[ON_DEMAND, PROVISIONED]",{'status': 'ACTIVE'}
3,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-image-generator-v1,Titan Image Generator G1,Amazon,"[TEXT, IMAGE]",[IMAGE],,[],[ON_DEMAND],{'status': 'ACTIVE'}
4,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-embed-g1-text-02,Titan Text Embeddings v2,Amazon,[TEXT],[EMBEDDING],,[],[ON_DEMAND],{'status': 'ACTIVE'}
5,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-lite-v1:0:4k,Titan Text G1 - Lite,Amazon,[TEXT],[TEXT],True,"[FINE_TUNING, CONTINUED_PRE_TRAINING]",[PROVISIONED],{'status': 'ACTIVE'}
6,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-lite-v1,Titan Text G1 - Lite,Amazon,[TEXT],[TEXT],True,[],[ON_DEMAND],{'status': 'ACTIVE'}
7,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-express-v1:0:8k,Titan Text G1 - Express,Amazon,[TEXT],[TEXT],True,"[FINE_TUNING, CONTINUED_PRE_TRAINING]",[PROVISIONED],{'status': 'ACTIVE'}
8,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-express-v1,Titan Text G1 - Express,Amazon,[TEXT],[TEXT],True,[],[ON_DEMAND],{'status': 'ACTIVE'}
9,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-embed-text-v1:2:8k,Titan Embeddings G1 - Text,Amazon,[TEXT],[EMBEDDING],False,[],[PROVISIONED],{'status': 'ACTIVE'}


In [7]:
transcript_files = glob.glob(os.path.join(config['dir']['raw'], "*", "*transcript.txt"))
logger.info(f"found {len(transcript_files)} transcript_files ->\n{transcript_files}")


[2024-01-12 13:56:24,387] p31920 {3189124487.py:2} INFO - found 5 transcript_files ->
['data/raw/0/call_center_transcript_1_transcript.txt', 'data/raw/1/call_center_transcript_0_transcript.txt', 'data/raw/4/call_center_transcript_4_transcript.txt', 'data/raw/3/call_center_transcript_3_transcript.txt', 'data/raw/2/call_center_transcript_2_transcript.txt']


In [8]:
# function to encapsulate call to any Bedrock model for text generation
def get_text_response_from_bedrock(prompt: str, inference_parameters: Dict, model_id: str) -> Dict:

    logger.info(f"model_id={model_id}, prompt length is {len(prompt)} characters, {len(prompt.split())} words")      
    # initialize the response dict
    ret = dict(exception = None,
               prompt = prompt,
               completion = None,
               model_id = model_id,
               time_taken_in_seconds = None,
               completion_token_count = None,
               prompt_token_count=None)
    
    if is_amazon_model(model_id) is False:
        body = dict(prompt=prompt) | inference_parameters
    else:
        body = dict(inputText=prompt, textGenerationConfig=inference_parameters)

    body_bytes = json.dumps(body).encode('utf-8')

    # invoke the bedrock API for inferences
    st = time.time()
    try:
        response = bedrock_client.invoke_model(
                        modelId=model_id,
                        contentType="application/json",
                        accept="*/*",
                        body=body_bytes,
                    )
    except Exception as e:
        logger.error(f"exception when calling invoke_model, model_id={model_id}, exception={e}")
        ret['exception'] = e
        return ret
    
    ret['time_taken_in_seconds'] = time.time() - st
    # response is a Boto3 object, read it to convert into bytes
    resp_body = json.loads(response["body"].read().decode('utf-8'))
    
    parsed_response = parse_model_response(model_id, resp_body)
    ret.update(parsed_response)
    logger.info(json.dumps(ret, indent=2))
    return ret


In [9]:
from json import JSONEncoder
import re
from pathlib import Path
import os
import time
import json

class CustomJSONEncoder(JSONEncoder):
    def default(self, o):
        if isinstance(o, Exception):
            return {'error_type': o.__class__.__name__, 'error_message': str(o)}
        return JSONEncoder.default(self, o)

for idx, tf in enumerate(transcript_files):
    transcript = Path(tf).read_text()
    fname = os.path.basename(tf)
    file_id = "_".join(fname.split('_')[:-1])
    golden_summary_fpath = os.path.join(config['dir']['raw'], '0', "example_prompt_transcript_golden_summary.txt")
    logger.info(f"idx={idx}, tf={tf}, file_id={file_id}, golden_summary_fpath={golden_summary_fpath}")
    # for each experiment
    for experiment in config['experiments']:
        exp_name = experiment['name']        
        model_list = experiment['model_list']
        reps = experiment['reps']        
        for rep in range(reps):
            for model_info in model_list:
                model_name = model_info['model']
                prompt_template_fname = model_info['prompt_template']
                logger.info(f"exp_name={exp_name}, prompt_template={prompt_template_fname}, reps={reps}")
                model = config['bedrock_models'].get(model_name)
                if model is None:
                    logger.error(f"model={model_name} not found in bedrock_models")
                    continue
                model['model_id'] = model_name
                logger.info(f"going to summarize idx={idx}, file={tf} via {model['model_id']}")
                fpath = os.path.join(config['dir']['prompts'], prompt_template_fname)
                prompt_template = Path(fpath).read_text()
                logger.info(f"read prompt template from {fpath}")

                prompt = prompt_template.format(transcript)
                # print(prompt)
                prompt_token_count = count_tokens(prompt)
                logger.info(f"prompt for {tf} has {prompt_token_count} tokens")

                if (prompt_token_count + 125) > model['context_length']:
                    logger.info(f"cannot summarize {tf} with {model['model_id']} as prompt length {prompt_token_count} > {model['context_length']}")
                    continue
                very_large_prompt = prompt_token_count > config['prompt']['very_large_prompt']['threshold']
                inference_parameters = config['inference_params'][model['inference_param_set']]
                
                resp = get_text_response_from_bedrock(prompt, inference_parameters, model['model_id'])
                if resp['exception'] is not None:
                    logger.error(f"exception occurred for {tf}, model_id={model['model_id']}")
                    e = resp['exception']
                    if e.response['Error']['Code'] == 'ThrottlingException':
                        retries = 0
                        while True:
                            retries += 1
                            if very_large_prompt is True:
                                time.sleep(retries * config['prompt']['very_large_prompt']['sleep_time'])
                            else:
                                time.sleep(retries * config['prompt']['normal_prompt']['sleep_time'])
                            logger.error(f"going to retry for {tf}, model_id={model['model_id']}, retries={retries}")
                            resp = get_text_response_from_bedrock(prompt, inference_parameters, model['model_id'])
                            if resp['exception'] is not None:
                                logger.error(f"exception occurred for {tf}, model_id={model['model_id']} on retry as well")
                                if retries >= config['max_retries']:
                                    logger.error(f"retries={retries} >= configured max retries={config['max_retries']}, giving up")
                                    break
                            else:
                                logger.info(f"retry succeeded, retries={retries}")
                                break

                if resp['exception'] is None:
                    if resp['prompt_token_count'] is None:
                        logger.info(f"prompt_token_count is None for {model_name}, setting it to {prompt_token_count} ")
                        resp['prompt_token_count'] = prompt_token_count
                    resp['completion'] = re.sub(r"<reason>|</reason>", "", resp['completion']).strip()
                    resp['completion'] = re.sub(r"<reasons>|</reasons>", "", resp['completion']).strip()
                    dir_path = os.path.join(config['dir']['completions'], exp_name, file_id)
                    os.makedirs(dir_path, exist_ok=True)

                    fpath = os.path.join(dir_path, f"{file_id}_{model['model_id']}_rep{rep+1}.txt")
                    Path(fpath).write_text(resp['completion'])

                    if Path(golden_summary_fpath).is_file() is True:
                        golden_summary = Path(golden_summary_fpath).read_text()
                        # rouge score
                        resp['rouge_l_f1_score'] = get_rouge_l_score(golden_summary, resp['completion'])
                        # cosine similarity
                        resp['cosine_similarity'] = get_cosine_similarity(golden_summary, resp['completion'])
                    resp['cost'] = model['prompt_token_pricing_per_million'] * (resp['prompt_token_count'] / 1000000) + \
                        model['completion_token_pricing_per_million'] * (resp['completion_token_count'] / 1000000)
                    
                    resp['completion_word_count'] = len(resp['completion'].split())
                    resp['experiment'] = exp_name
                    
                logger.info(json.dumps(resp, indent=2))
                dir_path = os.path.join(config['dir']['metrics'], exp_name, file_id)
                os.makedirs(dir_path, exist_ok=True)
                fpath = os.path.join(dir_path, f"{file_id}_{model['model_id']}_rep{rep+1}.json")
                Path(fpath).write_text(json.dumps(resp, cls=CustomJSONEncoder, indent=2))        

[2024-01-12 13:56:25,436] p31920 {544935498.py:19} INFO - idx=0, tf=data/raw/0/call_center_transcript_1_transcript.txt, file_id=call_center_transcript_1, golden_summary_fpath=data/raw/0/example_prompt_transcript_golden_summary.txt
[2024-01-12 13:56:25,436] p31920 {544935498.py:29} INFO - exp_name=single-line-reason, prompt_template=titan_template.txt, reps=3
[2024-01-12 13:56:25,436] p31920 {544935498.py:35} INFO - going to summarize idx=0, file=data/raw/0/call_center_transcript_1_transcript.txt via amazon.titan-text-express-v1
[2024-01-12 13:56:25,437] p31920 {544935498.py:38} INFO - read prompt template from data/prompts/titan_template.txt
[2024-01-12 13:56:25,523] p31920 {544935498.py:43} INFO - prompt for data/raw/0/call_center_transcript_1_transcript.txt has 796 tokens
[2024-01-12 13:56:25,523] p31920 {528272729.py:4} INFO - model_id=amazon.titan-text-express-v1, prompt length is 3569 characters, 601 words
[2024-01-12 13:56:29,335] p31920 {528272729.py:41} INFO - {
  "exception": 