## Invoke Bedrock models asynchronously at multiple concurrency levels

In [81]:
import os
import re
import sys
import json
import yaml
import time
import glob
import logging
import pandas as pd
from typing import Dict
from pathlib import Path
from tokenizer_utils import count_tokens
from bedrock_utils import get_bedrock_client
from utils import  (
    get_rouge_l_score,
    get_cosine_similarity,
    parse_model_response,
    is_amazon_model)

In [82]:
## Configure logging at INFO level; each record carries timestamp, process id, file:line and level
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)


In [83]:
## Path of the YAML config file that holds the notebook's global settings
CONFIG_FILE_PATH = "config.yaml"


In [84]:
# Load the YAML config into `config` and log its contents as pretty-printed JSON
fpath = CONFIG_FILE_PATH
with Path(fpath).open('r') as yaml_in:
    config = yaml.safe_load(yaml_in)
logger.info(f"config read from {fpath} -> {json.dumps(config, indent=2)}")

[2024-01-12 15:11:10,708] p32262 {3034282685.py:5} INFO - config read from config.yaml -> {
  "app_name": "contact-center-transcript-summarization",
  "aws": {
    "region": "us-east-1",
    "sagemaker_execution_role": "Admin"
  },
  "dir": {
    "data": "data",
    "raw": "data/raw",
    "golden": "data/raw/golden",
    "prompts": "data/prompts",
    "models": "data/models",
    "metrics": "data/metrics",
    "completions": "data/completions",
    "async_completions": "data/async_completions"
  },
  "data": {
    "raw_data_file": "data.csv",
    "golden_transcript": "data/raw/golden/transcript.txt",
    "golden_transcript_summary": "data/raw/golden/summary.txt"
  },
  "prompt": {
    "very_large_prompt": {
      "sleep_time": 180,
      "threshold": 70000
    },
    "normal_prompt": {
      "sleep_time": 60
    }
  },
  "max_retries": 3,
  "desired_word_count_for_summary": 80,
  "experiments": [
    {
      "name": "single-line-reason",
      "prompt_template": null,
      "reps": 3,


In [85]:
## Initialize the Bedrock client (the cell output below reports a bedrock-runtime endpoint)
bedrock_client = get_bedrock_client()

[2024-01-12 15:11:16,551] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Create new client
  Using region: None
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)


In [86]:
## List the foundation models offered by Bedrock and show them as a DataFrame
# runtime=False: per the cell output this client targets the `bedrock` endpoint, not `bedrock-runtime`
boto3_bedrock = get_bedrock_client(runtime=False)
fm_list_response = boto3_bedrock.list_foundation_models()
# the per-model records live under the 'modelSummaries' key of the response
fm_list = fm_list_response['modelSummaries']
df_fm = pd.DataFrame(fm_list)
display(df_fm)


[2024-01-12 15:11:17,212] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Create new client
  Using region: None
boto3 Bedrock client successfully created!
bedrock(https://bedrock.us-east-1.amazonaws.com)


Unnamed: 0,modelArn,modelId,modelName,providerName,inputModalities,outputModalities,responseStreamingSupported,customizationsSupported,inferenceTypesSupported,modelLifecycle
0,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-tg1-large,Titan Text Large,Amazon,[TEXT],[TEXT],True,[],[ON_DEMAND],{'status': 'ACTIVE'}
1,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-e1t-medium,Titan Text Embeddings,Amazon,[TEXT],[EMBEDDING],,[],[ON_DEMAND],{'status': 'LEGACY'}
2,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-image-generator-v1:0,Titan Image Generator G1,Amazon,"[TEXT, IMAGE]",[IMAGE],,[FINE_TUNING],"[ON_DEMAND, PROVISIONED]",{'status': 'ACTIVE'}
3,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-image-generator-v1,Titan Image Generator G1,Amazon,"[TEXT, IMAGE]",[IMAGE],,[],[ON_DEMAND],{'status': 'ACTIVE'}
4,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-embed-g1-text-02,Titan Text Embeddings v2,Amazon,[TEXT],[EMBEDDING],,[],[ON_DEMAND],{'status': 'ACTIVE'}
5,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-lite-v1:0:4k,Titan Text G1 - Lite,Amazon,[TEXT],[TEXT],True,"[FINE_TUNING, CONTINUED_PRE_TRAINING]",[PROVISIONED],{'status': 'ACTIVE'}
6,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-lite-v1,Titan Text G1 - Lite,Amazon,[TEXT],[TEXT],True,[],[ON_DEMAND],{'status': 'ACTIVE'}
7,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-express-v1:0:8k,Titan Text G1 - Express,Amazon,[TEXT],[TEXT],True,"[FINE_TUNING, CONTINUED_PRE_TRAINING]",[PROVISIONED],{'status': 'ACTIVE'}
8,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-text-express-v1,Titan Text G1 - Express,Amazon,[TEXT],[TEXT],True,[],[ON_DEMAND],{'status': 'ACTIVE'}
9,arn:aws:bedrock:us-east-1::foundation-model/am...,amazon.titan-embed-text-v1:2:8k,Titan Embeddings G1 - Text,Amazon,[TEXT],[EMBEDDING],False,[],[PROVISIONED],{'status': 'ACTIVE'}


In [87]:
## Collect every file ending in "transcript.txt" exactly one directory level below the raw data dir
# glob does not sort its results (the logged order below is 0, 1, 4, 3, 2)
transcript_files = glob.glob(os.path.join(config['dir']['raw'], "*", "*transcript.txt"))
logger.info(f"found {len(transcript_files)} transcript_files ->\n{transcript_files}")

[2024-01-12 15:11:18,426] p32262 {3139140113.py:2} INFO - found 5 transcript_files ->
['data/raw/0/call_center_transcript_1_transcript.txt', 'data/raw/1/call_center_transcript_0_transcript.txt', 'data/raw/4/call_center_transcript_4_transcript.txt', 'data/raw/3/call_center_transcript_3_transcript.txt', 'data/raw/2/call_center_transcript_2_transcript.txt']


In [88]:
## Reload the utils module so edits made to utils.py after its first import are re-executed
## (names already bound with `from utils import ...` keep pointing at the pre-reload objects)
import importlib
import utils
importlib.reload(utils)

<module 'utils' from '/Users/madhurpt/Downloads/bedrock-contact-center-tasks-eval-main-3/utils.py'>

In [89]:
from json import JSONEncoder
import re
from pathlib import Path
import os
import time
import utils
import json
from create_payload import model_payloads



## Create a payload referring to the model_payloads script
def create_payloads_for_all_models(transcript_files, config: Dict) -> list:
    """Build one request payload per (transcript, experiment, model) combination.

    Args:
        transcript_files: iterable of paths to transcript text files; each file
            is read in full as text.
        config: parsed configuration. Reads ``config['experiments']`` (each
            entry needs a ``model_list`` of ``{'model': <model id>}`` dicts) and
            ``config['bedrock_models']`` (a mapping keyed by model id).

    Returns:
        A list of ``{'model_id': ..., 'payload': ...}`` dicts ordered by
        transcript, then experiment, then model. A model whose id has no entry
        in ``config['bedrock_models']`` is logged as an error and skipped.
    """
    all_payloads = []

    for tf in transcript_files:
        transcript = Path(tf).read_text()

        for experiment in config['experiments']:
            for model_info in experiment['model_list']:
                model_name = model_info['model']

                # Only models declared under bedrock_models are eligible
                if config['bedrock_models'].get(model_name) is None:
                    logger.error(f"model={model_name} not found in bedrock_models")
                    continue

                # The request body format is delegated to the imported model_payloads helper
                all_payloads.append({
                    "model_id": model_name,
                    "payload": model_payloads(transcript, model_name)
                })

    return all_payloads

## Preview only: the list is the cell's last expression, so the notebook renders it in full below.
## The result is not stored here; the run cell builds `all_payloads` again.
print("All model payloads per transcript per bedrock model ->")
create_payloads_for_all_models(transcript_files, config)



All model payloads per transcript per bedrock model ->


[{'model_id': 'amazon.titan-text-express-v1',
  'payload': {'inputText': "A: I wanted to discuss our strategy around generative AI and how we should approach this emerging technology. As you know, several applications have captured a lot of attention recently. \nAction item: Set up a follow-up meeting to brainstorm ideas for where generative AI could be applicable in our products\n\nB: Yes, generative AI is definitely a hot topic right now. All the major tech companies seem to be investing heavily in this space.\nAction item: Research current generative AI initiatives at other tech companies to analyze the competitive landscape\n\nA: Exactly. I think we need to have a plan here too or risk falling behind. What kind of applications do you see for generative AI in our products? Could it be used to automate certain processes or enhance our users' experience?\nAction item: Outline high-level ideas for where generative AI could drive automation or enhance user experience in our products\n\n

In [90]:
import asyncio
import json
from typing import Dict, List, Optional

import botocore.session
import requests as req
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

# Used by get_inference both to build the "<service>-runtime" endpoint host and as the SigV4 signing name
SERVICE_NAME = 'bedrock'
# AWS region used for the endpoint host and for request signing
# NOTE(review): hard-coded here although config.yaml also carries aws.region — confirm they should match
region = 'us-east-1'

## Utilizing the bedrock REST API to get inference from each bedrock model
def get_inference(model_id: str, payload: Dict, timeout: Optional[float] = None) -> Optional[Dict]:
    """Invoke a Bedrock model through its SigV4-signed REST ``invoke`` endpoint.

    Args:
        model_id: Bedrock model id; interpolated into the endpoint path.
        payload: JSON-serialisable request body for the model.
        timeout: seconds handed to ``requests.post``. ``None`` (the default)
            waits indefinitely, which matches the previous behaviour.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` on any other status code or
        on any exception; the problem is printed rather than raised.
    """
    try:
        endpoint = f"https://{SERVICE_NAME}-runtime.{region}.amazonaws.com/model/{model_id}/invoke"
        request_body = json.dumps(payload)

        # Build and sign the request with botocore so the signature headers can be reused by `requests`.
        # A fresh botocore session (and credential lookup) is created on every call.
        request = AWSRequest(method='POST', url=endpoint, data=request_body, headers={'content-type': 'application/json'})
        session = botocore.session.Session()
        sigv4 = SigV4Auth(session.get_credentials(), SERVICE_NAME, region)
        sigv4.add_auth(request)
        prepped = request.prepare()

        # Send exactly the body that was signed
        response = req.post(prepped.url, headers=prepped.headers, data=request_body, timeout=timeout)
        if response.status_code == 200:
            return response.json()

        print(f"Error: Received status code {response.status_code}, Response: {response.text}")
        return None
    except Exception as e:
        # Best effort: callers treat None as "no response" instead of handling exceptions
        print(f"Exception occurred: {e}")
        return None

## Run one blocking model invocation in a worker thread so the event loop stays free
async def async_calls_on_model(model_id, payload):
    """Await a single ``get_inference`` call executed via ``asyncio.to_thread``.

    Returns whatever ``get_inference`` returns; if awaiting it raises, the
    error is printed and ``None`` is returned instead.
    """
    try:
        return await asyncio.to_thread(get_inference, model_id, payload)
    except Exception as e:
        print(f"Error in async_calls_on_model: {e}")
    return None

async def async_invoke_model(model_name, payloads):
    """Invoke ``model_name`` once per payload, with all calls in flight together.

    Awaiting each call inside a ``for`` loop would run them one after another,
    so a batch would never exercise the model concurrently. Here every call is
    started first and then awaited as a group.

    Args:
        model_name: Bedrock model id passed to ``async_calls_on_model``.
        payloads: request bodies, one per invocation.

    Returns:
        A list of responses in the same order as ``payloads``.
    """
    pending = [async_calls_on_model(model_name, payload) for payload in payloads]
    # gather returns results in the order the awaitables were passed, not in completion order
    return list(await asyncio.gather(*pending))


In [91]:
import csv

## Keep only the payload entries that were built for the given model id
def filter_payloads_for_model(model_name, all_payloads):
    """Return the entries of ``all_payloads`` whose ``'model_id'`` equals ``model_name``, in order."""
    matching = []
    for entry in all_payloads:
        if entry['model_id'] == model_name:
            matching.append(entry)
    return matching

## Function to process a model through invoking it with transcripts through various concurrencies in config.yml
async def process_model(model_info, all_payloads, csv_writer, concurrency_level):
    """Invoke one model over all of its payloads in batches of ``concurrency_level``.

    Args:
        model_info: dict whose ``'model'`` key holds the model id.
        all_payloads: list of ``{'model_id': ..., 'payload': ...}`` entries;
            only those matching this model are used.
        csv_writer: object with a ``writerow(dict)`` method (e.g. ``csv.DictWriter``).
        concurrency_level: number of payloads per batch; must be >= 1.

    Raises:
        ValueError: if ``concurrency_level`` is smaller than 1.

    One CSV row is written per response. The latency on a row is the wall time
    of the whole batch it belongs to, so every row of a batch shares one value.
    """
    if concurrency_level < 1:
        raise ValueError(f"concurrency_level must be >= 1, got {concurrency_level}")

    model_id = model_info['model']
    # Local name chosen so it does not shadow the imported `model_payloads` function
    payloads_for_model = filter_payloads_for_model(model_id, all_payloads)

    for offset in range(0, len(payloads_for_model), concurrency_level):
        batch_payloads = [entry['payload'] for entry in payloads_for_model[offset:offset + concurrency_level]]
        print(f"Running {model_id} at concurrency level {concurrency_level} with {len(batch_payloads)} requests...")

        # perf_counter is monotonic, so the interval cannot be skewed by wall-clock adjustments
        started = time.perf_counter()
        responses = await async_invoke_model(model_id, batch_payloads)
        latency = time.perf_counter() - started  # in seconds

        # Log and write each response to the CSV
        for position, (request_payload, response) in enumerate(zip(batch_payloads, responses), start=1):
            csv_writer.writerow({
                'Model_id': model_id,
                'call transcript': request_payload,
                'Concurrency Level': concurrency_level,
                'Latency (seconds)': latency,
                'Response': json.dumps(response)
            })

            print(f"Response {position} at concurrency level {concurrency_level}: {response}")

        print(f"Latency: {latency} seconds\n")

## Start a fresh results CSV that contains only the header row
def init_csv_file(csv_file_path):
    """Create (or truncate) ``csv_file_path`` and write the results header line."""
    columns = ['Model_id', 'call transcript', 'Concurrency Level', 'Latency (seconds)', 'Response']
    with open(csv_file_path, mode='w', newline='') as csv_file:
        csv.DictWriter(csv_file, fieldnames=columns).writeheader()

In [None]:
## Function to process all of the models to invoke all transcripts one by one based on the concurrency level
async def process_all_models(all_payloads, config, csv_file_path):
    """Run ``process_model`` for every model at each of its configured concurrency levels.

    Levels are visited in ascending order from 1 up to the highest level any
    model requests. Within one level, all models that list that level run as
    concurrent tasks and the level finishes when all of them finish.

    Args:
        all_payloads: list of ``{'model_id': ..., 'payload': ...}`` entries.
        config: parsed configuration; reads ``config['experiments']``, each
            with a ``model_list`` whose entries carry ``concurrency_levels``.
        csv_file_path: results CSV, opened in append mode (the header is
            expected to exist already).

    A config with no experiments, no models or no concurrency levels results
    in no invocations rather than a ``ValueError`` from ``max()``.
    """
    highest_level = max(
        (
            level
            for experiment in config['experiments']
            for model_info in experiment['model_list']
            for level in model_info['concurrency_levels']
        ),
        default=0,
    )

    with open(csv_file_path, 'a', newline='') as file:
        csv_writer = csv.DictWriter(file, fieldnames=['Model_id', 'call transcript', 'Concurrency Level', 'Latency (seconds)', 'Response'])

        for concurrency_level in range(1, highest_level + 1):
            # One task per (experiment, model) pair that asked for this level
            tasks = [
                asyncio.create_task(process_model(model_info, all_payloads, csv_writer, concurrency_level))
                for experiment in config['experiments']
                for model_info in experiment['model_list']
                if concurrency_level in model_info['concurrency_levels']
            ]
            # Levels nobody requested are skipped outright
            if tasks:
                await asyncio.gather(*tasks)

# Results file: recreated with just the header on every run
csv_file_path = 'async_bedrock_model_performance.csv'
init_csv_file(csv_file_path)

# Create payloads
all_payloads = create_payloads_for_all_models(transcript_files, config)

# Run the benchmark. asyncio.get_event_loop() is deprecated when no loop is running,
# so detect a running loop explicitly instead.
try:
    loop = asyncio.get_running_loop()
except RuntimeError:
    # No running loop (e.g. executed as a plain script): run to completion here
    asyncio.run(process_all_models(all_payloads, config, csv_file_path))
else:
    # A loop is already running (as in a notebook kernel): schedule the work on it.
    # The cell returns right away; rows and prints keep arriving while `task` runs.
    task = loop.create_task(process_all_models(all_payloads, config, csv_file_path))

[2024-01-12 15:11:30,943] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:11:30,943] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:11:30,944] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Running amazon.titan-text-express-v1 at concurrency level 1 with 1 requests...
Running anthropic.claude-instant-v1 at concurrency level 1 with 1 requests...
Running cohere.command-text-v14 at concurrency level 1 with 1 requests...


[2024-01-12 15:11:34,249] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'generations': [{'finish_reason': 'MAX_TOKENS', 'id': '7f885b12-cfb8-4069-b155-2b2a8623e5cb', 'text': " should not be used for. Otherwise, we could open ourselves up to potential misuse. \n\nAction item: Create a governance framework to outline appropriate use cases and ensure fair and responsible use of generative AI technology. This framework will help us stay aligned as we implement this technology.\n\nA: Thank you all for the productive discussion. Let's come back together to review our progress on these action items in a week. In the meantime, please share your initial thoughts and any relevant materials with the"}], 'id': 'd7446088-0cd3-49cb-9348-8c4315474047', 'prompt': "A: I wanted to discuss our strategy around generative AI and how we should approach this emerging technology. As you know, several applications have captured a lot of attention recently. \nAction item: Set up a follow-up meeting to brainstorm ideas for where generative AI coul

[2024-01-12 15:11:35,608] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:11:35,664] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'inputTextTokenCount': 282, 'results': [{'tokenCount': 128, 'outputText': " should not be used for to ensure that we're adhering to our company's values and legal requirements.\nAction item: Draft guidelines for responsible use of generative AI and present them to leadership for approval\n\nA: That's a good point. We should also consider how we can integrate generative AI with other technologies like natural language processing and machine learning to create even more powerful and personalized experiences for our users.\nAction item: Explore potential partnerships with companies that specialize in these areas to integrate generative AI into our products\n\nC: Another thing to think about is how we can train our employees to work with generative AI and", 'completionReason': 'LENGTH'}]}
Latency: 4.674283981323242 seconds

Running amazon.titan-text-express-v1 at concurrency level 1 with 1 requests...
Response 1 at concurrency level 1: {'completion': ' Y

[2024-01-12 15:11:37,589] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'generations': [{'finish_reason': 'MAX_TOKENS', 'id': '30aa985e-aeee-434a-8c1d-ee2a43c485f8', 'text': ' Based on the conversation, it appears that there are issues with the user experience surrounding the entry or access of users to a product or service. A complex and time-consuming process is currently in place. To improve this, it is proposed to simplify the user experience by removing friction and making it easier for users to find and access the product. In order to achieve this, it is decided that work will be done on the landing page and the forms users must fill out. \n\nAre there any further'}], 'id': 'a94a0a0d-95c4-4a63-9f63-e9c4f89f035c', 'prompt': 'Meeting transcript: \nA: Hi B, I want to discuss the workstream for our new product launch\nB: Sure A, is there anything in particular you want to discuss?\nA: Yes, I want to talk about how users enter into the product.  \nB: Ok, in that case let me add in C.\nC: Hey everyone\nB: Hi C, A wants t

[2024-01-12 15:11:38,275] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'inputTextTokenCount': 234, 'results': [{'tokenCount': 69, 'outputText': ' I will document all the issues and send it to you by EOD.\nA and B discuss how users enter into the product. A wants to remove friction and make it more discoverable, while B can work on the additional forms and unblock the sign up workflow. A will document any other concerns and send them to B by EOD.', 'completionReason': 'FINISH'}]}
Latency: 2.673892021179199 seconds

Running amazon.titan-text-express-v1 at concurrency level 1 with 1 requests...


[2024-01-12 15:11:39,728] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'completion': ' Here is a summary of the key discussion points:\n\n- A wanted to discuss how users enter the new product, specifically any friction points in the onboarding process. \n\n- A found it too complicated with additional forms that added unnecessary steps. The landing page also made it difficult to find how to access the product.\n\n- B agreed there were too many steps in the process. \n\n- C offered to work on improving discoverability on the landing page. B agreed to work on streamlining the additional forms but would need to coordinate with James from another team to unblock the signup workflow. \n\n- A offered to document any other concerns upfront so B could discuss everything at once with James to reduce back-and-forth.\n\nThe main topics discussed were friction points in the user onboarding experience, including unnecessary forms, too many steps, and discoverability on the landing page. Action items were assigned to C to improve the 

[2024-01-12 15:11:40,511] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'generations': [{'finish_reason': 'COMPLETE', 'id': '4be8a4b5-27f2-4c19-a11f-8ced812da72b', 'text': ' In this conversation, the team is discussing how to choose the best cloud instance type to host their product recommendation model. They have determined that they need a GPU-powered instance to provide fast predictions and a good user experience. They will need to choose a specific GPU instance type to find the best balance of throughput and cost. The team will explore options such as NVIDIA T4 and A100 GPUs and Elastic Inference GPU attachments. '}], 'id': '54b95bc2-6cec-4298-a7c8-55d9b09fda54', 'prompt': "A: I wanted to further discuss options for the optimal cloud instance type to host our new product recommendation model. As a refresher on the requirements - this model will be query intensive, with thousands of customers hitting the prediction API simultaneously. And it relies on a large deep neural network for the recommendations. \n\nB: Yes, we

[2024-01-12 15:11:42,803] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'inputTextTokenCount': 460, 'results': [{'tokenCount': 128, 'outputText': "\n\nA: That's a valid point. We should definitely do some testing to see if it would impact the model performance. Another option to consider is using serverless computing platforms like AWS Lambda or GCP Cloud Functions for the prediction API. They could handle the increased workload and scale automatically without the need for managing instances.\n\nB: That's a great suggestion. Serverless would also eliminate the need for us to worry about instance types and scaling. However, we would need to ensure that the serverless platform can handle the latency requirements of our deep learning model and provide enough concurrent execution capacity.\n\nA: Absolutely,", 'completionReason': 'LENGTH'}]}
Latency: 4.5204758644104 seconds

Running amazon.titan-text-express-v1 at concurrency level 1 with 1 requests...


[2024-01-12 15:11:43,831] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:11:43,981] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'generations': [{'finish_reason': 'MAX_TOKENS', 'id': '3ea87410-de11-4cd6-89cc-a3c125887546', 'text': " As an AI model, I can provide some insights to your team's discussion on developing responsible AI systems. \n\nThe team has identified several key considerations in ensuring responsible development of AI systems: \n\n1. Safety: The team discusses the need to develop a safety review process for new AI models and outline additional risks, such as data bias and security concerns. This proactive approach ensures that potential hazards are identified and mitigated early in the development process. \n2. Value Alignment: Team members agree that"}], 'id': '34ddbb45-5b5d-4213-b1e3-c0fdb58502de', 'prompt': "A: I wanted to have a broader discussion on responsible AI development principles. As we build more AI models, how do we ensure they are helpful, harmless, and aligned with human values?\n\nB: This is an important topic. We need to make ethical considera

[2024-01-12 15:11:47,240] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'generations': [{'finish_reason': 'MAX_TOKENS', 'id': 'ac39d7fe-1177-4483-ae26-c5baf7344c14', 'text': " Great discussion, team! Let's summarize and assign the following action items:\n\n- A: Research mobile gaming trends and identify potential niches to target, with a focus on markets that have been overlooked. \n- B: Outline the pros and cons of different monetization models (freemium, paid, in-game ads, sponsorships) and recommend the best approach for our target audience.\n- C: Explore in-game ad networks and their potential benefits, along with retention strategies and"}], 'id': '05f408d4-367d-4bcb-a3aa-def050f8f7d8', 'prompt': 'A: I wanted to discuss ideas for a new startup in the mobile gaming space. The success of companies like Angry Birds shows there is a lot of potential still untapped. \nAction item: Research recent trends and growth in mobile gaming to identify opportunities\n\nB: I agree mobile gaming is hot right now. Have you thought a

[2024-01-12 15:11:49,942] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'completion': ' You all raise excellent points. A few additional thoughts:\n\n- Consider how to involve diverse stakeholders, including subject matter experts, in the development process. Their perspectives can help surface unintended consequences.\n\n- Be transparent not just about model decisions, but the entire development process. This builds trust.\n\n- Plan for ongoing human oversight. Even the most carefully designed systems may require mid-flight adjustments as real-world use uncovers new issues. \n\n- Focus on societal impacts, not just technical metrics. Ensure any AI application meaningfully benefits humanity. \n\n- Remain vigilant about potential harms like discrimination, privacy risks, job disruption and more. New issues will emerge over time. \n\n- Continually re-evaluate objectives as society\'s values change. What is considered "helpful" today may differ in five or ten years. \n\n- Share knowledge and collaborate across companies/res

[2024-01-12 15:11:56,966] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:11:56,966] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:11:56,966] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 1: {'completion': " Here are some key points we discussed:\n\n- Mobile gaming is a hot space with ongoing opportunities, as shown by Angry Birds' success. \n\n- We should research trends to identify openings in the market. \n\n- Consider freemium vs paid models, balancing monetization strategies. \n\n- In-game ads/sponsorships are alternative revenue streams worth exploring. \n\n- User retention and engagement should be top priorities in design.\n\n- Rapidly prototyping concepts allows validating demand before heavy development. \n\n- AI tools could help speed prototyping by auto-generating basic assets/mechanics for testing.\n\n- While AI can automate pieces, official launches still require ensuring a quality standard.\n\nThe next steps we agreed on include:\n\n1. Researching mobile gaming trends and growth \n2. Outlining pros/cons of freemium vs paid models\n3. Exploring in-game ad networks and pricing models  \n4. Brainstorming user retention strategi

[2024-01-12 15:11:58,652] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:12:01,145] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:12:01,817] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:12:01,892] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 2: {'generations': [{'finish_reason': 'COMPLETE', 'id': '502b905f-98e7-4165-a136-49aa1d24d35b', 'text': " shouldn't do for our products to ensure consistent and appropriate application of the technology.\nAction item: Create a document outlining the ethical guidelines for using generative AI, and specify what consequences should apply if these guidelines are not followed."}], 'id': '83b8afcf-7ff7-44b8-9f1d-e278392cdfdb', 'prompt': "A: I wanted to discuss our strategy around generative AI and how we should approach this emerging technology. As you know, several applications have captured a lot of attention recently. \nAction item: Set up a follow-up meeting to brainstorm ideas for where generative AI could be applicable in our products\n\nB: Yes, generative AI is definitely a hot topic right now. All the major tech companies seem to be investing heavily in this space.\nAction item: Research current generative AI initiatives at other tech companies to anal

[2024-01-12 15:12:04,286] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 2: {'inputTextTokenCount': 282, 'results': [{'tokenCount': 128, 'outputText': " should not be used for, and have a process in place to evaluate new applications as they come up.\n\nA: That's a good point. We should also keep an eye on the regulatory landscape around generative AI and make sure we're compliant.\n\nAction item: Research the regulatory landscape around generative AI and identify any potential risks or compliance issues\n\nBy setting up a follow-up meeting to brainstorm ideas for where generative AI could be applicable in our products, researching current generative AI initiatives at other tech companies to analyze the competitive landscape, outlining high-level ideas for where generative AI could drive automation or", 'completionReason': 'LENGTH'}]}
Response 2 at concurrency level 2: {'inputTextTokenCount': 234, 'results': [{'tokenCount': 65, 'outputText': '\nA wants to discuss how users enter into the product and proposes removing friction

[2024-01-12 15:12:04,886] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:12:05,154] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 2: {'completion': " You all raise excellent points. While generative AI shows promise, ensuring safety, accuracy and ethics should be top priorities before product integration. Some ideas:\n\n- Start small - pilot low-risk applications to test validation frameworks. This allows learning while limiting potential harms. \n\n- Involve diverse stakeholders from the beginning. Bring in those representing various communities to advise on issues like bias, privacy and acceptable uses. \n\n- Be transparent. Clearly communicate model limitations and oversight measures to build trust with users and regulators. \n\n- Continuously evaluate. Monitor applications post-launch and make improvements based on real-world data to strengthen performance and address emerging concerns.\n\nThe technology is still evolving rapidly. Moving carefully and accountably now can help us reap benefits while avoiding pitfalls down the road. Does this perspective help further a thoughtful

[2024-01-12 15:12:06,369] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 2: {'generations': [{'finish_reason': 'COMPLETE', 'id': '17bbc838-cbfc-48cb-8ab0-b922180eac8e', 'text': " In this discussion, it is generally agreed that using GPU acceleration is necessary for the new product recommendation model because of the sub-second prediction latency needed to provide a good user experience. The team will consider different GPU options from major cloud providers, including NVIDIA's T4 and A100 GPUs, and explore Elastic Inference GPU attachments for fractional GPU access. Compatibility with the model and cost-effectiveness will be the main factors in the decision. "}], 'id': 'd67ec378-2360-405f-ba8b-06a80f0c0baa', 'prompt': "A: I wanted to further discuss options for the optimal cloud instance type to host our new product recommendation model. As a refresher on the requirements - this model will be query intensive, with thousands of customers hitting the prediction API simultaneously. And it relies on a large deep neural network f

[2024-01-12 15:12:08,597] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:12:09,338] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 2: {'generations': [{'finish_reason': 'MAX_TOKENS', 'id': '9e6cfca7-cc97-4209-acae-80f579d0a2ad', 'text': ' As the team behind a startup in the mobile gaming space, you each have a role to play in the development and success of the business. It is important to take into consideration your ideas and to take action on those ideas. \n\nHere are some potential tasks and assignments to consider: \n\n1. Research mobile gaming trends: Identify growth opportunities and gain a comprehensive understanding of the current market. This will help you all make informed decisions about the type of games to develop and how best to engage your'}], 'id': '750faf68-b5cf-4b9a-b792-4619f74311e7', 'prompt': 'A: I wanted to discuss ideas for a new startup in the mobile gaming space. The success of companies like Angry Birds shows there is a lot of potential still untapped. \nAction item: Research recent trends and growth in mobile gaming to identify opportunities\n\nB: I agree 

[2024-01-12 15:12:13,450] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[2024-01-12 15:12:13,509] p32262 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials


Response 1 at concurrency level 2: {'inputTextTokenCount': 460, 'results': [{'tokenCount': 128, 'outputText': "\n\nA: Agreed, it's worth investigating. Another option to consider is using serverless computing platforms like AWS Lambda or GCP Cloud Functions for the prediction API. These platforms can automatically scale to meet demand and can handle bursty traffic without the need for upfront infrastructure investments.\n\nB: That's a great point. Serverless could be particularly useful for handling unpredictable spikes in traffic. We'd need to ensure that the model can be deployed and optimized for serverless environments though.\n\nC: Definitely, there are some challenges with serverless, but it could be worth exploring as an alternative to managing our own infrastructure", 'completionReason': 'LENGTH'}]}
Response 2 at concurrency level 2: {'inputTextTokenCount': 385, 'results': [{'tokenCount': 128, 'outputText': "\n\nB: One other thing we could consider is how to involve a diverse r