### Run Inference: Generate titles for chapterized components from the processed data
---

In this notebook:

1. We extract the processed data, including the messages from speakers in the transcripts

2. We load that data into our Bedrock models: ***Titan, Claude, Llama***

3. We generate a title for each chapter of the respective meetings

In [1]:
import os
import json
import yaml
import copy
import time
import boto3
import logging
import pandas as pd  
from typing import Dict
from pathlib import Path
from litellm import completion ## support for text generation models on bedrock

#### Set a logger 

In [2]:
# every record carries the timestamp, process id, source file/line and level
LOG_FORMAT = '[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# module-level logger shared by all cells below
logger = logging.getLogger(__name__)

#### Load the config file: Contains model information, data directory information

In [3]:
## load the config file
# global constants
# relative path of the YAML config file that is read in the next cell
# (it contains model information and data directory information)
CONFIG_FILE_PATH: str = "config.yml"

In [4]:
# parse the YAML config into `config` and log its content for traceability
fpath = CONFIG_FILE_PATH
with open(fpath, mode='r') as config_stream:
    config = yaml.safe_load(config_stream)
# pretty-print the parsed config so the run settings are visible in the log
config_dump = json.dumps(config, indent=2)
logger.info(f"config read from {fpath} -> {config_dump}")

[2024-04-01 12:24:13,337] p84452 {3034282685.py:5} INFO - config read from config.yml -> {
  "app_name": "genai-chapterize-meeting-transcripts",
  "aws": {
    "region": "us-east-1"
  },
  "dir": {
    "data": "data",
    "raw": "data/source_data",
    "processed": "data/processed_data",
    "completions": "data/title_completions",
    "golden": "data/source_data/golden",
    "prompts": "data/prompts",
    "metrics": "data/metrics",
    "processed_file": "processed.csv",
    "chapterized_file": "chapterized.csv",
    "metrics_file": "per_request_results.csv",
    "summary_metrics_file": "summary_metrics.csv",
    "model_evals_file": "model_eval.csv",
    "file_type_to_process": "vtt"
  },
  "inference_parameters": {
    "temperature": 0.1,
    "caching": false
  },
  "title_generation_thresholds": {
    "max_chapter_length": 20
  },
  "experiments": [
    {
      "name": "chapterize-meeting-transcripts",
      "prompt_template": null,
      "model_list": [
        {
          "model": 

#### Function to generate titles for chapters (capped at 15 lines per chapter)

In [5]:
# function to encapsulate call to any Bedrock model for text generation
def generate_chapter_title(model_id: str, prompt: str) -> Dict:
    """Invoke a Bedrock text generation model through litellm and collect the result.

    Args:
        model_id: Bedrock model identifier; it is prefixed with "bedrock/" to
            build the model name passed to litellm.
        prompt: Full prompt text, sent to the model as a single user message.

    Returns:
        A dict holding the prompt, the completion (None if no content was
        returned), the prompt/completion token counts, the latency in seconds
        and the exception (None on success). The `file_name`, `chapter_id`,
        `chapter_text` and pricing fields are initialized to None and are not
        set by this function.

    Any exception raised while invoking the model or reading its response is
    logged and stored in the `exception` field instead of being propagated.
    """
    # represents the service name
    service_name: str = "bedrock"
    # represents creating the bedrock model to invoke the litellm api for response for titan, llama and claude
    bedrock_model: str = f"{service_name}/{model_id}"
    # region_name is None when the boto3 session has no default region, so
    # fall back to the region declared in the config file (aws.region)
    aws_region = boto3.Session().region_name or (config.get('aws') or {}).get('region')
    logger.info(f"model_id={model_id}, prompt length is {len(prompt)} characters, {len(prompt.split())} words")      

    # initialize the response dict
    ret = dict(exception = None,
               prompt = prompt,
               completion = None,
               file_name = None,
               chapter_id = None, 
               model_id = model_id,
               time_taken_in_seconds = None, 
               # initializing to 0 since none type throws an error later, this is used to calculate price per token input/output on ODT pricing
               completion_token_count = 0,
               # initializing to 0 since none type throws an error later
               prompt_token_count=0,
               input_token_price = None, 
               output_token_pricing = None, 
               chapter_text = None)

    # set the env var for aws_region; os.environ only accepts strings, so a
    # missing region must not be assigned (it would raise a TypeError)
    if aws_region:
        os.environ["AWS_REGION_NAME"] = aws_region
    else:
        logger.warning("no AWS region found in the boto3 session or the config file, AWS_REGION_NAME is left unchanged")

    # get the temperature and caching for invoking the bedrock model via the lite llm api
    parameters = config['inference_parameters']
    temperature = parameters.get('temperature', None)
    caching = parameters.get('caching', None)
    try:
        # Represents calling the litellm completion/messaging api utilizing the completion/embeddings API
        # [CLAUDE, LLAMA, ai21, MISTRAL, MIXTRAL, COHERE]
        logger.info(f"Invoking {bedrock_model}......")
        st = time.perf_counter()
        response = completion(model=bedrock_model,
                              messages=[{ "content": prompt,"role": "user"}],
                              temperature=temperature,
                              caching=caching)
        elapsed_seconds = time.perf_counter() - st

        # iterate through the entire model response; when several choices
        # carry content, the last one is the one that is kept
        for idx, choice in enumerate(response.choices):
            # extract the message and the message's content from litellm
            if choice.message and choice.message.content:
                ret["completion"] = choice.message.content
                logger.info(f"*********** idx={idx}, choice.message.content={choice.message.content}")

        # Extract number of input and completion prompt tokens (this is the same structure for embeddings and text generation models on Amazon Bedrock)
        ret['prompt_token_count'] = response.usage.prompt_tokens
        ret['completion_token_count'] = response.usage.completion_tokens
        # Extract latency in seconds: prefer the value reported on the response,
        # but `_response_ms` is a private attribute, so fall back to the
        # wall-clock time measured around the call when it is not available
        latency_ms = getattr(response, '_response_ms', None)
        if latency_ms is not None:
            ret['time_taken_in_seconds'] = latency_ms / 1000
        else:
            ret['time_taken_in_seconds'] = elapsed_seconds
    except Exception as e:
        logger.error(f"Exception occurred during invoking {model_id}, exception={e}")
        ret['exception'] = e
        
    return ret

#### Running invocations for all bedrock models to generate chapter titles for meetings

In [6]:
## Locate the chapterized csv file; each of its rows is one chapter to generate a title for
dir_config = config['dir']
processed_data_chapters_fpath = os.path.join(dir_config['processed'], dir_config['chapterized_file'])
logger.info(f"going to read data from {processed_data_chapters_fpath}")
# every row of this dataframe is run through every model under evaluation,
# so it is loaded once here and each model is later `apply`-ed to each row
df = pd.read_csv(processed_data_chapters_fpath)
distinct_file_names = df['file_name'].unique()
logger.info(f"there are {len(distinct_file_names)} vtt files to process")
df.head()

[2024-04-01 12:24:13,351] p84452 {3131961393.py:3} INFO - going to read data from data/processed_data/chapterized.csv
[2024-04-01 12:24:13,354] p84452 {3131961393.py:7} INFO - there are 1 vtt files to process


Unnamed: 0,file_name,chapter_id,text
0,particle_physics_meeting.vtt,1,"""Have you all seen the latest results from the..."
1,particle_physics_meeting.vtt,2,"""Not necessarily. With the advent of more powe..."
2,particle_physics_meeting.vtt,3,"""You make a valid point, David. As scientists,..."
3,particle_physics_meeting.vtt,4,"""*nods in agreement* To the pursuit of underst..."


In [7]:
def get_inference(row: pd.core.series.Series, total: int, model_info: Dict) -> Dict:
    """Generate a title for one chapter row, price the call and save the response.

    Args:
        row: one row of the chapterized dataframe; its `text`, `file_name`
            and `chapter_id` fields are read.
        total: number of rows being processed, only used in the progress log.
        model_info: model entry from the config; its `model`,
            `prompt_template`, `input_tokens_pricing` and
            `output_tokens_pricing` keys are read.

    Returns:
        The dict returned by `generate_chapter_title`, completed with the
        chapter text, file name, chapter id and the input/output token prices.
        The same dict is also written as JSON under the completions directory.
    """
    logger.info(f"row {row.name}/{total}, prompt_template={model_info['prompt_template']}, model_id={model_info['model']}")
    model_id = model_info['model']

    # fill the model specific prompt template with the chapter text
    template_path = Path(os.path.join(config['dir']['prompts'], model_info['prompt_template']))
    prompt = template_path.read_text().format(row['text'])
    # ask the model for a title for this chapter
    resp = generate_chapter_title(model_id, prompt)

    # record which chapter this response belongs to
    resp.update(chapter_text=row['text'], file_name=row['file_name'], chapter_id=row['chapter_id'])
    # make sure the token counts are plain ints before pricing them
    for count_key in ('prompt_token_count', 'completion_token_count'):
        resp[count_key] = int(resp[count_key])

    # the configured prices are applied per 1000 tokens
    thousands_of_prompt_tokens = resp['prompt_token_count']/1000
    resp['input_token_price'] = thousands_of_prompt_tokens * model_info['input_tokens_pricing']
    logger.info(f"The price for {resp['prompt_token_count']} tokens for filename={row['file_name']} chapter={row['chapter_id']} is {resp['input_token_price']}")
    thousands_of_completion_tokens = resp['completion_token_count']/1000
    resp['output_token_pricing'] = thousands_of_completion_tokens * model_info['output_tokens_pricing']
    logger.info(f"The price for {resp['completion_token_count']} tokens for filename={row['file_name']} chapter={row['chapter_id']} is {resp['output_token_pricing']}")

    # one folder per transcript file and model (":" in the model id is replaced by "-"),
    # one json file per chapter
    out_dir = os.path.join(config['dir']['completions'], row['file_name'], model_id.replace(":", "-"))
    os.makedirs(out_dir, exist_ok=True)
    fpath = os.path.join(out_dir, f"chapter_{row['chapter_id']}.json")
    logger.info(f"writing response={resp} to {fpath}")
    # default=str serializes values json cannot handle natively (e.g. a stored exception)
    Path(fpath).write_text(json.dumps(resp, default=str, indent=2))
    return resp
                

In [8]:
# run every model of every experiment over all chapter rows and time each model
for experiment in config['experiments']:
    exp_name = experiment['name']
    model_list = experiment['model_list']
    for model_info in model_list:
        st = time.perf_counter()
        logger.info(f"------ running inference for {model_info['model']} -----")
        # get_inference receives each row followed by the row count and the model entry
        resp_list = df.apply(get_inference, axis=1, args=(df.shape[0], model_info))
        elapsed_time = time.perf_counter() - st
        logger.info(f"------ model={model_info['model']} completed in {elapsed_time} ------ ")


[2024-04-01 12:24:13,439] p84452 {2515565352.py:6} INFO - ------ running inference for mistral.mistral-7b-instruct-v0:2 -----
[2024-04-01 12:24:13,440] p84452 {1310555860.py:3} INFO - row 0/4, prompt_template=mistral_template.txt, model_id=mistral.mistral-7b-instruct-v0:2
[2024-04-01 12:24:13,446] p84452 {1760020676.py:9} INFO - model_id=mistral.mistral-7b-instruct-v0:2, prompt length is 2827 characters, 432 words
[2024-04-01 12:24:13,447] p84452 {1760020676.py:37} INFO - Invoking bedrock/mistral.mistral-7b-instruct-v0:2......
[2024-04-01 12:24:13,455] p84452 {credentials.py:1278} INFO - Found credentials in shared credentials file: ~/.aws/credentials
[92m12:24:13 - LiteLLM:INFO[0m: utils.py:992 - [92m
Request Sent from LiteLLM:

            response = client.invoke_model(
                body={"prompt": "<s>[INST] <s>[INST] <<SYS>>\nYou are a bot that generates titles for chapters of a meeting transcript. Use the following pieces of a chapter from a meeting transcript and generate 