### Generate metrics
---

In this notebook:

1. We will extract the titles generated as completions from the bedrock models (claude sonnet, llama, mistral)

2. Load these into a CSV file

3. Generate metrics on accuracy, performance, token throughput, inference, etc.

In [1]:
import os
import json
import yaml
import glob
import logging
import pandas as pd  
from ast import List
from typing import Dict
from pathlib import Path
from json import JSONEncoder

#### Set a logger 

In [2]:
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

#### Load the config file: Contains model information, data directory information

In [3]:
## load the config file
# global constants
CONFIG_FILE_PATH = "config.yml"

In [4]:
# read the config yaml file
fpath = CONFIG_FILE_PATH
with open(fpath, 'r') as yaml_in:
    config = yaml.safe_load(yaml_in)
logger.info(f"config read from {fpath} -> {json.dumps(config, indent=2)}")

[2024-04-01 16:14:06,591] p26176 {3034282685.py:5} INFO - config read from config.yml -> {
  "app_name": "genai-chapterize-meeting-transcripts",
  "aws": {
    "region": "us-east-1"
  },
  "dir": {
    "data": "data",
    "raw": "data/source_data",
    "processed": "data/processed_data",
    "completions": "data/title_completions",
    "golden": "data/source_data/golden",
    "prompts": "data/prompts",
    "metrics": "data/metrics",
    "processed_file": "processed.csv",
    "chapterized_file": "chapterized.csv",
    "metrics_file": "per_request_results.csv",
    "summary_metrics_file": "summary_metrics.csv",
    "model_evals_file": "model_eval.csv",
    "file_type_to_process": "vtt"
  },
  "run_steps": {
    "0_chapterize_data.ipynb": true,
    "1_generate_chapter_titles.ipynb": true,
    "2_summarize_metrics.ipynb": true
  },
  "inference_parameters": {
    "temperature": 0.1,
    "caching": false
  },
  "title_generation_thresholds": {
    "max_chapter_length": 20
  },
  "experiment

In [5]:
## Represents extracted all metric files
fpath = os.path.join(config['dir']['completions'], "**", "*", "*.json")
metric_files = glob.glob(fpath, recursive=True)
logger.info(f"there are {len(metric_files)} files in {fpath}")

[2024-04-01 16:14:06,610] p26176 {3931314738.py:4} INFO - there are 20 files in data/title_completions\**\*\*.json


#### Generate a simple CSV with metrics on title completions, chapters, and performance latency

In [6]:
metrics = []
for f in metric_files:
    metrics.append(json.loads(Path(f).read_text()))
df = pd.DataFrame(metrics)
df = df.drop(columns=['exception', 'prompt'])
df = df.sort_values(by=['file_name', 'model_id', 'chapter_id'])
df = df.rename(columns={'completion': 'chapter_title', 'time_taken_in_seconds': 'latency_seconds'})
logger.info(f"all metrics data is read into a dataframe of shape {df.shape}")
count = df.shape[0]

[2024-04-01 16:14:06,637] p26176 {3027428405.py:8} INFO - all metrics data is read into a dataframe of shape (20, 10)


In [7]:
df_per_model_id_counts = df['model_id'].value_counts()
df_per_model_id_counts

model_id
amazon.titan-text-express-v1               4
anthropic.claude-3-haiku-20240307-v1:0     4
anthropic.claude-3-sonnet-20240229-v1:0    4
meta.llama2-13b-chat-v1                    4
mistral.mistral-7b-instruct-v0:2           4
Name: count, dtype: int64

In [8]:
df.head()

Unnamed: 0,chapter_title,file_name,chapter_id,model_id,latency_seconds,completion_token_count,prompt_token_count,input_token_price,output_token_pricing,chapter_text
0,"Chapter: Higgs Boson, Dark Matter, and Quantum...",particle_physics_meeting.vtt,1,amazon.titan-text-express-v1,1.647802,13,537,0.00043,2.1e-05,"""Have you all seen the latest results from the..."
1,Unveiling the Mysteries of the Universe,particle_physics_meeting.vtt,2,amazon.titan-text-express-v1,1.554324,9,591,0.000473,1.4e-05,"""Not necessarily. With the advent of more powe..."
2,Balancing Open Mind and Scientific Rigor,particle_physics_meeting.vtt,3,amazon.titan-text-express-v1,1.37988,8,629,0.000503,1.3e-05,"""You make a valid point, David. As scientists,..."
3,Agreement and Enlightenment,particle_physics_meeting.vtt,4,amazon.titan-text-express-v1,1.162624,4,164,0.000131,6e-06,"""*nods in agreement* To the pursuit of underst..."
4,<title>Exploring the Frontiers of Particle Phy...,particle_physics_meeting.vtt,1,anthropic.claude-3-haiku-20240307-v1:0,1.271018,24,625,0.000156,3e-05,"""Have you all seen the latest results from the..."


In [9]:
metrics_dir = config['dir']['metrics']
# Create the directory if it doesn't exist
os.makedirs(metrics_dir, exist_ok=True)
# Construct the file path
metrics_file_path = os.path.join(metrics_dir, config['dir']['metrics_file'])
df.to_csv(metrics_file_path, index=False)

In [10]:
df_summary = df.groupby('model_id').mean(numeric_only=True)
df_summary['p95_latency_seconds'] = df.groupby('model_id')['latency_seconds'].quantile(0.95)
df_summary['avg_cost_per_txn'] = df_summary.input_token_price + df_summary.output_token_pricing
df_summary['p95_cost_per_txn'] = df.groupby('model_id')['input_token_price'].quantile(0.95) + \
                                 df.groupby('model_id')['output_token_pricing'].quantile(0.95)
df_summary.completion_token_count = df_summary.completion_token_count.astype(int)
df_summary.prompt_token_count = df_summary.prompt_token_count.astype(int)
df_summary['p95_completion_token_count'] = df.groupby('model_id')['completion_token_count'].quantile(0.95)
df_summary['p95_prompt_token_count'] = df.groupby('model_id')['prompt_token_count'].quantile(0.95)
df_summary = df_summary.drop(columns=['chapter_id'])
df_summary


Unnamed: 0_level_0,latency_seconds,completion_token_count,prompt_token_count,input_token_price,output_token_pricing,p95_latency_seconds,avg_cost_per_txn,p95_cost_per_txn,p95_completion_token_count,p95_prompt_token_count
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
amazon.titan-text-express-v1,1.436158,8,480,0.000384,1.4e-05,1.63378,0.000398,0.000518,12.4,623.3
anthropic.claude-3-haiku-20240307-v1:0,1.183008,22,555,0.000139,2.8e-05,1.294257,0.000166,0.00021,25.7,711.95
anthropic.claude-3-sonnet-20240229-v1:0,2.322253,21,555,0.001665,0.000315,2.593687,0.00198,0.002509,24.85,711.95
meta.llama2-13b-chat-v1,1.3237,13,488,0.000366,1.4e-05,1.612643,0.00038,0.000493,19.8,631.3
mistral.mistral-7b-instruct-v0:2,1.044126,24,496,7.4e-05,5e-06,1.403058,7.9e-05,0.000105,45.35,639.3


#### Calculate the long short view of the completions

In [11]:
index_cols = ['file_name', 'chapter_id', 'chapter_text']
def sanitize_title(title):
    if title is None:
        return title
    title = title.replace("<title>", "").replace("</title>", "")
    title = title.replace("Title:", "")
    title = title.replace("Chapter: ", "")
    title = title.replace("Chapter ", "")
    
    title = title.strip()
    title = title.split("\n")[0]
    return title
df.chapter_title = df.chapter_title.map(sanitize_title)
df_pivoted = df.pivot_table(index=index_cols, columns='model_id', values='chapter_title', aggfunc='first')
cols_other_than_index_cols = [f"{c}_title" for c in df_pivoted.columns if c not in index_cols]
df_pivoted = df_pivoted.reset_index()
df_pivoted.columns = index_cols + cols_other_than_index_cols
df_pivoted.head()

Unnamed: 0,file_name,chapter_id,chapter_text,amazon.titan-text-express-v1_title,anthropic.claude-3-haiku-20240307-v1:0_title,anthropic.claude-3-sonnet-20240229-v1:0_title,meta.llama2-13b-chat-v1_title,mistral.mistral-7b-instruct-v0:2_title
0,particle_physics_meeting.vtt,1,"""Have you all seen the latest results from the...","Higgs Boson, Dark Matter, and Quantum Gravity",Exploring the Frontiers of Particle Physics an...,Frontiers of Particle Physics Exploration,"""Unlocking the Secrets of the Higgs Boson""","Exploring the Higgs Boson: Applications, Dark ..."
1,particle_physics_meeting.vtt,2,"""Not necessarily. With the advent of more powe...",Unveiling the Mysteries of the Universe,Exploring the Frontiers of Physics and Cosmology,Frontiers of Physics: Antimatter and Cosmic En...,"""Unlocking the Secrets of the Universe: The Po...",Exploring Reality's Depths: Particle Accelerat...
2,particle_physics_meeting.vtt,3,"""You make a valid point, David. As scientists,...",Balancing Open Mind and Scientific Rigor,Balancing Open-Mindedness and Rigorous Standar...,Balancing Open-Mindedness and Rigor in Scienti...,"""Scientific Inquiry and the Pursuit of Truth""",Balancing Open-Mindedness and Scientific Stand...
3,particle_physics_meeting.vtt,4,"""*nods in agreement* To the pursuit of underst...",Agreement and Enlightenment,Commitment to Enlightening Discourse,"Enlightening Discussions, Open Exchange","""Pursuing Understanding and Open Ideas""",Pursuit of Understanding: Open Idea Exchange


In [12]:
# Construct the file path
movel_evals_fpath = os.path.join(metrics_dir, config['dir']['model_evals_file'])
df_pivoted.to_csv(movel_evals_fpath, index=False)

In [13]:

def create_summary(row, summary):
    return summary.format(model_id=row.name,
                          avg_latency=round(row['latency_seconds'], 4),
                          p95_latency=round(row['p95_latency_seconds'], 4),
                          avg_cost=round(10000*row['avg_cost_per_txn'], 6),
                          p95_cost_per_txn=round(10000*row['p95_cost_per_txn'], 6),
                          avg_prompt_token_count=row['prompt_token_count'].astype(int),
                          p95_prompt_token_count=row['p95_prompt_token_count'].astype(int),
                          avg_completion_token_count=row['completion_token_count'].astype(int),
                          p95_completion_token_count=row['p95_completion_token_count'].astype(int),
                          count=int(row['count']))
df_summary = pd.merge(left=df_summary, right=df_per_model_id_counts, on="model_id", how="left")

df_summary['overall_report'] = df_summary.apply(lambda r: create_summary(r, config['report']['summary_text']), axis=1)
df_summary = df_summary.round(6)

summary_metrics_file_path = os.path.join(metrics_dir, config['dir']['summary_metrics_file'])
df_summary.to_csv(summary_metrics_file_path, index=True)

In [14]:
# view the df_summary elements
df_summary.head()

Unnamed: 0_level_0,latency_seconds,completion_token_count,prompt_token_count,input_token_price,output_token_pricing,p95_latency_seconds,avg_cost_per_txn,p95_cost_per_txn,p95_completion_token_count,p95_prompt_token_count,count,overall_report
model_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
amazon.titan-text-express-v1,1.436158,8,480,0.000384,1.4e-05,1.63378,0.000398,0.000518,12.4,623.3,4,The average inference latency for this workloa...
anthropic.claude-3-haiku-20240307-v1:0,1.183008,22,555,0.000139,2.8e-05,1.294257,0.000166,0.00021,25.7,711.95,4,The average inference latency for this workloa...
anthropic.claude-3-sonnet-20240229-v1:0,2.322253,21,555,0.001665,0.000315,2.593687,0.00198,0.002509,24.85,711.95,4,The average inference latency for this workloa...
meta.llama2-13b-chat-v1,1.3237,13,488,0.000366,1.4e-05,1.612643,0.00038,0.000493,19.8,631.3,4,The average inference latency for this workloa...
mistral.mistral-7b-instruct-v0:2,1.044126,24,496,7.4e-05,5e-06,1.403058,7.9e-05,0.000105,45.35,639.3,4,The average inference latency for this workloa...
