In [30]:
import os
import json
import glob
import logging
import pandas as pd
from pathlib import Path

In [31]:
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

In [32]:
RESULTS_DIR: str = "../results-*"
FIRST_N: int = 10

In [33]:
data_path = os.path.join(RESULTS_DIR, "per_inference", "*.json")
logger.info(f"looking for per inference files in {data_path}")
file_list = glob.glob(data_path)
logger.info(f"found {len(file_list)} files, listing first {FIRST_N}\n{file_list[:FIRST_N]}")

[2024-09-23 02:21:32,657] p5257 {1289656933.py:2} INFO - looking for per inference files in ../results-*/per_inference/*.json
[2024-09-23 02:21:32,660] p5257 {1289656933.py:4} INFO - found 950 files, listing first 10
['../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035183.2022789.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035144.6147513.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035172.558346.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035134.084935.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035177.7028358.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035189.8867545.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035136.9424248.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/1727035184.9625876.json', '../results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2/per_inference/

In [34]:
set([Path(f).parent.absolute().parent.name for f in file_list])

{'results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2',
 'results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2'}

In [35]:
# read the files into a dataframe
content = [json.loads(Path(f).read_text()) | dict(filename=Path(f).parent.absolute().parent.name) \
           for f in file_list]
logger.info(f"sample data {json.dumps(content[0], indent=2)}")
df = pd.DataFrame(content)
logger.info(f"shape of data from {df.shape}")
df.head()

[2024-09-23 02:21:32,785] p5257 {2288595646.py:4} INFO - sample data {
  "endpoint_name": "http://127.0.0.1:8080/invocations",
  "prompt": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\nYou are an assistant for question-answering tasks. Use the following pieces of retrieved context in the section demarcated by \"```\" to answer the question.\nThe context may contain multiple question answer pairs as an example, just answer the final question provided after the context.\nIf you dont know the answer just say that you dont know. Use three sentences maximum and keep the answer concise.\n\n```\nPassage 1:\nZoran \u0160vonja\nZoran \u0160vonja (Serbian Cyrillic: \u0417\u043e\u0440\u0430\u043d \u0428\u0432\u043e\u045a\u0430; born 4 October 1996) is a Serbian footballer who plays as a midfielder for  Mladost Novi Sad. He is the younger brother of footballer Goran \u0160vonja.\n\nClub career\nFrom 2016 to 2018, he played for OFK Ba\u010dka.\nPassage 2:\nAnton Shunto\nAnton Shunto (

Unnamed: 0,endpoint_name,prompt,question,ground_truth,payload_file,do_sample,temperature,top_p,top_k,max_new_tokens,...,time_to_last_token,uuid,experiment_name,concurrency,filename,bad_words,stop_words,pad_id,end_id,max_tokens
0,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,"Who is younger, Peter Rosegger or Ruel Redinger?",Ruel Redinger,payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,0089502e9689419492394f5e16996401,Meta-Llama-3-8B-Instruct,1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,,,,,
1,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,What is the capitol of the district that also ...,Gambaga,payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,0d2756ecc7b94e67827fb9ef0b183ad7,Meta-Llama-3-8B-Instruct,1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,,,,,
2,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,"Passage:\nPennsylvania State Capitol, Harrisbu...","Harrisburg (PA),Harrisburg, PA,Harrisburg, Pen...",payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,ad6964f4d98c442e81ac254210de4da6,Meta-Llama-3-8B-Instruct,1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,,,,,
3,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,"Passage:\nDeath to Spies\nReleased in 2007, De...","Smert' Shpionam,SMERSH,СМЕРШ,Smersh,NKVD Speci...",payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,7353aa0693564b52a6f9e53782cae7a8,Meta-Llama-3-8B-Instruct,1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,,,,,
4,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,Passage:\nDanube Waltz River Cruise Videos - 2...,"Budimpešta,Budapest,Veres Péter Gimnázium,Buda...",payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,3e37cbc76a83402e83ce420df701bc94,Meta-Llama-3-8B-Instruct,1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,,,,,


In [36]:
# analyze payload_en_3000-3840 for a couple of result folders
"""
"payload_file": "payload_en_1-500.jsonl",
  "do_sample": true,
  "temperature": 0.1,
  "top_p": 0.92,
  "top_k": 120,
  "max_new_tokens": 100,
  "completion": "{\"generated_text\": \"\\n\\n```Stauntonia is a genus of flowering plants in the family Lardizabalaceae. It is named after George Staunton, who brought it to Britain from China in the 19th century.\\n\\nSpecies\\nSpecies accepted by the Plants of the World Online as of March 2023:\\nSinofranchetia\\nSinofranchetia is a genus of flowering plant in the Lardizabalaceae family. It contains a single species, Sinofr\"}",
  "prompt_tokens": 328,
  "completion_tokens": 111,
  "latency": 2.4156091760087293,
  "time_to_first_token": null,
  "time_per_output_token": null,
  "time_to_last_token": null,
  "uuid": "5aca6360ee5244a68d2a951f0067d68b",
  "experiment_name": "Meta-Llama-3-8B-Instruct",
  "concurrency": 4,
  "filename": "results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2"
"""
PAYLOAD_OF_INTEREST: str = "payload_en_3000-3840.jsonl"
df1 = df[(df.payload_file == PAYLOAD_OF_INTEREST)]
RESULT_FOLDER1_OF_INTEREST: str = "results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2"
# df1 = df[(df.payload_file == PAYLOAD_OF_INTEREST) & \
#          (df.filename == RESULT_FOLDER1_OF_INTEREST)]
# logger.info(f"shape of dataframe with data for {PAYLOAD_OF_INTEREST} and {RESULT_FOLDER1_OF_INTEREST} is {df1.shape}")
# df1.head()

In [37]:
df2 = df1[['filename', 'completion_tokens', 'latency', 'question', 'ground_truth', 'completion']]
df2

Unnamed: 0,filename,completion_tokens,latency,question,ground_truth,completion
0,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,14,1.207794,"Who is younger, Peter Rosegger or Ruel Redinger?",Ruel Redinger,"{""generated_text"": ""\n\nI don't know.""}"
1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,34,1.484708,What is the capitol of the district that also ...,Gambaga,"{""generated_text"": ""\n\nI don't know. The prov..."
2,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,46,1.566873,"Passage:\nPennsylvania State Capitol, Harrisbu...","Harrisburg (PA),Harrisburg, PA,Harrisburg, Pen...","{""generated_text"": ""\n\nI can answer your ques..."
3,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,19,1.165957,"Passage:\nDeath to Spies\nReleased in 2007, De...","Smert' Shpionam,SMERSH,СМЕРШ,Smersh,NKVD Speci...","{""generated_text"": ""\n\nI don't know the answe..."
4,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,30,1.279127,Passage:\nDanube Waltz River Cruise Videos - 2...,"Budimpešta,Budapest,Veres Péter Gimnázium,Buda...","{""generated_text"": ""\n\nI can help you with th..."
...,...,...,...,...,...,...
932,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,6,8.699644,"Passage:\nCostermonger\nCostermonger, coster, ...","Fruiting,Friut,Fruiting Shrubs,Prutas,Bacca,Nu...",Fruit and vegetables.
938,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,72,5.247039,"Which film has the director who died first, Fo...",Sandflow,"According to the provided context, Duilio Cole..."
943,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,40,2.877437,"Which film has the director born earlier, Dos ...",Dos Basuras,"According to the context, Dos Basuras was rele..."
944,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,36,2.559343,Which as more species Cassiope or Deutzia?,Deutzia,"According to the provided context, Cassiope ha..."


In [38]:
from typing import Dict
def extract_answer(x):
    if "generated_text" in x:
        return json.loads(x).get('generated_text')
    return x
df2['completion'] = df2.completion.map(extract_answer)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['completion'] = df2.completion.map(extract_answer)


In [39]:
df2

Unnamed: 0,filename,completion_tokens,latency,question,ground_truth,completion
0,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,14,1.207794,"Who is younger, Peter Rosegger or Ruel Redinger?",Ruel Redinger,\n\nI don't know.
1,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,34,1.484708,What is the capitol of the district that also ...,Gambaga,\n\nI don't know. The provided context does no...
2,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,46,1.566873,"Passage:\nPennsylvania State Capitol, Harrisbu...","Harrisburg (PA),Harrisburg, PA,Harrisburg, Pen...",\n\nI can answer your question based on the pr...
3,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,19,1.165957,"Passage:\nDeath to Spies\nReleased in 2007, De...","Smert' Shpionam,SMERSH,СМЕРШ,Smersh,NKVD Speci...",\n\nI don't know the answer to this question.
4,results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2,30,1.279127,Passage:\nDanube Waltz River Cruise Videos - 2...,"Budimpešta,Budapest,Veres Péter Gimnázium,Buda...",\n\nI can help you with that!\n\nAccording to ...
...,...,...,...,...,...,...
932,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,6,8.699644,"Passage:\nCostermonger\nCostermonger, coster, ...","Fruiting,Friut,Fruiting Shrubs,Prutas,Bacca,Nu...",Fruit and vegetables.
938,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,72,5.247039,"Which film has the director who died first, Fo...",Sandflow,"According to the provided context, Duilio Cole..."
943,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,40,2.877437,"Which film has the director born earlier, Dos ...",Dos Basuras,"According to the context, Dos Basuras was rele..."
944,results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2,36,2.559343,Which as more species Cassiope or Deutzia?,Deutzia,"According to the provided context, Cassiope ha..."


In [40]:
comparison = {}
for r in df2.iterrows():
    r = r[1]
    if r['question'] not in comparison:
        comparison[r['question']] = []
    comparison[r['question']].append(dict(r))
    


In [41]:
Path("comparison.json").write_text(json.dumps(comparison, indent=2))

683754

In [42]:
for k in comparison.keys():
    print(json.dumps(comparison[k], indent=2))
    break

[
  {
    "filename": "results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2",
    "completion_tokens": 14,
    "latency": 1.2077943189997313,
    "question": "Who is younger, Peter Rosegger or Ruel Redinger?",
    "ground_truth": "Ruel Redinger",
    "completion": "\n\nI don't know."
  },
  {
    "filename": "results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2",
    "completion_tokens": 6,
    "latency": 4.535382598000069,
    "question": "Who is younger, Peter Rosegger or Ruel Redinger?",
    "ground_truth": "Ruel Redinger",
    "completion": "I don't know."
  },
  {
    "filename": "results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2",
    "completion_tokens": 6,
    "latency": 2.3120776129999285,
    "question": "Who is younger, Peter Rosegger or Ruel Redinger?",
    "ground_truth": "Ruel Redinger",
    "completion": "I don't know."
  },
  {
    "filename": "results-llama3-8b-g5.12xl-tp=4-mc=max-triton-ec2",
    "completion_tokens": 6,
    "latency": 1.1784457699995983,
    "question": "Who is y