In [143]:
import os
import json
import glob
import logging
import pandas as pd
from pathlib import Path

In [144]:
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

In [145]:
RESULTS_DIR: str = "../results-*"
FIRST_N: int = 10

In [146]:
data_path = os.path.join(RESULTS_DIR, "per_inference", "*.json")
logger.info(f"looking for per inference files in {data_path}")
file_list = glob.glob(data_path)
logger.info(f"found {len(file_list)} files, listing first {FIRST_N}\n{file_list[:FIRST_N]}")

[2024-09-23 20:35:14,362] p121634 {1289656933.py:2} INFO - looking for per inference files in ../results-*/per_inference/*.json
[2024-09-23 20:35:14,365] p121634 {1289656933.py:4} INFO - found 1037 files, listing first 10
['../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121590.5035987.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121633.7328534.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121754.196286.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121509.0425491.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121685.8263414.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121484.6407592.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121362.5149496.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_inference/1727121833.160436.json', '../results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2/per_infer

In [147]:
unique_result_files_list = list(set([Path(f).parent.absolute().parent.name for f in file_list]))
unique_result_files_list

['results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2',
 'results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2']

In [148]:
# read the files into a dataframe
content = [json.loads(Path(f).read_text()) | dict(filename=Path(f).parent.absolute().parent.name) \
           for f in file_list]
logger.info(f"sample data {json.dumps(content[0], indent=2)}")
df = pd.DataFrame(content)
logger.info(f"shape of data from {df.shape}")
df.head()

[2024-09-23 20:35:14,491] p121634 {2288595646.py:4} INFO - sample data {
  "endpoint_name": "http://127.0.0.1:8080/invocations",
  "prompt": "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nYou are an assistant for question-answering tasks. Use the following pieces of retrieved context in the section demarcated by \"```\" to answer the question. \nThe context may contain multiple question answer pairs as an example. Only answer the final question provided in the question section below.\nIf you dont know the answer just say that you dont know. Use three sentences maximum and keep the answer concise. \n\n```\nPassage 1:\nEricaceae\nThe Ericaceae () are a family of flowering plants, commonly known as the heath or heather family, found most commonly in acidic and infertile growing conditions. The family is large, with c. 4250 known species spread across 124 genera, making it the 14th most species-rich family of flowering plants. The many well known and economically important m

Unnamed: 0,endpoint_name,prompt,question,ground_truth,payload_file,do_sample,temperature,top_p,top_k,max_new_tokens,...,time_to_last_token,uuid,experiment_name,concurrency,filename,bad_words,stop_words,pad_id,end_id,max_tokens
0,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,Which as more species Cassiope or Deutzia?,Deutzia,payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,f8d4f8a95cf84a6d866513d8802bbbf9,Meta-Llama-3-8B-Instruct,2,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,,,,,
1,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,Both WAGS Atlanta and WAGS are what?,American reality television series,payload_en_500-1000.jsonl,True,0.1,0.92,120,100.0,...,,c77f4eeb09304c4fb927b8008be87886,Meta-Llama-3-8B-Instruct,4,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,,,,,
2,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,Passage:\nUnits of Measurement - University of...,"English inch,Inch (unit),International inch,De...",payload_en_2000-3000.jsonl,True,0.1,0.92,120,100.0,...,,4cbfd69056854939a36bb34268f87b85,Meta-Llama-3-8B-Instruct,6,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,,,,,
3,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,Are Kakwa River and Bighead River located in t...,yes,payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,a933e435434e4a90b80575cad2f0a3f1,Meta-Llama-3-8B-Instruct,1,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,,,,,
4,http://127.0.0.1:8080/invocations,<|begin_of_text|><|start_header_id|>user<|end_...,Passage:\nTubman: Conductor of the Underground...,National Underground Railroad Network to Freed...,payload_en_3000-3840.jsonl,True,0.1,0.92,120,100.0,...,,23c57b884e1d45dea30006d3132146fc,Meta-Llama-3-8B-Instruct,4,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,,,,,


In [149]:
# analyze payload_en_3000-3840 for a couple of result folders
"""
"payload_file": "payload_en_1-500.jsonl",
  "do_sample": true,
  "temperature": 0.1,
  "top_p": 0.92,
  "top_k": 120,
  "max_new_tokens": 100,
  "completion": "{\"generated_text\": \"\\n\\n```Stauntonia is a genus of flowering plants in the family Lardizabalaceae. It is named after George Staunton, who brought it to Britain from China in the 19th century.\\n\\nSpecies\\nSpecies accepted by the Plants of the World Online as of March 2023:\\nSinofranchetia\\nSinofranchetia is a genus of flowering plant in the Lardizabalaceae family. It contains a single species, Sinofr\"}",
  "prompt_tokens": 328,
  "completion_tokens": 111,
  "latency": 2.4156091760087293,
  "time_to_first_token": null,
  "time_per_output_token": null,
  "time_to_last_token": null,
  "uuid": "5aca6360ee5244a68d2a951f0067d68b",
  "experiment_name": "Meta-Llama-3-8B-Instruct",
  "concurrency": 4,
  "filename": "results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2"
"""
PAYLOAD_OF_INTEREST: str = "payload_en_3000-3840.jsonl" #payload_en_3000-3840.jsonl"
df1 = df[(df.payload_file == PAYLOAD_OF_INTEREST)]
RESULT_FOLDER1_OF_INTEREST: str = "results-llama3-8b-g5.12xl-tp=4-mc=max-djl-ec2"
# df1 = df[(df.payload_file == PAYLOAD_OF_INTEREST) & \
#          (df.filename == RESULT_FOLDER1_OF_INTEREST)]
# logger.info(f"shape of dataframe with data for {PAYLOAD_OF_INTEREST} and {RESULT_FOLDER1_OF_INTEREST} is {df1.shape}")
# df1.head()

In [150]:
df2 = df1[['filename', 'completion_tokens', 'latency', 'question', 'ground_truth', 'completion']]
df2

Unnamed: 0,filename,completion_tokens,latency,question,ground_truth,completion
0,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,44,1.692961,Which as more species Cassiope or Deutzia?,Deutzia,"{""generated_text"": ""\n\nAccording to the provi..."
3,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,24,1.271467,Are Kakwa River and Bighead River located in t...,yes,"{""generated_text"": ""\n\nYes, Kakwa River and B..."
4,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,117,4.177421,Passage:\nTubman: Conductor of the Underground...,National Underground Railroad Network to Freed...,"{""generated_text"": ""\n\nThe answer to the ques..."
5,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,19,1.376350,"Passage:\nGummo Marx\nMilton ""Gummo"" Marx (Oct...","Karl Marx,Karl Heinrich Marx,K. H. Marx,Marx, ...","{""generated_text"": ""\n\nI don't know the answe..."
11,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,14,1.123095,Hardley Flood is an area of lagoons that suppo...,duck,"{""generated_text"": ""\n\nI don't know.""}"
...,...,...,...,...,...,...
1019,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,23,1.322193,Passage:\nGuillemot\nGuillemots is the common ...,"Sea bird,Marine birds,Sea-bird,Marine bird,Sea...",I don't know the answer to this question. The ...
1020,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,6,1.132178,"Which film has the director born earlier, Dos ...",Dos Basuras,I don't know.
1023,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,20,1.279956,Are Euptelea and Muehlenbeckia both genuses?,yes,"Yes, both Euptelea and Muehlenbeckia are gener..."
1032,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,6,0.987512,Passage:\nHoy (boat)\nA hoy was a small sloop...,"Blustery,Eolic,Aeolian Action,Wind Cycle,Cyclo...",I don't know.


In [151]:
from typing import Dict
def extract_answer(x):
    if "generated_text" in x:
        return json.loads(x).get('generated_text')
    return x
df2['completion'] = df2.completion.map(extract_answer)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['completion'] = df2.completion.map(extract_answer)


In [152]:
df2

Unnamed: 0,filename,completion_tokens,latency,question,ground_truth,completion
0,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,44,1.692961,Which as more species Cassiope or Deutzia?,Deutzia,"\n\nAccording to the provided context, Deutzia..."
3,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,24,1.271467,Are Kakwa River and Bighead River located in t...,yes,"\n\nYes, Kakwa River and Bighead River are bot..."
4,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,117,4.177421,Passage:\nTubman: Conductor of the Underground...,National Underground Railroad Network to Freed...,"\n\nThe answer to the question ""Who presents ""..."
5,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,19,1.376350,"Passage:\nGummo Marx\nMilton ""Gummo"" Marx (Oct...","Karl Marx,Karl Heinrich Marx,K. H. Marx,Marx, ...",\n\nI don't know the answer to this question.
11,results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2,14,1.123095,Hardley Flood is an area of lagoons that suppo...,duck,\n\nI don't know.
...,...,...,...,...,...,...
1019,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,23,1.322193,Passage:\nGuillemot\nGuillemots is the common ...,"Sea bird,Marine birds,Sea-bird,Marine bird,Sea...",I don't know the answer to this question. The ...
1020,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,6,1.132178,"Which film has the director born earlier, Dos ...",Dos Basuras,I don't know.
1023,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,20,1.279956,Are Euptelea and Muehlenbeckia both genuses?,yes,"Yes, both Euptelea and Muehlenbeckia are gener..."
1032,results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2,6,0.987512,Passage:\nHoy (boat)\nA hoy was a small sloop...,"Blustery,Eolic,Aeolian Action,Wind Cycle,Cyclo...",I don't know.


In [153]:
comparison = {}
dont_know_type_responses = {}
valid_responses = {}
for f in unique_result_files_list:
    dont_know_type_responses[f] = 0
    valid_responses[f] = 0
for r in df2.iterrows():
    r = r[1]
    if r['question'] not in comparison:
        comparison[r['question']] = []
    r = dict(r)    
    comparison[r['question']].append(r)
    if 'don\'t know' in r['completion']:
        dont_know_type_responses[r['filename']] += 1
    else:
        valid_responses[r['filename']] += 1

    


In [154]:
logger.info(f"dont_know_type_responses={dont_know_type_responses}\nvalid_responses={valid_responses}")

[2024-09-23 20:35:14,571] p121634 {2041944104.py:1} INFO - dont_know_type_responses={'results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2': 48, 'results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2': 233}
valid_responses={'results-llama3-8b-g5.12xl-tp=2-mc=max-triton-ec2': 16, 'results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2': 70}


In [155]:
Path("comparison.json").write_text(json.dumps(comparison, indent=2))

615563

In [156]:
for k in comparison.keys():
    print(json.dumps(comparison[k], indent=2))
    break

[
  {
    "filename": "results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2",
    "completion_tokens": 44,
    "latency": 1.6929611009982182,
    "question": "Which as more species Cassiope or Deutzia?",
    "ground_truth": "Deutzia",
    "completion": "\n\nAccording to the provided context, Deutzia has about 60 species, while Cassiope has 9-12 species. Therefore, Deutzia has more species."
  },
  {
    "filename": "results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2",
    "completion_tokens": 48,
    "latency": 4.967612214997644,
    "question": "Which as more species Cassiope or Deutzia?",
    "ground_truth": "Deutzia",
    "completion": "\n\nAccording to the provided context, Deutzia has about 60 species, while Cassiope has 9-12 species. Therefore, Deutzia has more species than Cassiope."
  },
  {
    "filename": "results-llama3-8b-g5.12xl-tp=2-mc=max-djl-ec2",
    "completion_tokens": 48,
    "latency": 2.8179733110009693,
    "question": "Which as more species Cassiope or Deutzia?",
    "groun