In [1]:
%pip install -q evaluate rouge_score

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file into the process environment.
load_dotenv()

True

In [3]:
from datasets import load_from_disk

# Load the dataset previously saved to disk (path is relative to the notebook's
# working directory) and display its summary.
new_ds = load_from_disk("../data/datasets/ms_macro/")
new_ds

Dataset({
    features: ['answers', 'passages', 'query', 'query_id', 'query_type', 'wellFormedAnswers'],
    num_rows: 500
})

In [4]:
# Tally how many rows each query type has, and remember the row index at which
# a type reaches exactly `size` rows.
size = 100
counts = {}
indices = {}
for row_idx, row in enumerate(new_ds):
    query_type = row["query_type"]
    seen_so_far = counts.get(query_type, 0) + 1
    counts[query_type] = seen_so_far
    if seen_so_far == size:
        indices[query_type] = row_idx
counts, indices

({'NUMERIC': 100,
  'DESCRIPTION': 100,
  'ENTITY': 100,
  'PERSON': 100,
  'LOCATION': 100},
 {'NUMERIC': 179,
  'DESCRIPTION': 215,
  'ENTITY': 443,
  'LOCATION': 461,
  'PERSON': 499})

In [5]:
# Tabular preview of the dataset (pandas truncates the display).
new_ds.to_pandas()

Unnamed: 0,answers,passages,query,query_id,query_type,wellFormedAnswers
0,"[2,662]","{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0], 'pas...",albany mn population,15177,NUMERIC,"[The population of Albany, Minnesota is 2,662. ]"
1,[The Volcano forecast for Apr 12 is 52 degrees...,"{'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0]...","current weather in volcano, ca",114414,DESCRIPTION,[The Volcano forecast for Apr 12 is 52 degrees...
2,[Hippocrates],"{'is_selected': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]...",____________________ is considered the father ...,9083,DESCRIPTION,[Hippocrates is considered the father of moder...
3,[120 days from the date of the Note.],"{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...",how many days is an appraisal good for a fanni...,281439,NUMERIC,[An appraisal is good for 120 days from the da...
4,"[From $26,000 to $39,000 a year]","{'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]...",average pharmacy tech salary,40287,NUMERIC,[The average salary for a pharmacy technician ...
...,...,...,...,...,...,...
495,"[The Pool Shower, Inc. is a Georgia Domestic P...","{'is_selected': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]...",the pool shower company,518269,PERSON,"[The Pool Shower, Inc. is a Georgia Domestic P..."
496,[Hanson],"{'is_selected': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]...",longest tenured american football players,442806,PERSON,[Hanson is the longest tenured American footba...
497,[Mount Able Baptist Church is located at the a...,"{'is_selected': [1, 0, 0, 0, 0, 0, 0, 0, 0], '...",mt. view baptist in pendleton sc,460250,PERSON,[Mount Able Baptist Church is located at the a...
498,[Honeysuckle Weeks],"{'is_selected': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]...",what actress disappeared for a while,549739,PERSON,[The actress disappeared for a while Honeysuck...


In [5]:
import evaluate

# Metric objects shared by calc_metrics below.
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

In [6]:
def calc_metrics(ds):
    """Score each row's first answer against its first well-formed answer.

    For every row of ``ds`` the first entry of ``answers`` is used as the
    prediction and the first entry of ``wellFormedAnswers`` as the reference.

    Args:
        ds: Dataset exposing ``answers`` and ``wellFormedAnswers`` columns,
            each holding a non-empty list of strings per row.

    Returns:
        dict with keys ``"bleu_scores"`` and ``"rouge_scores"`` holding the
        results of ``bleu.compute`` and ``rouge.compute`` respectively.

    Raises:
        IndexError: if a row's ``answers`` or ``wellFormedAnswers`` list is
            empty.
    """
    # Read only the two columns that are needed. Indexing ds[i] materialises
    # the whole row (passages included), and the original did so twice per row.
    predictions = [row_answers[0] for row_answers in ds["answers"]]
    references = [row_refs[0] for row_refs in ds["wellFormedAnswers"]]
    bleu_scores = bleu.compute(predictions=predictions, references=references)
    rouge_scores = rouge.compute(predictions=predictions, references=references)
    return {"bleu_scores": bleu_scores, "rouge_scores": rouge_scores}

In [8]:
# Baseline: score the dataset's own short answers against its well-formed answers.
calc_metrics(new_ds)

{'bleu_scores': {'bleu': 0.5842479720128682,
  'precisions': [0.7814257485940113,
   0.7185392334265505,
   0.6801561945331913,
   0.6543700340522134],
  'brevity_penalty': 0.8263321448047812,
  'length_ratio': 0.8398008680112331,
  'translation_length': 6579,
  'reference_length': 7834},
 'rouge_scores': {'rouge1': 0.6301946495853493,
  'rouge2': 0.5266427189500504,
  'rougeL': 0.623467453115133,
  'rougeLsum': 0.6239164817179192}}

In [9]:
def calc_all_metrics(ds):
    """Compute metrics for the whole dataset and for each query type in it.

    Args:
        ds: Dataset with ``query_type``, ``answers`` and ``wellFormedAnswers``
            columns.

    Returns:
        dict mapping ``"OVERALL"`` and every query type present in ``ds`` to
        the result of ``calc_metrics`` on the corresponding rows.
    """
    result = {"OVERALL": calc_metrics(ds)}

    # Take the query types from `ds` itself, in first-appearance order, rather
    # than from the notebook-global `indices`. That global is built from
    # `new_ds` and only lists types that reached `size` rows, so using it on a
    # subset would pass empty datasets to the metrics and would skip rarer types.
    for query_type in dict.fromkeys(ds["query_type"]):
        subset = ds.filter(lambda example: example["query_type"] == query_type)
        result[query_type] = calc_metrics(subset)

    return result

In [10]:
# Overall scores plus a per-query-type breakdown for the full dataset.
calc_all_metrics(new_ds)

{'OVERALL': {'bleu_scores': {'bleu': 0.5842479720128682,
   'precisions': [0.7814257485940113,
    0.7185392334265505,
    0.6801561945331913,
    0.6543700340522134],
   'brevity_penalty': 0.8263321448047812,
   'length_ratio': 0.8398008680112331,
   'translation_length': 6579,
   'reference_length': 7834},
  'rouge_scores': {'rouge1': 0.6301946495853493,
   'rouge2': 0.5266427189500504,
   'rougeL': 0.623467453115133,
   'rougeLsum': 0.6239164817179192}},
 'NUMERIC': {'bleu_scores': {'bleu': 0.3589193328591513,
   'precisions': [0.7536764705882353,
    0.6494413407821229,
    0.5884244372990354,
    0.5657657657657658],
   'brevity_penalty': 0.5649158870633492,
   'length_ratio': 0.6365054602184087,
   'translation_length': 816,
   'reference_length': 1282},
  'rouge_scores': {'rouge1': 0.5569863096088544,
   'rouge2': 0.4262959859853511,
   'rougeL': 0.5495190228731732,
   'rougeLsum': 0.5502805905003136}},
 'DESCRIPTION': {'bleu_scores': {'bleu': 0.7521919521555381,
   'precisions'

In [11]:
import json
import sys
import os

# Set before the project import below.
# NOTE(review): presumably evaluate_llm_ms_macro reads these at import time - confirm.
os.environ["TEST_FIRST_5"] = "true"
os.environ["LANGCHAIN_DEBUG"] = "true"

from pathlib import Path

# Put the parent of the current working directory on the import path so the
# project module imported below can be resolved.
sys.path.append(str(Path.cwd().parent))

# NOTE: this import rebinds `calc_all_metrics`, replacing the single-argument
# function defined earlier in this notebook. The imported version is the one
# called later with (dataset, answers).
from evaluate_llm_ms_macro import (
    QAChainWithMsMacroDataset,
    llm_loader,
    calc_all_metrics,
)

loading env vars from: /Users/inflaton/code/emtech/gpt/Llama-2-eval/.env
App init started at 2023-10-10 12:04:33.775140
Running on:  macOS-14.0-arm64-arm-64bit
MPS is  available
CUDA is NOT available
hf_embeddings_device_type: mps
hf_pipeline_device_type: mps
initializing LLM: openai
  hf_pipeline_device_type: mps
     load_quantized_model: None
              torch_dtype: torch.float32
                 n_threds: 24
              using model: gpt-3.5-turbo
initialization complete
App init completed in 0.167s


In [12]:
%%time

# Evaluate on the first five rows only.
eval_ds = new_ds.select(range(5))
qa_chain = QAChainWithMsMacroDataset(eval_ds, llm_loader)

# Ask the chain each query with an empty chat history and collect the answer text.
# NOTE(review): the meaning of the positional (None, None, True) arguments is
# defined by call_chain in the project module - confirm there.
answers = []
for row_idx in range(eval_ds.num_rows):
    question = str(eval_ds[row_idx]["query"])
    chain_output = qa_chain.call_chain(
        {"question": question, "chat_history": []},
        None,
        None,
        True,
    )
    answers.append(chain_output["answer"])

# Score the generated answers with the imported two-argument calc_all_metrics.
result = calc_all_metrics(eval_ds, answers)

# Print question / generated answer / gold answer triples, numbered from 1.
for n, answer in enumerate(answers, start=1):
    row = eval_ds[n - 1]
    print(f"Q-{n:03d}: {row['query']}")
    print(f"A-{n:03d}: {answer}")
    print(f"G-{n:03d}: {row['wellFormedAnswers'][0]}\n")

print(f"\n\nscores: {json.dumps(result, indent=2)}\n")

{'question': 'albany mn population', 'chat_history': []}
[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain] Entering Chain run with input:
[0m{
  "question": "albany mn population",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:ConversationalRetrievalChain > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "albany mn population",
  "context": "City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who li

In [13]:
print(
    "System: Use the following pieces of context to answer the users question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\nCity of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.\n\nPlace of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.\n\nFor the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n\nAlbany, Minnesota, as per 2017 US Census estimate, has a community population of 2,662 people. Albany is located in Stearns County, 20 miles west of St. Cloud and 80 miles northwest of Minneapolis/St. Paul on Interstate 94 (I-94). Albany has direct access to State Highway 238, which originates in Albany.\n\nSponsored Topics. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It is part of the St. Cloud Metropolitan Statistical Area.\n\nRecent posts about Albany, Minnesota on our local forum with over 2,000,000 registered users. Albany is mentioned 87 times on our forum: Latest news from Albany, MN collected exclusively by city-data.com from local newspapers, TV, and radio stations. 
Ancestries: German (55.6%), Irish (10.0%), Polish (5.9%), Norwegian (5.4%), Swedish (2.8%), United States (2.6%).\n\nFor population 25 years and over in 56307: 1  High school or higher: 87.4%. 2  Bachelor's degree or higher: 15.4%. 3  Graduate or professional degree: 3.3 4 %. Unemployed: 3. 5 2%. Mean travel time to work (commute): 23.6 minutes.\n\nFor population 25 years and over in Albany: 1  High school or higher: 86.7%. 2  Bachelor's degree or higher: 15.4%. 3  Graduate or professional degree: 4.4 4 %. Unemployed: 4. 5 3%. Mean travel time to work (commute): 23.0 minutes.\nHuman: albany mn population"
)

System: Use the following pieces of context to answer the users question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
City of Albany, MN Zip Codes. City of Albany, MN Demographic Information. * Demographic data is based on information taken from the 2000 Census. City of Albany, MN covers 1 Area Code. City of Albany, MN covers 1 Zip Code. 15 Cities within 15 Miles of the City of Albany, MN.

Place of birth for U.S.-born residents: 70% of the 56307 zip code residents lived in the same house 5 years ago. Out of people who lived in different houses, 71% lived in this county. Out of people who lived in different counties, 50% lived in Minnesota. 92% of the 56307 zip code residents lived in the same house 1 year ago.

For the unincorporated community in southeast Minnesota named West Albany, see West Albany, Minnesota. Albany is a city in Stearns County, Minnesota, United States. The population was 2,561 at the 2010 census. It

In [4]:
print(
    "System: Use the following pieces of context to answer the users question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\nHippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.\n\nMany of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.\n\nDespite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.\n\nAt least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actually known about Hippocrates the father of Medicine. Consequently, he has become the projection of what people ideally want in a physician.\n\n460 – c. 370 BC) was a Greek physician of the Age of Pericles (Classical Greece), and is considered one of the most outstanding figures in the history of medicine.\n\nTRUE. Hippocrates is considered the father of modern medicine because he did not believe that illness was a punishment inflicted by the gods. True False. Weegy: TRUE. [ \n\nThe two sons of Hippocrates, Thessalus and Draco, and his son-in-law, Polybus, were his students. According to Galen, a later physician, Polybus was Hippocrates' true successor, while Thessalus and Draco each had a son named Hippocrates.\n\nHippocrates is mentioned in passing in the writings of two contemporaries: Plato, in Protagoras and Phaedrus, and, Aristotle 's Politics, which date from the 4th century BC. 
Soranus wrote that Hippocrates' father was Heraclides, a physician, and his mother was Praxitela, daughter of Tizane.\n\nReload the page to try again! Press Cmd-0 to reset your zoom. Press Ctrl-0 to reset your zoom. It looks like your browser might be zoomed in or out. Your browser needs to be zoomed to a normal size to record audio.\n\nHowever, the achievements of the writers of the Corpus, the practitioners of Hippocratic medicine, and the actions of Hippocrates himself were often commingled; thus very little is known about what Hippocrates actually thought, wrote, and did.\nHuman: ____________________ is considered the father of modern medicine."
)

System: Use the following pieces of context to answer the users question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Hippocrates is widely considered to be the Father of Medicine. His contributions revolutionized the practice of medicine; but after his death the advancement stalled.

Many of the invaluable lessons prescribed in that place of learning are assigned to Hippocrates. If that was the case, then it truly was Hippocrates, with his approach to healing and the role of the doctor, that influenced western medicine for thousands of years.

Despite this, Hippocrates is attributed with a great many wonderful deeds and thoughts. He is recognised as the founder of the Hippocratic School of Medicine, a college that revolutionized the understanding of medicine in Ancient Greece.

At least that is what we’d like to think. While his fame was such to warrant a mention from the likes of Plato and Aristotle, not much is actual

```
Q-003: ____________________ is considered the father of modern medicine.
A-003: Hippocrates is considered the father of modern medicine.
G-003: Hippocrates is considered the father of modern medicine.
```

In [11]:
# Single-row dataset (row 1) for inspecting the metrics on one example.
test_ds = new_ds.select([1])
test_ds[0]

{'answers': ['The Volcano forecast for Apr 12 is 52 degrees and Patchy light rain.'],
 'passages': {'is_selected': [1, 0, 1, 0, 0, 0, 0, 1, 0, 0],
  'passage_text': ['Volcano 10 Day Weather. Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny.',
   'Current U.S. National Radar--Current. The Current National Weather Radar is shown below with a UTC Time (subtract 5 hours from UTC to get Eastern Time). National Weather Forecast--Current. The Current National Weather Forecast and National Weather Map are shown below.',
   'Volcano 10 Day Weather. 1  Sunday:The Volcano forecast for Apr 09 is 43 degrees and Sunny. There is 55 percentage chance of rain and 4 mph winds from the Southwest. 2  Monday:The Volcano forecast for Apr 10 is 51 degrees and Sunny. There is 49 percentage chance of rain and 3 mph winds from the Southwest.',
   'Volcano, CA We

In [12]:
# Scores for the single selected row.
calc_metrics(test_ds)

{'bleu_scores': {'bleu': 1.0,
  'precisions': [1.0, 1.0, 1.0, 1.0],
  'brevity_penalty': 1.0,
  'length_ratio': 1.0,
  'translation_length': 14,
  'reference_length': 14},
 'rouge_scores': {'rouge1': 1.0,
  'rouge2': 1.0,
  'rougeL': 1.0,
  'rougeLsum': 1.0}}

In [18]:
# Whitespace tokens of the answer, to compare with BLEU's reported translation_length.
test_ds[0]["answers"][0].split()

['The',
 'Volcano',
 'forecast',
 'for',
 'Apr',
 '12',
 'is',
 '52',
 'degrees',
 'and',
 'Patchy',
 'light',
 'rain.']

In [19]:
# Whitespace-token count; BLEU reported translation_length 14 for this row.
# NOTE(review): presumably BLEU's tokenizer splits the trailing period off "rain." - confirm.
len(test_ds[0]["answers"][0].split())

13

In [22]:
# Single-row dataset (row 4): a short span answer with a longer well-formed answer.
test_ds = new_ds.select([4])
test_ds[0]

{'answers': ['From $26,000 to $39,000 a year'],
 'passages': {'is_selected': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
  'passage_text': ['If you are interested in becoming a pharmacy technician, you’re choosing a career that is in high demand. According to the U.S. Bureau of Labor Statistics (BLS), the career growth is expected to be “much faster than average”, with an employment increase of 32% predicted in the decade spanning 2010 to 2020*.',
   'What can a pharmacy technician really expect to earn in today’s economy? According to Salary.com, pharmacy technicians make anywhere from $26,000 to $39,000 a year, though most make around $32,000 annually. California has the highest average pharmacy technician wage, at $34,317, according to Open Farm Tech’s website.',
   'The median annual wage for pharmacy technicians was $30,410 in May 2015. Employment of pharmacy technicians is projected to grow 9 percent from 2014 to 2024, faster than the average for all occupations. Increased demand for prescri

In [23]:
# Scores for the single selected row; compare translation_length with reference_length.
calc_metrics(test_ds)

{'bleu_scores': {'bleu': 0.19303951204286907,
  'precisions': [0.875, 0.7142857142857143, 0.5, 0.4],
  'brevity_penalty': 0.32465246735834974,
  'length_ratio': 0.47058823529411764,
  'translation_length': 8,
  'reference_length': 17},
 'rouge_scores': {'rouge1': 0.5833333333333334,
  'rouge2': 0.4545454545454545,
  'rougeL': 0.5833333333333334,
  'rougeLsum': 0.5833333333333334}}