# Benchmarking Using the HellaSwag Dataset

In [1]:
# Fields of each HellaSwag record:
# ind: Question index
# id: Question id
# activity_label: A short phrase describing the events in the question
# ctx: The full context for the question
# ctx_a: The first sentence of the context
# ctx_b: The second sentence of the context
# dataset: Domain of the question, e.g. activitynet / wikihow
# ending_options: A list of four ending choices

In [2]:
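# NOTE: manual line-by-line JSONL loader, kept for reference only.
# The pd.read_json(..., lines=True) cell below is what the notebook actually uses.
# import json


# with open('../data/benchmarking/train.jsonl', 'r') as json_file:
#     json_list = list(json_file)

# for json_str in json_list:
#     result = json.loads(json_str)
#     print(type(result))
#     print(result)
#     break
# result.keys()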

In [3]:
import pandas as pd

# Read the HellaSwag training file; lines=True parses it as JSON Lines
# (one JSON record per line) into a DataFrame.
df = pd.read_json('../data/benchmarking/train.jsonl', lines=True)

In [4]:
# Keep only the context text and its list of ending options.
columns_of_interest = ['ctx', 'ending_options']
context_opts = df[columns_of_interest]
context_opts

Unnamed: 0,ctx,ending_options
0,"Then, the man writes over the snow covering th...","[, the man adds wax to the windshield and cuts..."
1,A female chef in white uniform shows a stack o...,"[are then sprinkled with brown sugar., are pla..."
2,A female chef in white uniform shows a stack o...,"[hits the peeled cheesecake, followed by slice..."
3,A tray of potatoes is loaded into the oven and...,"[is poured into a midden., is placed onto a ba..."
4,The man in the center is demonstrating a hairs...,[is standing on the sponge cutting the hair of...
...,...,...
39900,[header] How to merge documents in microsoft w...,[[step] The asterisk will appear in the docume...
39901,[header] How to teach a bulldog to skateboard ...,"[Above all, have the board's axis facing away ..."
39902,[header] How to air dry fruit [title] Wash and...,[[step] If you do not know the exact dryness o...
39903,[header] How to be a good fast bowler [title] ...,[Grip the ball firmly and count to ten from th...


In [5]:
# Work on the first five rows only so the benchmark cell stays quick to run.
sample = context_opts.head(5)

In [9]:
# Show the untruncated context strings of the sampled rows.
ctx_column = sample['ctx']
ctx_column.values

array(['Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then',
       'A female chef in white uniform shows a stack of baking pans in a large kitchen presenting them. the pans',
       'A female chef in white uniform shows a stack of baking pans in a large kitchen presenting them. The pans are filled with pastries and loaded into the oven. a knife',
       'A tray of potatoes is loaded into the oven and removed. A large tray of cake is flipped over and placed on counter. a large tray of meat',
       'The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt'],
      dtype=object)

In [11]:
from prompttools.experiment import LlamaCppExperiment
from prompttools.utils import semantic_similarity
import numpy as np

import datetime

# Wall-clock timer covering everything this cell does, from building the
# experiment through to visualizing the results.
start = datetime.datetime.now()
print(f"start time: {start}")

model_paths = [
    "../../llama/llama.cpp/models/7b/ggml-vicuna-7b-1.1-q4_0.bin",
]
# Pass the prompts as a plain Python list of strings rather than a NumPy
# object array.
prompts = sample['ctx'].tolist()
temperatures = [0.0, 0.5, 1.0]

call_params = dict(temperature=temperatures)
experiment = LlamaCppExperiment(model_paths, prompts, call_params=call_params)

experiment.run()

# `expected` needs one entry per result row. Derive the count from the inputs
# instead of hard-coding 15, so changing the sample size or the temperature
# grid cannot leave the list too short or too long.
# NOTE(review): assumes the experiment runs every combination of model path,
# prompt and temperature -- confirm against the prompttools version in use.
num_runs = len(model_paths) * len(prompts) * len(temperatures)
experiment.evaluate(
    "similar_to_expected",
    semantic_similarity,
    expected=["generic response"] * num_runs,
)

experiment.visualize()
print("Time taken: ", datetime.datetime.now() - start)

start time: 2023-08-11 18:24:36.768293


llama.cpp: loading model from ../../llama/llama.cpp/models/7b/ggml-vicuna-7b-1.1-q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 4096
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 32
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 11008
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size =    0.08 MB
llama_model_load_internal: mem required  = 5407.72 MB (+ 1026.00 MB per state)
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 
llama_new_context_with_model: kv self size  =  256.00 MB

llama_print_timings:        l

Unnamed: 0,temperature,prompt,response,latency,similar_to_expected
0,0.0,"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then",", the man takes off his hat and holds it in front of him as he walks away from the camera.\nThe video ends with the words ""Love is patient"" written on the snow, followed by the hashtag #SnowManProposal.\nThe video quickly went viral, with over 10 million views on YouTube and countless shares on social media. The couple's friends and family were also quick to share their congratulations and well-wishes for the happy couple.\n""I can't believe how many people have seen it,"" said Sarah. ""It's",18.544025,0.033135
1,0.0,A female chef in white uniform shows a stack of baking pans in a large kitchen presenting them. the pans,"are lined up on a counter, with various ingredients and utensils visible in the background.\n4k00:12Aerial drone footage of a modern city skyline at sunset, showing tall buildings and skyscrapers with reflections on the water.\n4k00:13Aerial drone footage of a modern city skyline at night, showing tall buildings and skyscrapers with lights on.",14.391876,0.004737
2,0.0,A female chef in white uniform shows a stack of baking pans in a large kitchen presenting them. The pans are filled with pastries and loaded into the oven. a knife,"is inserted to check if the pastry is done, then taken out and sliced open to reveal a delicious looking pastry. Shot on RED EPIC-W 8K Helium.\n4k01:02Aerial drone footage of a busy city street at night with cars and people walking.\n4k00:32An aerial view of a modern city skyline at sunset, with buildings reaching towards the sky. Shot on RED EPIC-W 8K Helium.",17.267699,0.0266
3,0.0,A tray of potatoes is loaded into the oven and removed. A large tray of cake is flipped over and placed on counter. a large tray of meat,"is taken out of the oven and put on a plate.\nA man walks in with a dog, they go to the back room.\nA woman comes in and starts working at the register. She takes an order from a customer and goes to the kitchen to prepare it.\nA man comes in and sits down at a table. He orders a sandwich and drinks. The woman brings out his food and he eats.\nThe woman talks on the phone for a bit, then hangs up. She walks over to the man and asks him if he would like another drink. He nods",23.014908,0.126868
4,0.0,The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt,"is also showing his own hairstyle to the other man.\nThe man in the center is holding a comb and appears to be giving advice or instructions on how to style one's hair. The man in the blue shirt is following the demonstration by the man in the center, and appears to be listening attentively.",12.066669,-0.002631
5,0.5,"Then, the man writes over the snow covering the window of a car, and a woman wearing winter clothes smiles. then",", the man takes off his hat and holds it in front of him as if he is bowing.\nThe next scene shows the man and woman on a bed, with the man looking at her intently. Then, they are seen walking together outside, with the man holding her hand. The video ends with the man and woman hugging each other.",12.385372,-0.016033
6,0.5,A female chef in white uniform shows a stack of baking pans in a large kitchen presenting them. the pans,"are from different brands, models and materials\n4k00:15Aerial drone footage of a modern city skyline at sunset with buildings reaching towards the clouds\n4k00:12Aerial drone footage of a large construction site with heavy machinery and workers on the job",11.795872,0.071052
7,0.5,A female chef in white uniform shows a stack of baking pans in a large kitchen presenting them. The pans are filled with pastries and loaded into the oven. a knife,"is inserted into one of the pastries, which is then sliced open to reveal its filling. Close-up of the pastry and filling, followed by a shot of the chef putting the pan back onto a shelf in the kitchen.\n4k00:13Aerial footage of a busy street in Shibuya, Tokyo, Japan, showing a variety of people walking, shopping, and socializing on a sunny day.\n4k00:28Aerial footage of a busy intersection in Shinjuku, Tokyo, Japan, showing a large crowd of",20.291013,-0.060286
8,0.5,A tray of potatoes is loaded into the oven and removed. A large tray of cake is flipped over and placed on counter. a large tray of meat,balls is flipped over and placed in the oven.\nA tray of bread is taken out of the oven and put on a rack to cool. A large tray of lasagna is removed from the oven and set on a table.\nA tray of rolls is taken out of the oven and placed on a counter. A large tray of cookies are taken out of the oven and placed on a tray.\nA tray of muffins is taken out of the oven and placed on a rack to cool. A large tray of french bread is,21.82052,-0.011384
9,0.5,The man in the center is demonstrating a hairstyle on the person wearing the blue shirt. the man in the blue shirt,"is his client, and he is trying to convince him to get a haircut at his barbershop.",6.829425,-0.040271


Time taken:  0:04:13.145445


In [2]:
from prompttools.

ModuleNotFoundError: No module named 'prompttools.benchmarks'