# SWAG

Will start with the SWAG dataset to get a baseline before moving on to the HellaSwag dataset.

In [1]:
from unittest.mock import patch
from datasets import load_dataset

with patch("datasets.builder.is_remote_filesystem", return_value=False):
    dataset = load_dataset("swag")

In [44]:
from dataclasses import dataclass

NUM_ANSWERS = 4
CHOICES = ["A", "B", "C", "D"]


@dataclass
class Sample:
    """A sample multiple choice question."""

    question: str
    answers: list[str]
    label: int

    @property
    def choices(self) -> list[str]:
        """Return the multiple choice answers."""
        return [ f"{choice}) {option}" for choice, option in zip(CHOICES, self.answers) ]

    @property
    def label_text(self) -> str:
        """Returns the expected correct answer."""
        return self.answers[self.label]

    @property
    def expected(self) -> str:
        """Returns the expected correct answer."""
        return f"{self.expected_choice} {self.label_text}"

    @property
    def expected_choice(self) -> str:
        """Returns the expected correct answer."""
        return f"{CHOICES[self.label]})"



def make_sample(record: dict[str, str]) -> Sample:
    return Sample(record["startphrase"], [record[f"ending{i}"] for i in range(0, NUM_ANSWERS)], label=record["label"])

ds = dataset['train']
it = iter(ds)
make_sample(next(it)), make_sample(next(it)), make_sample(next(it))


(Sample(question='Members of the procession walk down the street holding small horn brass instruments. A drum line', answers=['passes by walking down the street playing their instruments.', 'has heard approaching them.', "arrives and they're outside dancing and asleep.", 'turns the lead singer watches the performance.'], label=0),
 Sample(question='A drum line passes by walking down the street playing their instruments. Members of the procession', answers=['are playing ping pong and celebrating one left each in quick.', 'wait slowly towards the cadets.', 'continues to play as well along the crowd along with the band being interviewed.', 'continue to play marching, interspersed.'], label=3),
 Sample(question='A group of members in green uniforms walks waving flags. Members of the procession', answers=['pay the other coaches to cheer as people this chatter dips in lawn sheets.', 'walk down the street holding small horn brass instruments.', 'is seen in the background.', 'are talking a cou

# Open AI Client

This will open a client library to talk to a local model server hosting the model we wish to eval.

In [10]:
from openai import OpenAI

BASE_URL = "http://llama-cpp-server-cuda.llama:8000/v1"
API_KEY = "sk-xxx"
MODEL_ID = "/data/models/mistral-7b-instruct-v0.1.Q5_K_M.gguf"

client = OpenAI(base_url=BASE_URL, api_key=API_KEY)
response = client.chat.completions.create(
    model=MODEL_ID,
    messages=[
        {
            "content": "You are a helpful assistant.",
            "role": "system"
        },
        {
            "content": "What is the capital of France?",
            "role": "user"
        }
    ],
)
for choice in response.choices:
    print(choice.message.content)

 The capital city of France is Paris.


In [32]:
INSTRUCTIONS = "Choose the most plausible continuation for the story. Please answer with the letter of the correct answer."

def multiple_choice_question(sample: Sample) -> str:
    """Make a prompt from a sample."""
    return "\n".join([
        sample.question,
        *sample.choices
    ])

def answer(client: OpenAI, question: str, instructions: str = INSTRUCTIONS) -> str:
    response = client.chat.completions.create(
        model=MODEL_ID,
        messages=[
            {
                "content": instructions,
                "role": "system"
            },
            {
                "content": question,
                "role": "user"
            }
        ],
    )
    choice = response.choices[0]
    return choice.message.content.strip()


sample = make_sample(next(it))
response = answer(client, multiple_choice_question(sample))
sample, response

(Sample(question='Then, the boy left again the court, and then he returns with a racket and a ball. The child serves the ball, and the boy', answers=['applauds at the audience.', 'continue a successful strike grass.', 'has the ball and also lets his falls get the ball.', 'play stiff for stopping and score one shot.'], label=0),
 '<</SYS>> Then, the boy left again the court, and then he returns with a racket and a ball. The child serves the ball, and the boy (D) plays stiff for stopping and scores one shot.')

In [46]:
from tqdm import tqdm

correct = 0
total = 0
it = iter(ds)

In [47]:
from itertools import islice

def batched(iterable, n):
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError('n must be at least one')
    it = iter(iterable)
    while batch := tuple(islice(it, n)):
        yield batch

N_SHOT = 10
pbar = tqdm(enumerate(batched(it, N_SHOT + 1)))

for i, record_batch in pbar:
    shot_records = record_batch[0:N_SHOT]
    record = record_batch[-1]

    shot_samples = [ make_sample(r) for r in record_batch ]
    shot_questions = [ f"{multiple_choice_question(sample)}\n\nAnswer:\n{sample.expected}\n\n" for sample in shot_samples ]
    instructions = "\n".join([
        INSTRUCTIONS,
        *[
            shot_question
            for shot_question in shot_questions
        ],
    ])

    sample = make_sample(record)
    question = f"{multiple_choice_question(sample)}\n\nAnswer:\n"

    response = answer(client, question, instructions)
    total += 1
    # Sometimes the model may answer something like 'The best answer is A) <answer>'
    # so we'll give it credit.
    if response == sample.expected or sample.expected_choice in response or sample.label_text in response:
        correct += 1
    else:
        print(response)
        print(sample.expected)

    accuracy = (correct / total) * 100
    pbar.set_description(f"{accuracy:2.2f}% - {correct} of {total}")


75.00% - 3 of 4: : 4it [00:51, 12.11s/it] 

D) is open into a web.
B) crashes through the wall of an old ruin.


60.00% - 3 of 5: : 5it [01:04, 12.33s/it]

C) is seen running down the land.
A) is wearing black shorts and a shirt.


72.73% - 8 of 11: : 11it [03:04, 16.07s/it]

A) then grabs the lid and puts again down her can and the harriet is trapped by her legs in her lap.
C) strikes a quick pose with some attitude, then starts to brush her short bangs.


69.23% - 9 of 13: : 13it [03:32, 14.89s/it]

D) changes his leg and finishes the song, moves to bend his arm during a song and drums the rhythm again.
C) stops to take a breath for a moment and then continues on with his song.


77.27% - 17 of 22: : 22it [05:16, 10.92s/it]

<input text="I am a language model. How can I help you?">
A) stare out at her.


75.00% - 18 of 24: : 24it [05:42, 12.11s/it]

The most plausible continuation for the story is D) is seen continuing to mow the lawn on his lawn.
A) continues stuffing leads into a bag as well as raking leaves all around the yard.


72.00% - 18 of 25: : 25it [05:53, 11.94s/it]

D) interact around two groups.
C) come out and perform a routine on the stage.


70.37% - 19 of 27: : 27it [06:17, 12.01s/it]

D) pauses, turning a middle head towards her, and turns his head.
A) falters, closes his eyes and takes a deep breath.


67.86% - 19 of 28: : 28it [06:31, 12.59s/it]

Answer: A) pulls himself from a tightrope and performs a flight raft, some of them watch.
B) carefully unrolls the scroll as the aid looks on behind him.


69.70% - 23 of 33: : 33it [07:30, 11.53s/it]

D) take the lantern and haul them into the water.
A) pass between high mountains.


67.65% - 23 of 34: : 34it [07:43, 12.02s/it]

B) sail slowly, making their way up the stairs.
A) pelt their faces as groups with men emerges in a raft.


68.42% - 26 of 38: : 38it [09:04, 15.38s/it]

D) lob the ball back and forth between two stacks of football spectators.
A) picks up a wooden bucket by its rope handle.


69.77% - 30 of 43: : 43it [10:12, 14.82s/it]

The correct answer is C) shakes his head as someone makes his way into the grotto, bumping into one of the kids as he does so.
B) is trying to calm the kids.


75.44% - 43 of 57: : 57it [13:07, 14.48s/it]

The correct answer is: A) throw their balls to five people standing opposite of their wall.
B) shows up again and then goes back to clips of the man who is now just walking on the field.


74.14% - 43 of 58: : 58it [13:22, 14.73s/it]

D) continues to push with a hose along with the dog.
A) decided to not mow it and goes back to his original place of cutting.


73.33% - 44 of 60: : 60it [13:44, 12.87s/it]

A) begins moving a sword down in front of another opponent.
C) moves off on a mat while looking into the camera.


72.13% - 44 of 61: : 61it [13:56, 12.65s/it]

A) picks up the manuscript, sees it, and hurries away.
D) increases the screams of the radio and pulls into a line at new york.


74.29% - 52 of 70: : 70it [16:04, 12.91s/it]

D) makes her way past his photographs leaving then turns away and walks away.
A) rises and dries his hands.


73.24% - 52 of 71: : 71it [16:22, 14.54s/it]

<</SYS>> As they advance on each other in slow motion, the African-American boxer throws a powerful punch that lands square on the opponent's jaw, sending him sprawling to the ground. The crowd goes wild as the boxer stands over his defeated opponent, flexing his biceps and taunting him with a smirk.
A) advance on each other in slow motion.


72.60% - 53 of 73: : 73it [16:49, 14.28s/it]

D) appears with white words in the suspended white front of the 8 after you removed it from the pasta.
C) appears with the red clipart lion in the middle and the blue words british lion quality going around it.


71.62% - 53 of 74: : 74it [17:01, 13.70s/it]

D) locks his lips in a sultry look.
C) spots the nearby man's bare neck.


71.05% - 54 of 76: : 76it [17:25, 12.68s/it]

Answer: D) struggles to keep his arms around someone.
B) shows, plump - wall - headed, having slowly detergent to his little green bucket on the floor.


70.51% - 55 of 78: : 78it [17:50, 12.36s/it]

D) runs over to the ladder.
A) switches to the other line.


69.62% - 55 of 79: : 79it [18:02, 12.45s/it]

D) of the group wrapping and the man puts the boy in the raft again.
A) are shown of people riding around on boats.


68.75% - 55 of 80: : 80it [18:16, 12.93s/it]

A) The camera shows articles on the screen in front of a microphone.
B) shows celebrities while work a building.


68.29% - 56 of 82: : 82it [18:40, 12.44s/it]

D) watches his security guard climb over her shoulder.
C) turns to someone who drops his smile at the ground.


68.24% - 58 of 85: : 85it [19:19, 12.99s/it]

<SYS> D) attempts it across the way and begins across the bars.</D>
C) hops onto the parallel bars and works her way around, continuing to dance.


68.89% - 62 of 90: : 90it [20:12, 11.62s/it]

A) looks up at the branch open but does not even speak.
D) is now about six feet tall.


68.48% - 63 of 92: : 92it [20:37, 12.06s/it]

Answer: A) places it inside the napkin, then massages it.
B) is not fully recovered, but well on the way.


68.09% - 64 of 94: : 94it [21:02, 12.23s/it]

C) puts a pitcher into the air.
A) mixes all of her ingredients in a pitcher to make lemonade.


67.71% - 65 of 96: : 96it [21:24, 11.79s/it]

B) drills the dart board speeding away from the camera.
A) is walking on the field taking out her clothes and posing for a camera.


67.35% - 66 of 98: : 98it [21:48, 11.95s/it]

The answer is A) begins doing tai chi.
B) rides the inside of a new wave.


67.33% - 68 of 101: : 101it [22:23, 11.50s/it]

B) finds someone fighting several shots.
D) sits on a collapsible.


67.62% - 71 of 105: : 105it [23:10, 12.06s/it]

A) kicks the ball through the net in the end hits the ball.
C) hits the puck with her stick but is not very successful.


68.18% - 75 of 110: : 110it [24:07, 12.01s/it]

D) leans over and kisses someone in the head, cupping her head in her hand and dropping her onto her lap.
B) kisses her as she chats with the corner of her mouth.


68.42% - 78 of 114: : 114it [24:53, 11.52s/it]

A) 's like it has saved pieces of him.
C) zooms upwards out of his reach.


68.10% - 79 of 116: : 116it [25:13, 10.76s/it]

C) pauses, then steps back to his audience.
A) runs to someone's bedside.


67.52% - 79 of 117: : 117it [25:24, 10.95s/it]

A) has skillfully control his pleading eyes.
B) appears a little embarrassed.


67.50% - 81 of 120: : 120it [26:02, 12.21s/it]

The most plausible continuation for the story is D) holds it out to someone.
B) see the closing title screen.


67.74% - 84 of 124: : 124it [26:44, 11.24s/it]

D) sits up on his knees.
B) pours two mugs of beer.


67.72% - 86 of 127: : 127it [27:25, 12.58s/it]

A) changes acolytes in her workout and gets on her stepper.
B) stops and speaks her final words.


70.00% - 98 of 140: : 140it [30:17, 13.13s/it]

C) shows a piece of wood on a wooden floor.
B) brings in large rolls of wall paper.


69.93% - 100 of 143: : 143it [30:49, 11.57s/it]

B) is positioned on an easel.
D) appears on the screen when a woman gives an interview.


69.66% - 101 of 145: : 145it [31:14, 11.86s/it]

D) sits on a stereo with his arm draped.
C) sits at his desk.


69.39% - 102 of 147: : 147it [31:35, 11.42s/it]

C) narrates the arts room.
A) peers out a door in the hall as someone packs his things into the whiskey decanter.


69.13% - 103 of 149: : 149it [31:56, 10.65s/it]

D) crashes open front doors and into a living room home.
A) lifts his serious gaze from the contract.


68.67% - 103 of 150: : 150it [32:11, 11.98s/it]

Answer: D) peers cautiously at it and cautiously checks it out, then points past a young boy on the bottom of the dark hallway.
B) sees someone sitting on the stairs, smoking a cigarette.


68.21% - 103 of 151: : 151it [32:22, 11.94s/it]

D) opens the machine 'door to two.
A) reads the manuscript page with doleful eyes.


67.76% - 103 of 152: : 152it [32:33, 11.49s/it]

B) switches to a hidden scene.
A) runs to the stairs and sits at a door.


68.15% - 107 of 157: : 157it [33:37, 12.46s/it]

D) looks through the classroom's door as he waits on his steps.
B) turns away from the desk and picks up her bag.


69.09% - 114 of 165: : 165it [35:22, 12.45s/it]

A) is shown with a man is seated next in a car.
D) drives by on the road at a decent speed.


69.05% - 116 of 168: : 168it [35:58, 12.12s/it]

D) introduces someone and regards.
A) blinks his sad eyes, then strokes her cheek.


68.64% - 116 of 169: : 169it [36:08, 11.63s/it]

<
B) takes the wand, trying to put it on her own lips.


68.42% - 117 of 171: : 171it [36:29, 11.08s/it]

D) walks over to help, then he stands on the podium, and looks up surprised.
C) scurries towards a rat hole.


68.02% - 117 of 172: : 172it [36:40, 11.13s/it]

D) laughs, dancing downstairs - a shower getting dirty.
C) stick a finger over their head.


67.82% - 118 of 174: : 174it [37:00, 10.65s/it]

A) lifts his nose and whispers in his ear.
C) leans forward and opens the door.


67.80% - 120 of 177: : 177it [37:29,  9.79s/it]

D) rush for him.
B) start to file out.


68.82% - 128 of 186: : 186it [39:19, 12.64s/it]

C) gets up and they kneel down on the back of the building, neighboring with their guns of lights.
A) goes to the front door, opens it a crack and cautiously goes out.


68.45% - 128 of 187: : 187it [39:29, 11.86s/it]

B) accepts a knife over his back.
A) works on a car.


68.09% - 128 of 188: : 188it [39:39, 11.38s/it]

C) pulls the lever into the light.
B) studies contacts lens in their eye.


67.72% - 128 of 189: : 189it [39:54, 12.65s/it]

A) is busy at the wheel of a pickup and the man begins to ride backward, cars, then he moves down the path.
D) of different men gets on one of the boats and clean the rocks off at the end.


67.71% - 130 of 192: : 192it [40:31, 12.40s/it]

C) steps back to the first team event, showing their hair as they play as the words game is played.
B) give a thumbs to.


67.36% - 130 of 193: : 193it [40:45, 12.68s/it]

D) gets up and paddles away, rising and screaming as she rushes to being watched by the cops.
A) makes a loud grunt as she slams into someone that she did not see.


67.18% - 131 of 195: : 195it [41:13, 13.46s/it]

D) jumps down on the camel as a dog sits behind her.
C) is walking and holding a string tied to the camel's face.


67.17% - 133 of 198: : 198it [42:00, 16.02s/it]

The most plausible continuation for the story is:

A) spins his wrist between their cold hands and arms and manages to lock the handcuffs off them.

In this scenario, it seems that the character in question is using their agility and strength to free someone from being restrained by a guard or captor.
D) peels back his mask, exposing his mouth and plants a passionate kiss on him.


66.83% - 133 of 199: : 199it [42:16, 16.22s/it]

D) struggles to pull her axle out, hauling her off a grand sofa and pulling her through the other room toward the door.
A) puts something in his mouth, then slams it back onto the bar.


66.50% - 133 of 200: : 200it [42:30, 15.47s/it]

D) holds up a stack of cash and slides a knife against it.
C) touches his shoulder and puts someone's arm around someone.


66.67% - 136 of 204: : 204it [43:20, 13.39s/it]

C) find the booth falls into the night's only two - dollar bed.
A) writhe in pain among the debris.


66.50% - 137 of 206: : 206it [43:43, 12.53s/it]

D) leans back with her right hand, bringing the heart out of its own.
C) rotates her attention onto the railing.


66.18% - 137 of 207: : 207it [43:55, 12.52s/it]

The correct answer is C) climbs onto the high side of the capsizing lifeboat.
A) lie down on the boat.


66.03% - 138 of 209: : 209it [44:19, 12.18s/it]

A) dismounts, waving his arms around someone.
B) will pull themselves onto pieces of floating debris.


66.20% - 141 of 213: : 213it [45:17, 14.11s/it]

The two men makes a series of jumps in turn. The woman
C) cheers and the coach cheers.


66.36% - 144 of 217: : 217it [46:08, 13.10s/it]

The most plausible continuation for the story would be B) gazes after him with a slight smile.
A) walks to the shelf and picks up a finger puppet dancer.


66.21% - 145 of 219: : 219it [46:31, 12.39s/it]

C) indicates his name tag on the cheek.
A) tenses his face and nods.


66.22% - 147 of 222: : 222it [47:05, 11.65s/it]

D) run into the arena together and talk.
A) stands cheers and claps.


65.92% - 147 of 223: : 223it [47:18, 11.87s/it]

B) poses with her friend.
C) puts lipstick on her lips.


65.78% - 148 of 225: : 225it [47:42, 12.00s/it]

D) takes the drinks and puts him in the glass on the table.
B) reviews the ingredients to be used in the preparation of the drink.


65.94% - 151 of 229: : 229it [48:34, 12.45s/it]

A) looks away, scans the crowd, then downs the rest of his drink in one gulp.
B) plays the drums while a camera watches.


66.09% - 154 of 233: : 233it [49:19, 11.65s/it]

D) See the cleaned logo as the cameraman brushes with the person.
C) see the end credits.


65.96% - 155 of 235: : 235it [49:42, 11.39s/it]

B) shifts his cover then aims at someone.
C) looks back at her friend.


66.11% - 158 of 239: : 239it [50:31, 12.12s/it]

Answer: B) leaps back and grabs the rifle.
A) shoots him a look.


66.12% - 160 of 242: : 242it [51:09, 12.45s/it]

D) pushes a container and fries it in half.
A) steps forward and casually tosses the pistol on to the body.


66.26% - 163 of 246: : 246it [52:04, 13.40s/it]

B) pats his uncle's hair.
D) pets the dog as he watches his mother work.


66.40% - 166 of 250: : 250it [53:00, 13.21s/it]

Answer: D) gets up as he reaches a guy across behind her.
C) gives a thumbs up.


66.27% - 167 of 252: : 252it [53:23, 12.34s/it]

B) is interview to the camera while still jogging along the yard.
A) is seen speaking to the camera and continues to run around the beach area.


66.80% - 171 of 256: : 256it [54:08, 12.69s/it]


KeyboardInterrupt: 