In [22]:
import os
import random

import pandas as pd
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain_core.example_selectors import LengthBasedExampleSelector


from tools.data_manager import CupaDatamanager
from tools.constants import WRITE_DIR, OPENAI_API_KEY

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [3]:
cupa_data_dir = "../data/raw/CUPA"
cupa_dm = CupaDatamanager()
dataset = cupa_dm.get_cupa_dataset(cupa_data_dir, WRITE_DIR, save_dataset=False)

  out_df = pd.concat([out_df, new_row_df], ignore_index=True)


[2m2025-03-12 16:42:26[0m [[32m[1minfo     [0m] [1mReading entire dataset        [0m [36mnum_contexts[0m=[35m120[0m [36mnum_questions[0m=[35m795[0m
[2m2025-03-12 16:42:26[0m [[32m[1minfo     [0m] [1mCreating train split          [0m [36mnum_contexts[0m=[35m72[0m [36mnum_questions[0m=[35m481[0m
[2m2025-03-12 16:42:26[0m [[32m[1minfo     [0m] [1mCreating dev split            [0m [36mnum_contexts[0m=[35m18[0m [36mnum_questions[0m=[35m121[0m
[2m2025-03-12 16:42:26[0m [[32m[1minfo     [0m] [1mCreating test split           [0m [36mnum_contexts[0m=[35m30[0m [36mnum_questions[0m=[35m193[0m


In [4]:
# load data
dataset["train"]

Unnamed: 0,correct_answer,options,option_0,option_1,option_2,option_3,question,context,context_id,q_id,split,difficulty
0,0,[to reinforce the concerns already felt by som...,to reinforce the concerns already felt by some...,to remind readers to beware of false promises,to explain that such sites often have a hidden...,to show that the risks of internet use are som...,Why does the writer describe a website about p...,Some time ago a website highlighted the risks ...,1,1_Q1,,83.030000
1,1,"[they got fired from their jobs, navigated cel...",they got fired from their jobs,navigated celebritys temptations and perils,scroll off our collective screen,are doomed to repost it,Which phrase echoes the image introduced in th...,Some time ago a website highlighted the risks ...,1,1_Q2,,78.060000
2,1,"[He is concerned by the risks they took., He a...",He is concerned by the risks they took.,He appreciates their unprecedented achievements.,He admires their technical skills.,He is impressed by the extent of their coopera...,What is the writers attitude to the online pio...,Some time ago a website highlighted the risks ...,1,1_Q3,,77.020000
3,0,"[He was unusually innovative in his approach.,...",He was unusually innovative in his approach.,His work was popular for the wrong reasons.,He inspired others writing in different fields...,His work displayed considerable literary skill.,What does the writer suggest about Justin Hall...,Some time ago a website highlighted the risks ...,1,1_Q4,,72.860000
4,2,"[People usually dislike exhibitionists., Someo...",People usually dislike exhibitionists.,Someones life can be a form of art.,Being too open may be counterproductive.,Relationships are always a private matter.,What point is exemplified by the references to...,Some time ago a website highlighted the risks ...,1,1_Q5,,77.020000
...,...,...,...,...,...,...,...,...,...,...,...,...
785,1,"[complain about a badly organised summer camp,...",complain about a badly organised summer camp,describe how children get on at a summer camp,explain how to apply for a job at a summer camp,advise children how to behave away from home,What is the writer trying to do in this text?,"At the summer camp where I work, I really enjo...",119,119_Q1,,63.000000
786,3,"[At camp, most prefer swimming to table-tennis...","At camp, most prefer swimming to table-tennis.",If they dont enjoy climbing they can play tabl...,"With help, most people become excellent table-...",Most of them already know how to play table-te...,What does the writer say about the children an...,"At the summer camp where I work, I really enjo...",119,119_Q2,,86.389999
787,2,[The youngest ones find it hardest to be away ...,The youngest ones find it hardest to be away f...,They complain about the food when they phone t...,They soon find that being at camp is enjoyable.,Few of them seem grateful to the staff.,What does the writer say about children who go...,"At the summer camp where I work, I really enjo...",119,119_Q3,,63.369999
788,1,[They should visit their children instead of p...,They should visit their children instead of ph...,They don't need to keep on phoning the camp.,They shouldn't allow their children to bring p...,They feel worse after speaking on the phone.,What does the writer think about some parents?,"At the summer camp where I work, I really enjo...",119,119_Q4,,57.910000


In [5]:
def human_format_input(row) -> str:
    return f"Context:\n{row.context}\nQuestion: {row.question}\nOptions:\n1. {row.option_0}\n2. {row.option_1}\n3. {row.option_2}\n4. {row.option_3}"

def human_format_output(row) -> str:
    return f"Question difficulty: {row.difficulty}"

In [6]:
few_shot_df = pd.DataFrame()
few_shot_df["input"] = dataset["train"].apply(human_format_input, axis=1)
few_shot_df["output"] = dataset["train"].apply(human_format_output, axis=1)
few_shot_df.head()

Unnamed: 0,input,output
0,Context:\nSome time ago a website highlighted ...,Question difficulty: 83.03
1,Context:\nSome time ago a website highlighted ...,Question difficulty: 78.06
2,Context:\nSome time ago a website highlighted ...,Question difficulty: 77.02
3,Context:\nSome time ago a website highlighted ...,Question difficulty: 72.86
4,Context:\nSome time ago a website highlighted ...,Question difficulty: 77.02


In [7]:
# try with first 20 examples
few_shot_df = few_shot_df.head(20)
few_shot_list = [{"input": row["input"], "output": row["output"]} for _, row in few_shot_df.iterrows()]

# Dynamic few-shot prompting

## Create example selector

NOTE: I need OpenAI credits to use the OpenAI embeddings.

In [15]:
# examples = few_shot_list
# to_vectorize = [" ".join(example.values()) for example in examples]
# embeddings = OpenAIEmbeddings()
# vectorstore = Chroma.from_texts(to_vectorize, embeddings, metadatas=examples)

In [None]:
# example_selector = SemanticSimilarityExampleSelector(
#     vectorstore=vectorstore,
#     k=2,
# )

# # The prompt template will load examples by passing the input do the `select_examples` method
# example_selector.select_examples({"input": "horse"})

In [16]:
class RandomExampleSelector(BaseExampleSelector):
    def __init__(self, examples):
        self.examples = examples

    def add_example(self, example):
        self.examples.append(example)

    def select_examples(self, input_variables):
        random_match = random.choice(self.examples)
        return [random_match]

example_selector = RandomExampleSelector(examples=few_shot_list)
example_selector.select_examples({})

[{'input': 'Context:\nSome time ago a website highlighted the risks of public check-ins  online announcements of your whereabouts. The sites point was blunt: you may think you are just telling the world, Hey, Im at this place  but you are also advertising your out-and-about-ness to all kinds of people everywhere  not all of them people you might like to bump into. This appeared to confirm the growing awareness that there might be a downside to all the frantic sharing the web has enabled. The vast new opportunities to publish any and every aspect of our lives to a potentially global audience hold out all sorts of tantalising possibilities: Wealth! Fame! So we plunge into the maelstrom of the internet, tossing confessions, personal photos and stories into the digital vortex. Too late we realise that the water is crowded and treacherous  and we are lost. \nDepressing? Perhaps, but dont give up. This future has a map, drawn for us years ago by a reckless group of online pioneers. In the ea

## Create prompt template

In [18]:
# Define the few-shot prompt.
few_shot_prompt = FewShotChatMessagePromptTemplate(
    # The input variables select the values to pass to the example_selector
    input_variables=["input"],
    example_selector=example_selector,
    # Define how each example will be formatted.
    # In this case, each example will become 2 messages:
    # 1 human, and 1 AI
    example_prompt=ChatPromptTemplate.from_messages(
        [("human", "{input}"), ("ai", "{output}")]
    ),
)

print(few_shot_prompt.invoke(input="What's 3 🦜 3?").to_messages())

[HumanMessage(content='Context:\nAt its most abstract, science shades into philosophy; at its most practical it cures disease. It has eased our lives and threatened our existence. It aspires, but in some very basic way fails, to understand the ant and the origins of the universe, the infinitesimal atom and the mind-bludgeoning immensity of the cosmos. It has laid its hand on the shoulders of poets and politicians, philosophers and charlatans. Its beauty is often apparent only to the initiated, its perils are generally misunderstood, its importance has been both over and underestimated, and its fallibility, and that of those who create it, is often glossed over or malevolently exaggerated.\nThe attempt to explain the physical universe has been characterized by perpetual conflict. Established theories have continually been modified or violently overthrown, and as in the history of music, innovations tend to be ridiculed only to become, in time, the new dogma. The struggle between old and

In [27]:
SYSTEM_PROMPT = "You will be shown a multiple choice question from an English reading comprehension exam. You will be asked to provide the difficulty level of the question."

In [28]:
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYSTEM_PROMPT),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

print(few_shot_prompt.invoke(input="What's 3 🦜 3?"))

messages=[HumanMessage(content='Context:\nAt its most abstract, science shades into philosophy; at its most practical it cures disease. It has eased our lives and threatened our existence. It aspires, but in some very basic way fails, to understand the ant and the origins of the universe, the infinitesimal atom and the mind-bludgeoning immensity of the cosmos. It has laid its hand on the shoulders of poets and politicians, philosophers and charlatans. Its beauty is often apparent only to the initiated, its perils are generally misunderstood, its importance has been both over and underestimated, and its fallibility, and that of those who create it, is often glossed over or malevolently exaggerated.\nThe attempt to explain the physical universe has been characterized by perpetual conflict. Established theories have continually been modified or violently overthrown, and as in the history of music, innovations tend to be ridiculed only to become, in time, the new dogma. The struggle betwee

## Use with a chat model

In [29]:
test_example = {"input": few_shot_df.loc[21, "input"]}
test_example

{'input': 'Context:\nAt its most abstract, science shades into philosophy; at its most practical it cures disease. It has eased our lives and threatened our existence. It aspires, but in some very basic way fails, to understand the ant and the origins of the universe, the infinitesimal atom and the mind-bludgeoning immensity of the cosmos. It has laid its hand on the shoulders of poets and politicians, philosophers and charlatans. Its beauty is often apparent only to the initiated, its perils are generally misunderstood, its importance has been both over and underestimated, and its fallibility, and that of those who create it, is often glossed over or malevolently exaggerated.\nThe attempt to explain the physical universe has been characterized by perpetual conflict. Established theories have continually been modified or violently overthrown, and as in the history of music, innovations tend to be ridiculed only to become, in time, the new dogma. The struggle between old and new has rar

In [30]:
chain = final_prompt | ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

chain.invoke(test_example)

AIMessage(content='Question difficulty: 60.75', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 1916, 'total_tokens': 1924, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_06737a9306', 'finish_reason': 'stop', 'logprobs': None}, id='run-39101db3-65a2-4cde-aaa8-bd39b0276089-0', usage_metadata={'input_tokens': 1916, 'output_tokens': 8, 'total_tokens': 1924, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})