In [1]:
import pandas as pd
from datasets import load_dataset

df = pd.read_csv('init-narrativeqa.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,questions,correct_answer,sources
0,0,WHO NORMALLY DELIVERS THE OPENING PROLOGUE IN ...,THE ACTOR WEARING THE BLACK CLOAK,The play begins with three pages disputing ov...
1,1,WHAT NAME WAS CYNTHIA MORE FAMOUSLY KNOWN BY?,THE GODDESS DIANA,The play begins with three pages disputing ov...
2,2,WHO DOES ECHO WEEP FOR?,NARCISSUS,The play begins with three pages disputing ov...
3,3,WHAT DOES A DRINK FROM NARCISSUS'S SPRING CAUS...,FALL IN LOVE WITH THEMSELVES,The play begins with three pages disputing ov...
4,4,IN WHAT VALLEY DID THE SOLEMN REVELS OF CYNTHI...,GARGAPHIE IN GREECE,The play begins with three pages disputing ov...


In [2]:
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct", use_fast=False)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [25]:
from transformers import pipeline 

def produce_prompt(question, correct, source):
    
    user_content = "Question: " + question + "\n Correct answer: " +  correct + "\n Context: " + source + "\n\n Assistant:"

    messages = [ 
        {"role": "system", "content": """
        You are a helpful AI assistant. You are given a question and the correct answer to it. 
        Given the context, you have to provide a wrong, yet realistic, alternative answer to the same question given the context. 
        Output a synthetic answer in the same style as the correct answer. 
        
        For example: 

        Question: what does a drink from narcissus's spring cause the drinker to do?
        Correct answer: fall in love with themselves
        Context: the play begins with three pages disputing over the black cloak usually worn by the actor who delivers the prologue. they draw lots for the cloak, and one of the losers, anaides, starts telling the audience what happens in the play to come; the others try to suppress him, interrupting him and putting their hands over his mouth. soon they are fighting over the cloak and criticizing the author and the spectators as well.\nin the play proper, the goddess diana, also called cynthia, has ordained a "solemn revels" in the valley of gargaphie in greece. the gods cupid and mercury appear, and they too start to argue. mercury has awakened echo, who weeps for narcissus, and states that a drink from narcissus\'s spring causes the drinkers to "grow dotingly enamored of themselves." the courtiers and ladies assembled for the cynthia\'s revels all drink from the spring.\nasotus, a foolish spendthrift who longs to become a courtier and a master of fashion and manners, also drinks from the spring; emboldened by vanity and self-love, he challenges all comers to a competition of "court compliment." the competition is held, in four phases, and the courtiers are beaten. two symbolic masques are performed within the play for the assembled revelers. at their conclusion, cynthia (representing queen elizabeth) has the dancers unmask and shows that vices have masqueraded as virtues. she sentences them to make reparation and to purify themselves by bathing in the spring at mount helicon.\nthe figure of actaeon in the play may represent robert devereux, 2nd earl of essex, while cynthia\'s lady in waiting arete may be lucy, countess of bedford, one of elizabeth\'s ladies in waiting as well as jonson\'s patroness.\nthe play is notably rich in music, as is typical for the theatre of the boys\' companies, which originated as church choirs.

        Assistant: weep for narcissus.

        """}, 
        {"role": "user", "content": "Now to the same for this problem: " + user_content},
    ] 
    return messages

pipe = pipeline( 
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
) 

generation_args = { 
    "max_new_tokens": 20, 
    "return_full_text": False, 
    "temperature": 0.0, 
    "do_sample": False, 
} 

In [28]:
alternatives = []

for i in range(len(df)):
    output = pipe(produce_prompt(df['questions'][i], df['correct_answer'][i], df['sources'][i]), **generation_args) 
    alternatives.append(output[0]['generated_text']) 

In [29]:
alternatives[:5]

[' The goddess Diana, also called Cynthia. <|end|>',
 ' The goddess Diana. <|end|>',
 ' Echo weeps for Narcissus. <|end|>',
 ' become obsessed with their own appearance. <|end|>',
 ' The solemn revels of Cynthia took place in the valley of Helicon in Greece.']

In [30]:
df['alternative'] = alternatives

In [31]:
df['alternative'] = df['alternative'].str.replace('<|end|>', '', regex=False)

In [32]:
# df['alternative'] = df['alternative'].apply(lambda x: " ".join(x.split(':', 1)[-1].split()).strip().lower() if ':' in x else " ".join(x.split()).lower())

In [33]:
df['questions'] = df['questions'].str.lower()
df['correct_answer'] = df['correct_answer'].str.lower()
df['sources'] = df['sources'].str.lower()
df['alternative'] = df['alternative'].str.lower()

In [34]:
df.head()

Unnamed: 0.1,Unnamed: 0,questions,correct_answer,sources,alternative
0,0,who normally delivers the opening prologue in ...,the actor wearing the black cloak,the play begins with three pages disputing ov...,"the goddess diana, also called cynthia."
1,1,what name was cynthia more famously known by?,the goddess diana,the play begins with three pages disputing ov...,the goddess diana.
2,2,who does echo weep for?,narcissus,the play begins with three pages disputing ov...,echo weeps for narcissus.
3,3,what does a drink from narcissus's spring caus...,fall in love with themselves,the play begins with three pages disputing ov...,become obsessed with their own appearance.
4,4,in what valley did the solemn revels of cynthi...,gargaphie in greece,the play begins with three pages disputing ov...,the solemn revels of cynthia took place in th...


In [35]:
df.to_csv('test-narrative.csv')