In [1]:
import pandas as pd
from pathlib import Path
import together
import os
from openai import OpenAI
import random
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

In [2]:
from tqdm import tqdm

In [3]:
annotations = pd.read_csv('BMDS_story_annotations.csv')
annotations = annotations[annotations["Reveal border sentence"].notnull()]
border_sentences = annotations[["Story Code", "Reveal border sentence"]]

In [27]:
results_dict = {'story': [], 'reveal_sentence': []}

# sample 20 stories

In [18]:
import concurrent.futures
from tqdm import tqdm
client = OpenAI(api_key="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")

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def completion_with_backoff(**kwargs):
    response = client.chat.completions.create(**kwargs)
    return response 

def summarize_one(story_id):
    prompt_base = "Create a 1 paragraph plot outline of the following story:\n"
    try:
        with open(f'./texts/{story_id}.txt') as f:
            story = f.read()
            prompt = prompt_base + story
            response = completion_with_backoff(
                model="gpt-3.5-turbo-0125",
                temperature=0.8,
                messages=[
                    {"role": "system", "content": "You are an author's assistant."},
                    {"role": "user", "content": prompt}
                ]
            )
            return story_id, response.choices[0].message.content
    except Exception as e:
        print(str(e))
        return story_id, None

def summarize():
    
    summaries = {}
    sample_stories = border_sentences.sample(5)
    
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(summarize_one, row["Story Code"]) for _, row in sample_stories.iterrows()]
        
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            story_id, summary = future.result()
            summaries[story_id] = summary
        
    return summaries


# Write 20 stories

In [19]:
def process_story(story_id, seed_key, seed_prompt, seed_story):
    prompt_base = "Write a 500-1000 word story based on the provided outline.\n"
    try:

        # Construct the prompt for the current story_id
        prompt = prompt_base + summaries[story_id]
        # Call the completion function
        response = completion_with_backoff(
            model="gpt-3.5-turbo-0125",
            temperature=0.8,
            messages=[
                {"role": "system", "content": "You are an author. Your job is to write interesting mystery stories."},
                {"role": "user", "content": seed_prompt},
                {"role": "assistant", "content": seed_story},
                {"role": "user", "content": prompt}
            ]
        )

        return story_id, response.choices[0].message.content

    except Exception as e:
        error = str(e)
        print(f"Error in story {story_id}: {error}")
        return story_id, None

def write():
    stories = {}
    prompt_base = "Write a 500-1000 word story based on the provided outline.\n"
    seed_key = random.choice(list(summaries.keys()))
    seed_prompt = prompt_base + summaries[seed_key]
    with open(f'./texts/{seed_key}.txt') as f:
        seed_story = f.read()
    # Create a ThreadPoolExecutor
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit tasks to the executor
        futures = [executor.submit(process_story, story_id, seed_key, seed_prompt, seed_story) for story_id in summaries]
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            story_id, summary = future.result()
            stories[story_id] = summary
    return stories


# border sentence of 20 stories

In [51]:
def read_story(story_id, seed_prompt, seed_sentence):
    try:
        # Construct the prompt for the current story_id
        prompt_base = "In the following story, identify the sentence where the mystery is revealed:"
        prompt = prompt_base + stories[story_id]
        # Call the completion function
        response = completion_with_backoff(
            model="gpt-3.5-turbo-0125",
            temperature=0.8,
            messages=[
                {"role": "system", "content": "You are an author. Your job is to read interesting mystery stories."},
                {"role": "user", "content": seed_prompt},
                {"role": "assistant", "content": seed_sentence},
                {"role": "user", "content": prompt}
            ]
        )

        return story_id, response.choices[0].message.content

    except Exception as e:
        error = str(e)
        print(f"Error in story {story_id}: {error}")
        return story_id, None

def find_border():
    border_sentence = {}
    prompt_base = "In the following story, identify the sentence where the mystery is revealed:"
    seed_key = random.choice(list(stories.keys()))
    seed_prompt = prompt_base + stories[seed_key]
    seed_sentence = border_sentences[border_sentences['Story Code'] == seed_key]['Reveal border sentence'].values[0]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit tasks to the executor and gather futures
        futures = [executor.submit(read_story, story_id, seed_prompt, seed_sentence) for story_id in tqdm(summaries)]

        # Iterate over completed tasks
        for future in tqdm(concurrent.futures.as_completed(futures)):
            result = future.result()
            if result is not None:
                story_id, border_sentence_content = result
                border_sentence[story_id] = border_sentence_content

    return border_sentence

In [50]:
revisit_stories = []

In [None]:
import time
for i in range(30):
    time.sleep(10)
    summaries = summarize()
    condition = lambda key, value: value != None
    summaries = {key: value for key, value in summaries.items() if condition(key, value)}
    time.sleep(5)
    stories = write()
    stories = {key: value for key, value in stories.items() if condition(key, value)}
    time.sleep(5)
    border_sentence = find_border()
    border_sentence = {key: value for key, value in border_sentence.items() if condition(key, value)}
    
    for key in border_sentence.keys():
        if key in stories:
            story = stories[key]
            sentence = border_sentence[key]
            if story.find(sentence)!=-1:
                results_dict['story'].append(story)
                results_dict['reveal_sentence'].append(sentence)
            else:
                revisit_stories.append(story)
    print(len(results_dict['story']))
    


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:05<00:00,  1.00s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:39<00:00,  7.84s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 193.30it/s]
5it [00:01,  4.55it/s]


100


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:24<00:00,  4.92s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:08<00:00, 13.73s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 142.97it/s]
5it [00:01,  2.62it/s]


105


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:32<00:00,  6.53s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:59<00:00, 11.86s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 197.81it/s]
5it [00:01,  3.36it/s]


110


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:34<00:00,  6.89s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:56<00:00, 11.23s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 217.02it/s]
5it [00:18,  3.69s/it]


112


 80%|████████████████████████████████████████████████████████████████████████████████████████████▊                       | 4/5 [01:35<00:27, 27.14s/it]

RetryError[<Future at 0x14f45b110 state=finished raised RateLimitError>]


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:47<00:00, 21.51s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:44<00:00, 11.13s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 131.62it/s]
4it [00:18,  4.54s/it]


116


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:59<00:00, 11.84s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:51<00:00, 10.30s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 112.50it/s]
5it [00:28,  5.68s/it]


116


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:48<00:00,  9.61s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:29<00:00, 17.86s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 267.81it/s]
5it [00:28,  5.72s/it]


118


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:36<00:00,  7.37s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:56<00:00, 11.28s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 158.49it/s]
5it [00:28,  5.65s/it]


121


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:10<00:00, 14.01s/it]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:47<00:00,  9.55s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 120.41it/s]
5it [00:28,  5.65s/it]


121


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:33<00:00,  6.78s/it]
 80%|████████████████████████████████████████████████████████████████████████████████████████████▊                       | 4/5 [01:10<00:17, 17.27s/it]

In [32]:
print(len(revisit_stories))

98


In [33]:
len(results_dict['reveal_sentence'])

97

In [None]:
df = pd.DataFrame(results_dict)
df.to_csv('synthetic_batch_7.csv')

In [41]:
def read_story(story, seed_prompt, seed_sentence):
    try:
        # Construct the prompt for the current story_id
        prompt_base = "In the following story, identify the sentence where the mystery is revealed:"
        prompt = prompt_base + story
        # Call the completion function
        response = completion_with_backoff(
            model="gpt-3.5-turbo-0125",
            temperature=0.8,
            messages=[
                {"role": "system", "content": "You are an author. Your job is to read interesting mystery stories."},
                {"role": "user", "content": seed_prompt},
                {"role": "assistant", "content": seed_sentence},
                {"role": "user", "content": prompt}
            ]
        )

        return response.choices[0].message.content

    except Exception as e:
        error = str(e)
        print(f"Error in story {story_id}: {error}")
        return None

def find_border(batch):
    border_sentence = []
    prompt_base = "In the following story, identify the sentence where the mystery is revealed:"
    seed_key = random.choice(list(stories.keys()))
    seed_prompt = prompt_base + stories[seed_key]
    seed_sentence = border_sentences[border_sentences['Story Code'] == seed_key]['Reveal border sentence'].values[0]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit tasks to the executor and gather futures
        futures = [executor.submit(read_story, story, seed_prompt, seed_sentence) for story in tqdm(batch)]

        # Iterate over completed tasks
        for future in tqdm(concurrent.futures.as_completed(futures)):
            result = future.result()
            if result is not None:
                border_sentence_content = result
                border_sentence.append(border_sentence_content)

    return border_sentence

In [47]:
i = 0
border_sentence_revisit = []
revisit_2 = []
while i < len(revisit_stories):
    batch = revisit_stories[i:i+5]
    border_sentence = find_border(batch)
    time.sleep(5)
    for j in range(len(batch)):
        story = batch[j]
        sentence = border_sentence[j]
        print(sentence)
        if story.find(sentence)!=-1:
            border_sentence_revisit.append(sentence)
        else:
            revisit_2.append(story)
    i+=5
    print(len(border_sentence_revisit))
    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 140.04it/s]
5it [00:01,  3.39it/s]


It became clear that the orangutan had abducted the child from the Blake home and carried him to safety on the fire escape.
"He deduced that the poison was intended for someone else, a rival author who had been threatening Emily's success in the literary world."
"Holmes, do you think there could be a connection between this Russian man and the mysterious woman in our case?" Watson asked, peering over Holmes's shoulder at the faded print.
"It appears that this Beddington has cunningly impersonated you to gain access to your position at the firm. I suspect he needed a specimen of your handwriting to forge documents and manipulate your identity for his own illicit purposes."
"As Holmes delved into the investigation, he discovered a web of deceit and betrayal that ran deep within the lives of Josiah Amberley, his wife Mary, and her lover Victor Durand."
0


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 155.54it/s]
5it [00:01,  2.74it/s]


Without hesitation, Uncle Abner sprang into action.
The more he investigated, the more convinced he became that there was more to the young man's death than met the eye.
Through careful observation and thorough investigation, Kennedy uncovered a sinister plot orchestrated by Maude Schofield, a distant relative of the Athertons, who harbored a deep resentment towards the family.
The investigation took a surprising turn when Miss Strange discovered a crucial piece of evidence—a torn piece of fabric hidden under the victim's desk. Through careful analysis and deductive reasoning, she determined that the fabric matched a dress worn by Mrs. Ellis's maid on the night of the murder.
The Englishman had been part of a clandestine network of agents, each with their own hidden agendas and loyalties.
1


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 138.31it/s]
5it [00:01,  4.03it/s]


The pieces of the puzzle were slowly falling into place, and Hewitt's sharp investigative skills allowed him to see through the elaborate scheme.
"As Abner presented the damning evidence, Gaul's resolve wavered."
"Feeling the weight of their accusations and unable to bear the shame of being branded a thief, Clifford made a swift exit from the party."
But the true shock came when the identity of the orchestrator was revealed—a daring twist that exposed Count Rovigno, Julia's own husband, as the mastermind behind the sinister events.
"It's over, gentlemen," Tatlock said, his voice cold and unwavering. "You may have thought you could outsmart me, but justice always prevails in the end."
2


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 85.21it/s]
5it [00:01,  2.91it/s]


Kennedy, intrigued by the mysterious case presented to him, agreed to investigate the matter.
The group's target was Baron Krieger, a German diplomatist and financier who was in New York to secure a private loan for his country.
One night, as a storm raged outside, the ship was thrown into chaos.
The key to the phantom car's disappearance lay in a hidden passage in the trap area, allowing Marsh and his accomplices to navigate through undetected, creating the illusion of a swift and elusive vehicle.
As Cleek pieced together the clues, he uncovered a startling revelation. Ulchester had not taken his own life as everyone believed. Instead, he had devised a cunning plan to escape with Zuilika's inheritance, masquerading as her to avoid suspicion.
2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 123.00it/s]
5it [00:01,  4.25it/s]


Holmes and Watson gathered evidence and unraveled the Baron's schemes, they realized the urgency of their task.
Apologies, but I couldn't find a sentence in the story that clearly reveals the mystery. Would you like me to try again with a different story?
In the sentence "They uncovered evidence that pointed to Edward's involvement in her father's death," the mystery is revealed as the involvement of Madeline's cousin, Edward, in her father's death.
I'm sorry, but I couldn't find a sentence where the mystery is revealed in the provided text. If you have another story or passage you'd like me to analyze, feel free to share it!
The mystery is revealed in the sentence: "His sharp mind and keen eye for detail had led him to uncover the connection between the missing gold and the fishing boat owned by the Garthews in Lostella."
2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 121.20it/s]
5it [00:02,  2.40it/s]


As Dorcas closed in on the truth, the culprit grew desperate and attempted to flee.
opened his mouth to demand an answer, and the movement revealed an enormous gap in his side teeth.
Mabel Vernon sat in the cozy living room of her friend Jack Templeton, nervously twisting the hem of her skirt between her fingers.
He placed a small amount of jam on Jeanette's pillow, anticipating that it would trigger her memory and reveal the truth behind the missing gem.
In the chamber, Kennedy found Veda Blair, his wife, lying unconscious on a bed, her skin pale and her breathing shallow.
2


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 115.69it/s]
5it [00:04,  1.14it/s]


Holmes' eyes gleamed with understanding as he connected the dots.
In the days that followed, Miss Burton couldn't stop thinking about her encounter with Mr. Ashley and the chilling revelations he had shared with her.
Through a series of unexpected revelations and dark secrets coming to light, Harold Le Cheminant emerged as the true heir to the Earldom, casting a shadow of doubt and suspicion over Lord Tremarn's actions.
Together, James and Professor Quarles delved into the depths of the case, following a trail of clues that led them to Sir Henry Buckingham, a wealthy man with a dark secret hidden behind the walls of his estate.
Cleek uncovered the truth behind the murder of the Duchess of Heatherlands and the theft of the Siva stones.
4


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 140.93it/s]
5it [00:01,  3.50it/s]


"The discovery of the forged painting sent shockwaves through the art community..."
One evening, as Abner was exploring the grounds of the Highland house, he overheard a conversation between Campbell and a shadowy figure that confirmed his suspicions.
"Through a series of meticulous observations and scientific experiments, The Thinking Machine uncovered a startling revelation."
"Holmes's sharp mind pieced together the intricate web of deceit and treachery that had been spun around the unsuspecting guests."
"It was during a midnight rendezvous in the garden that Monsieur Rocheville discovered Miss Fayerwether's involvement in the jewel thefts at Idlewild."
4


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 119.41it/s]
5it [00:01,  3.79it/s]


Low's investigations led him to Phil Strewd, a trusted friend and fellow expert in the supernatural.
Pearcy's chilling confession sent shockwaves through the town, as he revealed his twisted motives for orchestrating the poisoning plot.
Uncle Abner, with his piercing gaze and gentle demeanor, listened intently to Betty's tale of woe.
With this crucial piece of evidence in hand, Kennedy began his investigation in earnest.
"This was all a ruse, Mr. Haddon," he declared, his voice cutting through the silence. "We knew of your involvement in the theft of the emerald necklace, and we orchestrated this elaborate scheme to bring you to justice."
6


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 79.12it/s]
5it [00:01,  2.60it/s]


With a heavy heart, Polly realized the depths to which human nature could sink in the pursuit of selfish desires.
Zaleski realized the gravity of the situation he was facing.
In a dramatic turn of events, the truth began to emerge, clearing Harold of any wrongdoing and exposing the calculated schemes of Mr. Keeson.
As the pieces of the puzzle fell into place, it became clear that Maude's father, Barry Euston, was indeed part of the scheme to stage a train robbery and manipulate the stock market.
With each piece of evidence she uncovered, the puzzle of the York Mystery became more complex, leading her down a path filled with danger and intrigue.
6


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 109.43it/s]
5it [00:02,  2.13it/s]


Holmes listened thoughtfully, his brilliant mind already working to piece together the puzzle.
Thorpe Hazell sat in his office, surrounded by the various oddities and curiosities he had accumulated over the years.
With her exceptional code-breaking abilities, Kennedy deciphered a cryptic message left behind by Singh.
The evidence pointed to a sophisticated forgery scheme orchestrated by the timber merchant, who had been using the bank to launder money and deceive his clients.
The man leaned in, his voice dropping to a conspiratorial whisper. "Let me tell you about the case of the Hon. Robert Ingram de Genneville," he began, his words weaving a complex tapestry of intrigue and deception. "A man ensnared in a labyrinth of ancient family documents, disputed peerages, and legal entanglements that would baffle even the most astute minds."
6


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 168.12it/s]
5it [00:01,  3.70it/s]


According to the associate, who wished to remain anonymous, Holmes had been investigating the murder of Ronald Adair in secret all these years.
It was revealed that the mountebank was none other than Blackford's long-lost father.
The mystery is revealed in the sentence: "With a deft hand and a quick mind, Cleek managed to secure the stolen fragment of the document that held vital information about France's secret defenses."
The letter hinted at a nefarious plot, and Kennedy's keen mind immediately honed in on the enigmatic Mrs. Popper as a key player in the deadly game unfolding at the Vandam mansion.
The mystery is revealed in the sentence: "Could it be possible that Heathcote had faked his death, only to return to his family on the brink of tragedy?"
7


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 124.12it/s]
5it [00:01,  2.56it/s]


The plot thickened as they uncovered evidence that suggested Evelyn had stumbled upon this espionage scheme and had paid the ultimate price for it.
Their investigation led them to various locations in London, where they interviewed acquaintances and gathered clues. It became apparent that Staunton's sudden departure was indeed connected to his secret marriage, a union that could have endangered his inheritance.
The news of the mysterious death spread quickly, and soon the local police were called in to investigate. Among them was Inspector Thomas Bennett, a seasoned detective known for his keen eye and sharp intellect. As he examined the scene, he couldn't shake the feeling that there was more to this case than met the eye.
Finally, after a harrowing pursuit that tested his intellect and courage to the limit, The Thinking Machine uncovered the truth behind the conspiracy.
His suspicions fell upon his niece, Mary Crossley, who had been acting strangely in recent weeks.
8


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 111.55it/s]
5it [00:01,  3.47it/s]


The sudden revelation struck him: it couldn't have been a human who abducted Baby Blake.
Through careful observation and deduction, Lady Molly uncovered Lady Irene's motive for the murder.
"We must act swiftly," he said, his voice calm but urgent. "There is a plot afoot to harm your father and other key officials of the company. But fear not, Miss Euston, we will get to the bottom of this."
The investigation unfolded a perplexing truth - the murders were not driven by rational motives or personal vendettas but by a primal instinct that defied reason.
The plot thickened when Kennedy uncovered a connection between Nordheim and a group of Japanese servants working for Mrs. Brainard, a wealthy socialite with ties to the highest echelons of society.
9


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 143.48it/s]
5it [00:01,  3.70it/s]


Mr. Beck was called in to investigate the case.
He poured over the witness statements, the timeline of events, and the physical evidence.
"The inventor had devised a scheme to gain control of the old man's vast fortune by incapacitating him and rendering him completely dependent on Prescott for his care and well-being."
The revelation of the mystery occurs when it is discovered that Mabel is secretly married to Claud Ransome, who is manipulating and tormenting her.
Detective Mallory was known for his sharp wit and keen investigative skills, but even he was perplexed by the circumstances surrounding the man's death.
9


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 204.22it/s]
5it [00:01,  4.73it/s]


KeyboardInterrupt: 