In [1]:
def build_prompt(story):
    """Build the one-shot keyword-extraction prompt for ``story``.

    The prompt consists of two turns that share the same instructions:

    1. A worked example: a short sample story wrapped in
       ``<s>[INST] ... [/INST]``, followed by its five keywords and ``</s>``.
    2. The same request for ``story``, left open after ``[/INST]``.

    Args:
        story: The story text to extract keywords from. It is interpolated
            with default string formatting.

    Returns:
        The example turn concatenated with the open turn for ``story``.
    """
    # Every non-blank line of the prompt is indented by eight spaces.
    indent = " " * 8

    def instruction_turn(opening_tag, text):
        # One "[INST] ... [/INST]" request; only the opening tag and the
        # story text differ between the example and the real question.
        return (
            f"\n{indent}{opening_tag}\n"
            f"{indent}I have the following story:\n"
            f"{indent}- {text}\n"
            "\n"
            f"{indent}Please give me exactly five keywords that are present in this story and separate them with commas.\n"
            f"{indent}Make sure you only return the keywords and say nothing else. For example, don't say:\n"
            f'{indent}"Here are the keywords present in the story"\n'
            f"{indent}[/INST]"
        )

    sample_story = "Once upon a time in a quiet village, a young girl named Elara discovered a hidden map that led to a forgotten treasure. She embarked on a daring adventure through enchanted forests and ancient ruins."

    # The example answer closes its turn with "</s>" and is followed by a
    # newline plus one indent before the real question starts.
    worked_example = (
        instruction_turn("<s>[INST]", sample_story)
        + f" village, Elara, map, treasure, adventure</s>\n{indent}"
    )
    return worked_example + instruction_turn("[INST]", story)


In [3]:
import subprocess
import json
import pandas as pd

def run_with_ollama(prompt, model="mistral", timeout=None):
    """Send ``prompt`` to a local Ollama model and return its reply as text.

    The prompt is piped to ``ollama run <model>`` on stdin and the process's
    stdout is returned, decoded as UTF-8.

    Args:
        prompt: Prompt text to send to the model.
        model: Name of the Ollama model to run. Defaults to ``"mistral"``.
        timeout: Optional limit in seconds for the whole call; ``None``
            (the default) waits indefinitely.

    Returns:
        The decoded stdout of the ``ollama`` process.

    Raises:
        RuntimeError: If ``ollama`` exits with a non-zero status. The message
            carries the exit status and the captured stderr.
        subprocess.TimeoutExpired: If ``timeout`` elapses before the process
            finishes.
    """
    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt.encode("utf-8"),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
    )
    # A failed run typically leaves stdout empty; returning that would look
    # like a legitimate (blank) reply to the caller, so fail loudly instead.
    if result.returncode != 0:
        detail = result.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ollama run {model} exited with status {result.returncode}: {detail}"
        )
    return result.stdout.decode("utf-8")

# Keyword-extraction run.
# Sends every story in the CSV through the model and appends one JSON record
# per story to a JSONL file. Stories whose index is already present in that
# file are skipped, so this cell can be re-run to resume an interrupted job.


def _load_done_indices(path):
    """Return the set of "index" values already recorded in the JSONL file at ``path``.

    A missing file yields an empty set. Blank or unreadable lines (for
    example a line truncated by an interrupted run) are skipped with a
    notice, so the affected story is simply processed again.
    """
    done = set()
    try:
        with open(path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                try:
                    done.add(json.loads(line)["index"])
                except (json.JSONDecodeError, KeyError, TypeError):
                    print(f"Skipping unreadable line {line_no} in {path}")
    except FileNotFoundError:
        pass
    return done


def _clean_keywords(text):
    """Split comma-separated ``text`` into keywords, dropping decoration and empties."""
    cleaned = (chunk.strip(" *-_.").strip() for chunk in text.split(","))
    # Filter after cleaning so a chunk made only of decoration ("**") is dropped.
    return [keyword for keyword in cleaned if keyword]


def _parse_model_output(output):
    """Extract ``(keywords, script)`` from the raw model reply.

    If a line contains ``keywords:`` (case-insensitive), the keywords are
    read from the text after the first colon on that line and the script is
    the reply without that line. Otherwise the keywords are read from the
    first line of the reply and the script is the whole stripped reply.
    """
    lines = output.strip().splitlines()
    for i, line in enumerate(lines):
        if "keywords:" in line.lower():
            keywords = _clean_keywords(line.split(":", 1)[1])
            remaining = lines[:i] + lines[i + 1:]
            return keywords, "\n".join(remaining).strip()

    # No label found. The prompt asks for a bare comma-separated list, so the
    # first line of the reply is taken as the keyword list itself.
    first_line = lines[0] if lines else ""
    return _clean_keywords(first_line), "\n".join(lines).strip()


# Load your DataFrame
df = pd.read_csv("short_childrens_stories_train.csv")

output_file = "FewShotPrompting-Keywords.jsonl"
done_indices = _load_done_indices(output_file)

for idx, row in df.iterrows():
    if idx in done_indices:
        continue

    prompt = build_prompt(row["story"])
    try:
        output = run_with_ollama(prompt)
        print("output", output)

        # An empty reply would otherwise be saved and the story marked as
        # done; raise so it is reported below and retried on the next run.
        if not output.strip():
            raise ValueError("model returned no output")

        keywords, formatted_script = _parse_model_output(output)

        # Serialize before opening the file so a serialization error cannot
        # leave a partial line behind in the JSONL output.
        record = json.dumps({
            "index": idx,
            "story": row["story"],
            "keywords": keywords,
            "script": formatted_script
        })
        with open(output_file, "a", encoding="utf-8") as f:
            f.write(record + "\n")

        print(f"✅ Saved story {idx}")
    except Exception as e:
        # Best effort: report the failure and move on to the next story; the
        # index is not written, so a later run will try this story again.
        print(f"❌ Failed at index {idx}: {e}")


1
output  Philadelphia, Timmy, sports, Jimmy Lyggett SR, inspiration


✅ Saved story 3
1
output  Home Runville, Derby, Tommy, Alexa, Trajectory Tracking


✅ Saved story 4
1
output  bustling library town, Timmy, Sally, historical books, geography, anthropology


✅ Saved story 5
1
output  Little Bunny, bird, meadow, friend, emotions


✅ Saved story 6
1
output  Sarah, couch, sleep, science, resources


✅ Saved story 7
1
output  Israel, tribes, Saul, David, Messiah


✅ Saved story 8
1
output  Orange County, California, Vicki, Tamra, Jeana, Gretchen, Lynne, beautiful bird, carotenoids


✅ Saved story 9
1
output  Mathville, Calvin, Emma, equation, lemon rain


✅ Saved story 10
1
output  workshop, Timber Teddies, Woodrow, screw, toolbox


✅ Saved story 11
1
output  ChloroFriends, Little Leaf, Mature Maple, science fair, chloroplasts


✅ Saved story 12
1
output  village, animals, rainbow, adventure, refraction


✅ Saved story 13
1
output 2012, James, Lars, Metallica, Orion Music + More, Black 