In [None]:
from llama_cpp import Llama
import random
import time
import re
import pandas as pd
import os

In [None]:
# Initialize the model: load the quantized Llama 3 8B Instruct GGUF file
# through llama-cpp-python.
# The path is assembled with os.path.join so it uses the platform's separator
# instead of hard-coded Windows backslashes (identical result on Windows).
llm = Llama(
    model_path=os.path.join(
        "models", "llama3-instruct", "meta-llama-3-8b-instruct.Q4_K_M.gguf"
    ),
    n_gpu_layers=-1,  # per llama-cpp-python docs, -1 offloads all layers to the GPU
    n_ctx=4096,       # context window size in tokens
    use_mlock=True,   # ask the OS to keep the model resident in RAM
)

In [None]:
# Pool of diverse scene themes; one entry is drawn per generated sample and
# interpolated into the instruction sent to the model.
environments = [
    "a desert canyon with ancient ruins",
    "a coral reef teeming with fish and sea creatures",
    "a snowy mountain village with wooden cabins",
    "a futuristic city with flying vehicles and neon lights",
    "an enchanted forest with mythical creatures and glowing plants",
    "an alien planet with crystal towers and two suns",
    "a medieval castle courtyard filled with armored knights",
    "a jungle temple hidden behind waterfalls",
    "an underground cavern with glowing minerals and creatures",
    "a floating island above the clouds with birds and bridges",
    "an ancient Greek amphitheater during a performance",
    "a Roman marketplace bustling with citizens",
    "a Viking longhouse in the snowy north",
    "a 19th-century steam train station",
    "an Egyptian temple under the stars",
    "a medieval tavern crowded with travelers",
    "a lunar base under Earthlight",
    "a terraformed Mars colony with red soil and bio-domes",
    "a space station orbiting Saturn",
    "a time machine room filled with gears and lights",
    "a robotic jungle reclaimed by nature",
    "a wizard’s tower surrounded by floating runes",
    "a crystal cave hidden beneath a volcano",
    "a fairy circle deep within a mossy glade",
    "a cursed battlefield shrouded in purple mist",
    "a phoenix nest atop a burning mountain",
    "a rainforest canopy alive with colorful birds",
    "a glacier cave glowing with icy blue light",
    "a savannah at sunset with migrating animals",
    "a mangrove swamp with hidden crocodiles",
    "a kelp forest swaying in the deep sea",
    "a windblown arctic tundra with polar bears",
    "a busy urban market at night in Southeast Asia",
    "a rooftop garden on a Tokyo skyscraper",
    "a Paris café terrace on a rainy day",
    "a New York subway platform at rush hour",
    "a coastal boardwalk during a summer festival",
    "a remote village during a lantern celebration",
]

# Prevent back-to-back repeats: each draw skips the 3 most recently used
# environments. This list is the shared pick history that choose_env appends to.
last_envs = []

def choose_env(environments, last_envs, k=3):
    """Pick a random environment that is not among the last ``k`` picks.

    Args:
        environments: Sequence of candidate environment descriptions.
        last_envs: Mutable history of earlier picks. The chosen value is
            appended to it in place.
        k: Number of most recent picks to exclude. ``k <= 0`` disables the
            exclusion entirely.

    Returns:
        The chosen environment.

    Raises:
        IndexError: If ``environments`` is empty.
    """
    # Take the slice once rather than once per candidate. k <= 0 is guarded
    # explicitly because ``last_envs[-0:]`` is the *whole* history, which
    # would exclude every environment ever used instead of none.
    recent = last_envs[-k:] if k > 0 else []
    options = [env for env in environments if env not in recent]
    if not options:
        # Every candidate was used recently (pool no larger than k):
        # fall back to the full pool rather than crash on an empty choice.
        options = list(environments)
    env = random.choice(options)
    last_envs.append(env)
    return env


In [None]:
# Run-level settings for the generation job.
target = 8000
batch_size = 100
results = []
file_path = "generated_prompts_8000.csv"

# Start from a clean slate: delete any CSV left over from a previous run.
if os.path.exists(file_path):
    os.remove(file_path)

# Column layout: the setting, the description, then a
# (question, choices, answer) triple for each of the four questions.
columns = ["env", "prompt"] + [
    f"q{number}{suffix}"
    for number in range(1, 5)
    for suffix in ("", "_choices", "_answer")
]

# Write a header-only CSV so the output file exists with the expected schema.
header_frame = pd.DataFrame(columns=columns)
header_frame.to_csv(file_path, index=False, encoding="utf-8")

In [None]:
# Generate `target` samples. For each one: pick a setting, ask the model for a
# short image description plus four multiple-choice questions, and keep the
# raw text of the reply (tagged with the sample number and setting).
start_time = time.time()
count = 0  # number of samples generated so far
results = []

for i in range(target):
    env = choose_env(environments, last_envs)

    # format the instruction prompt
    instruction = f"""
        Generate a NEW fictional image description about the following setting: **{env}**  
        Include at least one character (person or creature) or structure (like building or statue).  
        Limit the description to less than 15 words. Then generate FOUR questions about it.  
        Your image description must be between 10 and 15 words.

        Each question must have answer choices (either yes/no or 2 to 4 options).  
        Format:  

        Prompt: [description]  
        Questions:  
        1. [question]  
        - choices: [list]  
        - answer: [correct choice]
        2. [question]  
        - choices: [list]  
        - answer: [correct choice]
        3. [question]  
        - choices: [list]  
        - answer: [correct choice]
        4. [question]  
        - choices: [list]  
        - answer: [correct choice]
    """

    # Wrap the instruction in Llama 3 chat-template markers: a system turn,
    # the user turn, then an open assistant header for the model to complete.
    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n"
        "You are a helpful assistant.<|eot_id|>\n"
        "<|start_header_id|>user<|end_header_id|>\n"
        f"{instruction}<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )

    output = llm(prompt, max_tokens=512, temperature=0.95)
    results.append(f"--- Sample {i+1} ({env}) ---\n" + output["choices"][0]["text"].strip())
    count += 1

end_time = time.time()
total_time = end_time - start_time

print(f"\n Total_Time: {total_time:.2f} s(About {total_time/60:.2f} mins)")

In [None]:
# Persist the raw generations: one row per sample in a single column.
# NOTE: this targets the same path as the header-only CSV written during
# setup, so that file is replaced by this output.
output_csv = "generated_prompts_8000.csv"
df = pd.DataFrame(results)
df.to_csv(output_csv, index=False, encoding="utf-8")
print("Saved to generated_prompts_8000.csv")