# Data generator

In [None]:
import json
import os
import time
from pathlib import Path

import requests

# ==================== Configuration ====================
# The API key is read from the MISTRAL_API_KEY environment variable so the
# secret is never stored in this file. Any key that was ever committed to
# source control should be treated as leaked and revoked.
API_KEY = os.environ.get("MISTRAL_API_KEY", "")
if not API_KEY:
    # Do not abort here: warn so the missing key is obvious before any request
    # is sent with an empty bearer token.
    print("[!] MISTRAL_API_KEY is not set; requests will be sent without a valid key.")

API_URL = "https://api.mistral.ai/v1/chat/completions"
MODEL = "mistral-small"

# Headers sent with every chat-completion request.
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

def chat_with_mistral(messages, temperature):
    """Send one chat-completion request and return the generated text.

    Args:
        messages: Chat messages, forwarded unchanged as the ``messages`` field.
        temperature: Sampling temperature, forwarded unchanged.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        Exception: If the HTTP status code of the response is not 200.
    """
    body = json.dumps({
        "model": MODEL,
        "messages": messages,
        "temperature": temperature,
        "top_p": 1.0,
        "stream": False
    })
    response = requests.post(API_URL, headers=HEADERS, data=body, timeout=100)
    if response.status_code == 200:
        return response.json()['choices'][0]['message']['content']
    raise Exception(f"Request failed: {response.status_code} - {response.text}")

In [None]:
# ========== Horror Story Generation ==========
# Requests 500 horror stories at temperature 0.9 and merges every successful
# reply into one text file. Failed requests are reported and skipped.
Path("generated_data").mkdir(exist_ok=True, parents=True)

prompt = (
    "Strictly write a 5000-word horror story using vivid adjectives, strong emotions, \n"
    "and a gripping narrative arc. Keep into consideration that the story has to be 5000 words long."
)
temperature = 0.9
all_texts = []

for sample_index in range(1, 501):
    # The running sample number is appended to the otherwise constant prompt.
    full_prompt = f"{prompt} This is sample {sample_index} of 500."
    chat_history = [dict(role="user", content=full_prompt)]

    try:
        all_texts.append(chat_with_mistral(chat_history, temperature=temperature))
        print(f"[✓] Generated sample {sample_index}/500")
        # Pause after each successful request before sending the next one.
        time.sleep(3)
    except Exception as exc:
        print(f"[!] Error at sample {sample_index}: {exc}")

# Stories are separated by one blank line in the merged file.
final_filename = "generated_data/merged_horror_stories1.txt"
Path(final_filename).write_text("\n\n".join(all_texts), encoding="utf-8")

print(f"[✓] Merged file saved to: {final_filename}")

In [None]:
# ========== Horror Story Low Temperature Generation ==========
# Same horror prompt as the high-temperature run, sampled at temperature 0.2.
# Successful replies are collected and merged into one file at the end.
Path("generated_data").mkdir(exist_ok=True, parents=True)

temperature = 0.2
prompt = (
    "Strictly write a 5000-word horror story using vivid adjectives, strong emotions, \n"
    "and a gripping narrative arc. Keep into consideration that the story has to be 5000 words long."
)

all_texts = []
for sample_index in range(1, 501):
    full_prompt = f"{prompt} This is sample {sample_index} of 500."
    chat_history = [{"content": full_prompt, "role": "user"}]

    try:
        reply = chat_with_mistral(chat_history, temperature=temperature)
        all_texts += [reply]
        print(f"[✓] Generated sample {sample_index}/500")
        time.sleep(3)  # wait between successful requests
    except Exception as err:
        # A failed sample is only reported; the loop moves on to the next one.
        print(f"[!] Error at sample {sample_index}: {err}")

final_filename = "generated_data/merged_horror_stories_lowtemp.txt"
merged_text = "\n\n".join(all_texts)
with open(final_filename, mode="w", encoding="utf-8") as out_file:
    out_file.write(merged_text)

print(f"[✓] Merged file saved to: {final_filename}")

In [None]:
# ========== Scientific Text Generation ==========
# Requests scientific explanations at temperature 0.9 and merges every
# successful reply into one text file. Failed requests are reported and skipped.
Path("generated").mkdir(parents=True, exist_ok=True)
all_texts = []

prompt = (
    "Write a detailed scientific explanation of approximately 5,000 words. \n"
    "Use a formal and objective tone, emphasizing technical accuracy and conceptual clarity. \n"
    "Employ precise terminology, abstract nouns, and domain-specific vocabulary appropriate \n"
    "for an academic or expert audience. "
)

temperature = 0.9
# Single source of truth for the sample count, so the loop bound, the prompt
# text and the progress message cannot drift apart.
total_samples = 500

for sample_index in range(1, total_samples + 1):
    full_prompt = f"{prompt} This is sample {sample_index} of {total_samples}."
    chat_history = [{"role": "user", "content": full_prompt}]

    try:
        reply = chat_with_mistral(chat_history, temperature=temperature)
        all_texts.append(reply)
        print(f"[✓] Generated sample {sample_index}/{total_samples}")
        time.sleep(3)  # Respectful delay if needed
    except Exception as e:
        # Best effort: report the failure and continue with the next sample.
        print(f"[!] Error at sample {sample_index}: {e}")

# Save all merged scientific texts to a single file
final_filename = "generated/merged_scientific.txt"
with open(final_filename, "w", encoding="utf-8") as f:
    f.write("\n\n".join(all_texts))

print(f"[✓] Merged file saved to: {final_filename}")

In [None]:
# ========== Scientific Text Low Temperature Generation ==========
# Same scientific prompt as the high-temperature run, sampled at temperature
# 0.2. Successful replies are merged into one text file at the end.
Path("generated").mkdir(parents=True, exist_ok=True)
all_texts = []

prompt = (
    "Write a detailed scientific explanation of approximately 5,000 words. \n"
    "Use a formal and objective tone, emphasizing technical accuracy and conceptual clarity. \n"
    "Employ precise terminology, abstract nouns, and domain-specific vocabulary appropriate \n"
    "for an academic or expert audience. "
)

temperature = 0.2
# Single source of truth for the sample count, so the loop bound, the prompt
# text and the progress message cannot drift apart.
total_samples = 500

for sample_index in range(1, total_samples + 1):
    full_prompt = f"{prompt} This is sample {sample_index} of {total_samples}."
    chat_history = [{"role": "user", "content": full_prompt}]

    try:
        reply = chat_with_mistral(chat_history, temperature=temperature)
        all_texts.append(reply)
        print(f"[✓] Generated sample {sample_index}/{total_samples}")
        time.sleep(3)  # Respectful delay if needed
    except Exception as e:
        # Best effort: report the failure and continue with the next sample.
        print(f"[!] Error at sample {sample_index}: {e}")

# Save all merged scientific texts to a single file
final_filename = "generated/merged_scientific_lowtemp.txt"
with open(final_filename, "w", encoding="utf-8") as f:
    f.write("\n\n".join(all_texts))

print(f"[✓] Merged file saved to: {final_filename}")

In [None]:
# ========== Surreal Narrative Generation ==========
# Requests 500 surreal narratives at temperature 1.3 and merges every
# successful reply into one text file. Failed requests are reported and skipped.
Path("generated_data").mkdir(exist_ok=True, parents=True)

prompt = (
    "Write a 5000-word surreal narrative that strictly avoids common\n"
    "function words and overuses low-frequency,obscure, and multisyllabic vocabulary.\n"
    "The text must contain syntactically correct but semantically dense sentences\n"
    "composed of rare lexical items, archaic terminology, and technical jargon from\n"
    "unrelated domains. Refrain from using simple verbs and conjunctions; instead,\n"
    "prioritize elaborate constructions and repetition of unique, uncommon words.\n"
    "The story should simulate a linguistic anomaly where typical word frequency\n"
    "distributions are inverted or randomized, explicitly defying Zipf’s law."
)
temperature = 1.3
all_texts = []

for sample_index in range(1, 501):
    # The running sample number is appended to the otherwise constant prompt.
    full_prompt = f"{prompt} This is sample {sample_index} of 500."
    chat_history = [dict(role="user", content=full_prompt)]

    try:
        all_texts.append(chat_with_mistral(chat_history, temperature=temperature))
        print(f"[✓] Generated sample {sample_index}/500")
        # Pause after each successful request before sending the next one.
        time.sleep(3)
    except Exception as exc:
        print(f"[!] Error at sample {sample_index}: {exc}")

# Narratives are separated by one blank line in the merged file.
final_filename = "generated_data/merged_surreal_narrative.txt"
Path(final_filename).write_text("\n\n".join(all_texts), encoding="utf-8")

print(f"[✓] Merged file saved to: {final_filename}")

In [None]:
# ========== Surreal Narrative Low Temperature Generation ==========
# Same surreal prompt as the high-temperature run, sampled at temperature 0.2.
# Successful replies are collected and merged into one file at the end.
Path("generated_data").mkdir(exist_ok=True, parents=True)

temperature = 0.2
prompt = (
    "Write a 5000-word surreal narrative that strictly avoids common\n"
    "function words and overuses low-frequency,obscure, and multisyllabic vocabulary.\n"
    "The text must contain syntactically correct but semantically dense sentences\n"
    "composed of rare lexical items, archaic terminology, and technical jargon from\n"
    "unrelated domains. Refrain from using simple verbs and conjunctions; instead,\n"
    "prioritize elaborate constructions and repetition of unique, uncommon words.\n"
    "The story should simulate a linguistic anomaly where typical word frequency\n"
    "distributions are inverted or randomized, explicitly defying Zipf’s law."
)

all_texts = []
for sample_index in range(1, 501):
    full_prompt = f"{prompt} This is sample {sample_index} of 500."
    chat_history = [{"content": full_prompt, "role": "user"}]

    try:
        reply = chat_with_mistral(chat_history, temperature=temperature)
        all_texts += [reply]
        print(f"[✓] Generated sample {sample_index}/500")
        time.sleep(3)  # wait between successful requests
    except Exception as err:
        # A failed sample is only reported; the loop moves on to the next one.
        print(f"[!] Error at sample {sample_index}: {err}")

final_filename = "generated_data/merged_surreal_narrative_lowtemp.txt"
merged_text = "\n\n".join(all_texts)
with open(final_filename, mode="w", encoding="utf-8") as out_file:
    out_file.write(merged_text)

print(f"[✓] Merged file saved to: {final_filename}")