# Documentation
- [Tools guides](https://platform.openai.com/docs/guides/tools)

Text / Audio generated story:

Input - text - button to ask AI to generate the idea:
"Provide a short description: "

Input - dropdown:
"Pick a language:"

Input - dropdown - default NO
"Want the story read aloud for you?": YES / NO

Input - dropdown - default NO
"Want to generate a cover image?": YES / NO


Text → OpenAI GPT API.
Image → OpenAI DALL·E API (simple) or Stable Diffusion.
Voice → OpenAI TTS API, or pyttsx3 (quick to set up, no API key needed).

In [1]:
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv
import base64
from io import BytesIO
from PIL import Image

In [2]:
# Load settings from a local .env file before the client is created.
# NOTE(review): override=True presumably lets .env values replace variables
# already set in the environment — confirm against python-dotenv docs.
load_dotenv(override=True)
# Shared API client used by every generate*/readStory helper in this notebook.
# NOTE(review): presumably picks up the API key from the environment — confirm.
openai = OpenAI()

# Chat model used for both seed and story generation.
MODEL = "gpt-4o-mini"
# Speech model used by readStory.
TTS_MODEL = "gpt-4o-mini-tts"

# System prompt for seed generation: asks for a one-to-three sentence,
# open-ended story prompt.
SYSTEM_MESSAGE_SEED = (
    'You are a creative writing assistant. Your task is to generate short story seeds:\n'
    'one to three sentence prompts that introduce an intriguing situation, a compelling character, or a unique world.\n'
    'The seeds should spark imagination without resolving the story. Always keep them concise and open-ended.'
)

# User prompt template for seed generation; {language} is filled in by generateSeed.
MESSAGE_SEED = 'Create a seed for a story. Use {language} as a language.'

# System prompt for story generation: lists the six narrative elements the
# story must contain, the style requirements (300-800 words, one continuous
# text), and a worked example of expanding a seed.
SYSTEM_MESSAGE_STORY = (
    "You are a creative story-generating AI. Your task is to write engaging, coherent, and well-structured stories based on a short seed provided by the user.\n"
    "Each story must include the essential elements of a good narrative:\n"
    "1. **Introduction/Setting:** Introduce the main characters, time, and place.\n"
    "2. **Conflict/Problem:** Present a challenge, conflict, or goal that drives the story.\n"
    "3. **Rising Action:** Show the characters facing challenges and developing the plot.\n"
    "4. **Climax:** Include a turning point or the most exciting moment of the story.\n"
    "5. **Falling Action:** Show how the conflict begins to resolve.\n"
    "6. **Resolution/Conclusion:** End the story with a satisfying conclusion, resolving the main conflict.\n"
    "**Requirements:**\n"
    "- Use clear, descriptive language.\n"
    "- Maintain logical flow and coherence between sections.\n"
    "- The story should be engaging, imaginative, and appropriate for a general audience.\n"
    "- Base the story on the user-provided seed, but feel free to expand creatively.\n"
    "- Output the full story as one continuous text.\n"
    "- The entire story should be between 300-800 words.\n"
    "Example:\n"
    'If the seed is "A young girl discovers a hidden door in her school," you might write a story that starts with her everyday life, describes finding the door, the mystery inside, her challenges exploring it, and concludes with how this experience changes her.\n'
    "Always ensure your output is structured and contains all story elements listed above.\n"
)

# User prompt template for story generation; {seed} and {language} are filled
# in by generateStory.
MESSAGE_STORY = 'Create a story based on the following seed: {seed}. Use {language} as a language.'

In [3]:
def getLanguage(language):
    """Translate a UI language code into the language name used in prompts.

    Only the exact code 'RO' selects Romanian; any other value (including
    'EN', lowercase codes, or None) falls back to English.
    """
    return 'Romanian' if language == 'RO' else 'English'

def generateSeed(language):
    """Request a short story seed from the chat model.

    `language` is the UI language code; it is resolved to a language name
    with getLanguage() and inserted into the MESSAGE_SEED template.
    Returns the text content of the first completion choice.
    """
    user_prompt = MESSAGE_SEED.format(language=getLanguage(language))
    conversation = [
        {'role': 'system', 'content': SYSTEM_MESSAGE_SEED},
        {'role': 'user', 'content': user_prompt},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

def generateStory(storySeed, language):
    """Request a full story from the chat model, based on a seed.

    `storySeed` is the seed text and `language` the UI language code; both
    are inserted into the MESSAGE_STORY template (the code is first resolved
    with getLanguage()). Returns the text content of the first completion
    choice.
    """
    user_prompt = MESSAGE_STORY.format(
        seed=storySeed,
        language=getLanguage(language),
    )
    conversation = [
        {'role': 'system', 'content': SYSTEM_MESSAGE_STORY},
        {'role': 'user', 'content': user_prompt},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

def generateImage(storySeed):
    """Generate one cover image for the given story seed.

    Sends the seed to the image endpoint, asking for a single 1024x1024
    result encoded as base64 JSON, then decodes it and returns it as a PIL
    image object.
    """
    prompt_template = "An image representation based on the following seed: {seed}"
    result = openai.images.generate(
        model="dall-e-3",
        prompt=prompt_template.format(seed=storySeed),
        size="1024x1024",
        n=1,
        response_format="b64_json",
    )
    # The payload arrives base64-encoded; decode it to raw bytes for PIL.
    raw_bytes = base64.b64decode(result.data[0].b64_json)
    return Image.open(BytesIO(raw_bytes))

def readStory(fullStory):
    """Convert the story to speech and save it to ``output.wav``.

    Args:
        fullStory: The story text to narrate. When it is None, empty, or
            only whitespace, nothing is sent to the API and no file is
            written.

    Returns:
        None. The audio is written to ``output.wav`` in the current working
        directory and a status line is printed.
    """
    # The button can be clicked before a story exists; skip the API call
    # instead of sending an empty input.
    if not fullStory or not fullStory.strip():
        print('There is no story to read yet')
        return

    audio_response = openai.audio.speech.create(
        model=TTS_MODEL,
        voice='alloy',
        input=fullStory,
        # Request WAV explicitly so the bytes match the .wav file extension
        # (the endpoint's default output format is MP3).
        response_format='wav',
    )

    with open("output.wav", "wb") as f:
        f.write(audio_response.read())

    print('Audio file has been saved')

def storyTime(storySeed, language, doGenerateImage):
    """Produce the story and, when requested, its cover image.

    Returns a ``(story, image)`` pair. The story is always generated first.
    The image comes from generateImage() only when `doGenerateImage` compares
    equal to True; otherwise a fresh ``gr.Image()`` component is returned in
    its place.
    """
    story_text = generateStory(storySeed, language)
    # Explicit comparison kept on purpose: only a value equal to True
    # (not merely truthy) triggers image generation.
    cover = generateImage(storySeed) if doGenerateImage == True else gr.Image()
    return story_text, cover
    

In [6]:
# Build the UI: an input section (seed, language, cover-image option, action
# buttons) followed by a result section (story text and cover image).
with gr.Blocks() as layout:
    gr.Markdown('## Story Details')
    # Row 1: "Generate Seed" button next to the editable seed textbox.
    with gr.Row(equal_height=True):
        with gr.Column(scale=0, min_width=100):
            generate_seed_btn = gr.Button('Generate Seed')
        with gr.Column(scale=10):
            input_text_story = gr.Textbox(label='Write your story seed', lines=3)
    # Row 2: options. Despite the "bool" in its name, input_bool_lang holds a
    # language code ('EN' or 'RO'); input_bool_image holds True/False.
    with gr.Row():
        with gr.Column():
            input_bool_lang = gr.Dropdown(label='Pick a language', choices=['EN', 'RO'], value='EN') 
        with gr.Column():
            input_bool_image = gr.Dropdown(label='Create cover image', choices=[True, False], value=False)
    # Row 3: action buttons.
    with gr.Row():
        with gr.Column(scale=4):
            generate_story_btn = gr.Button('Generate Story')
        with gr.Column(scale=1):
            read_story_btn = gr.Button(value='Read the story', interactive=True)

    # Result section: generated story on the left, cover image on the right.
    gr.Markdown("## Here is the story")
    with gr.Row(equal_height=True):
        with gr.Column(scale=2):
            output_text = gr.Textbox(label='Story', lines=20)
        with gr.Column(scale=1):
            output_image = gr.Image(label='Cover', height=512, min_width=512)
    
    # The generated seed is written back into the seed textbox so the user
    # can edit it before generating the story.
    generate_seed_btn.click(
        fn=generateSeed, 
        inputs=[input_bool_lang], 
        outputs=input_text_story
    )
    # storyTime returns (story, image), matching the two outputs below.
    generate_story_btn.click(
        fn=storyTime,
        inputs=[input_text_story, input_bool_lang, input_bool_image],
        outputs=[output_text, output_image]
    )

    # readStory narrates whatever is currently in the story textbox; it
    # updates no UI component (outputs=None) and saves the audio to a file.
    read_story_btn.click(
        fn=readStory,
        inputs=output_text,
        outputs=None
    )
    # share=True also requests a public URL in addition to the local one.
    layout.launch(share=True)
    # Local-only alternative (no public share link): layout.launch()

* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://c78788647189248f71.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Audio file has been saved
