# Set up your environment and build your first story from a text prompt

In [None]:
# @title
# Never store an API key in the notebook itself: anyone who can read the file
# can use it. Read it from the environment, or prompt for it without echoing.
# NOTE(review): the key that was previously committed on this line should be
# treated as compromised and revoked.
import os
from getpass import getpass

if not os.environ.get("GEMINI_API_KEY"):
  os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ")

In [None]:
!pip install -q transformers pillow google-generativeai

In [None]:
from google import genai
import os
# The client is created in the next cell, after GEMINI_API_KEY has been
# checked. Building it here meant a missing key surfaced as an SDK error
# before the friendlier message in that cell could be shown.

In [None]:
if "GEMINI_API_KEY" not in os.environ:
  print("Please set you Gemini key in the environment variable GEMINI_API_KEY")
else:
    client=genai.Client()
    MODEL="gemini-2.5-flash"

In [None]:
# Ask for a story prompt and send it to Gemini as plain text.
prompt = input("Enter your Story prompt and press enter:\n")
if not prompt.strip():
  print("No prompt entered , Exiting.")
else:
  print(f"Genetrating story for prompt: {prompt}")
  print("It may take few seconds")
  try:
    # `contents` takes the prompt string itself. The previous `{prompt}` was a
    # set literal (a one-element set), not a string or a list of parts.
    resp = client.models.generate_content(model=MODEL, contents=prompt)
    print("\n----Generated Story----\n")
    print(resp.text)
  except Exception as e:
    # Notebook boundary: report the failure instead of a raw traceback.
    print(f"Error occurred while generating story: {e}")

# Turn a single image into a story using AI captioning

In [None]:
!pip install -q transformers pillow google-generativeai timm

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from google import genai
import os
import io

In [None]:
if "GEMINI_API_KEY" not in os.environ:
  print("Please set you Gemini key in the environment variable GEMINI_API_KEY")
else:
    client=genai.Client()
    MODEL="gemini-2.5-flash"

In [None]:
# Load the BLIP captioning checkpoint once: `processor` turns images into
# model inputs and decodes generated tokens, `model` generates the caption.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

In [None]:
from google.colab import files

# Open the upload dialog. Each file is then opened by name, so this relies on
# upload() having written the files to the working directory.
uploaded = files.upload()

# Every upload is displayed; `image` ends up holding the last one, which is
# what the captioning cell uses.
for fn in uploaded:
  image = Image.open(fn).convert("RGB")
  display(image)

In [None]:
# Caption the uploaded image: preprocess, generate token ids, decode to text.
inputs = processor(images=image, return_tensors="pt")
out = model.generate(**inputs)
caption = processor.decode(out[0], skip_special_tokens=True)

print("Caption generated by BLIP: ", caption, sep="\n")

In [None]:
# Turn the BLIP caption into a story request and send it to Gemini.
story_prompt = f"Write a Short story(around 500-700 words) base on this scene description: {caption}"
print(story_prompt)
print("Sending this to Gemini. \n")

story = client.models.generate_content(model=MODEL, contents=story_prompt).text

print("\n----Generated Story----\n")
print(story)

In [None]:
from google.colab import files

STORY_FILE = "generted_story.txt"

# Write with an explicit UTF-8 encoding: generated text commonly contains
# non-ASCII characters (curly quotes, dashes), and the platform default
# encoding is not guaranteed to be able to represent them.
with open(STORY_FILE, "w", encoding="utf-8") as f:
  f.write(story)

# Offer the saved file as a browser download.
files.download(STORY_FILE)

# Create a multi-image narrative with sequencing and coherence

In [None]:
# ipywidgets supplies the Dropdown, Button and Output widgets used for the
# tone/length controls in this section.
!pip install -q ipywidgets

In [None]:
from google.colab import files
from PIL import Image
import io

# Upload any number of images and keep each one, together with its file name,
# in the order upload() returns them.
uploaded = files.upload()

images = []
image_names = []

for filename, raw_bytes in uploaded.items():
  # Decode straight from the uploaded bytes and normalise to RGB.
  image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
  images.append(image)
  image_names.append(filename)
  display(image)

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# One caption per uploaded image, in the same order as `images`.
captions = []

for img in images:
  inputs = processor(images=img, return_tensors="pt")
  out = blip_model.generate(**inputs, max_new_tokens=30)
  caption = processor.decode(out[0], skip_special_tokens=True)
  captions.append(caption)

# Report once, after every image has been captioned. This print-out used to
# sit inside the loop above, so the growing list was reprinted per image.
print("Captions generated from images:")
for image_name, caption_text in zip(image_names, captions):
  print(f"{image_name}: {caption_text}")

In [None]:
import ipywidgets as widgets
from IPython.display import display , clear_output

# Choices offered by the two dropdowns; the default tone is the first entry
# and the default length is the middle entry.
TONE_OPTIONS = ["whinsical", "adventurous", "suspenseful", "romantic", "Sci-fi", "mystery"]
LENGTH_OPTIONS = [
    "Short(100-200 words)",
    "Medium(300-400 words)",
    "Long(500-700 words)",
]

tone_dropdown = widgets.Dropdown(
    options=TONE_OPTIONS,
    value=TONE_OPTIONS[0],
    description="Tone:",
)
length_dropdown = widgets.Dropdown(
    options=LENGTH_OPTIONS,
    value=LENGTH_OPTIONS[1],
    description="Length:",
)

# The button triggers generation; everything it prints is routed to output_box.
genetrate_button = widgets.Button(description="Generate Story")
output_box = widgets.Output()

display(tone_dropdown, length_dropdown, genetrate_button, output_box)

In [None]:
def on_generate_button_clicked(b):
  """Generate an outline and a chaptered story from the image captions.

  Reads the selected tone and length from the dropdowns, asks Gemini for a
  chapter outline based on `captions`, then asks for each chapter in turn.
  All output is printed into `output_box`; the finished story is written to
  multi_image_story.txt and offered as a download.

  Args:
    b: The button that was clicked (unused; required by the on_click callback
      signature).
  """
  with output_box:
    clear_output()

    # Without captions the outline prompt would be empty, so stop early.
    if not captions:
      print("No captions available. Upload and caption some images first.")
      return

    tone = tone_dropdown.value
    length_map = {
        "Short(100-200 words)": "100-200 words",
        "Medium(300-400 words)": "300-400 words",
        "Long(500-700 words)": "500-700 words",
    }
    length = length_map[length_dropdown.value]

    # Single source of truth for the outline request and the chapter loop.
    chapter_count = 4

    caption_prompt = "\n".join(f"-{c}" for c in captions)

    outline_prompt = (
        f"Using the following scene descriptions, create a {chapter_count}-chapter story outline. "
        "Each chapter should have a title and a short summary.\n\n"
        f"{caption_prompt}\n\nOutline"
    )

    try:
      outline_response = client.models.generate_content(model=MODEL, contents=outline_prompt)
      outline = outline_response.text
      print("Story Outline:\n")
      print(outline)

      # Collect chapters in a list and join once at the end.
      chapters = []
      for i in range(1, chapter_count + 1):
        chapter_prompt = (
            f"Using the outline below, write Chapter {i} in a {tone} tone. "
            f"Make it {length}. Add vivid details, good pacing, and consistent characters.\n\n"
            f"{outline}\n\nChapter {i}"
        )

        chapter_response = client.models.generate_content(model=MODEL, contents=chapter_prompt)
        chapter_text = chapter_response.text
        print(f"\n Chapter {i}:\n")
        print(chapter_text)
        chapters.append(f"Chapter {i}:\n{chapter_text}\n\n")

      full_story = "".join(chapters)

      # Explicit UTF-8 so non-ASCII characters in the story never depend on
      # the platform default encoding.
      with open("multi_image_story.txt", "w", encoding="utf-8") as f:
        f.write(full_story)
      print("\n Story saved as multi_image_story.txt")

      from google.colab import files
      files.download("multi_image_story.txt")

    except Exception as e:
      # UI boundary: show the failure in the output box instead of raising.
      print("Error generating story:", e)

# NOTE: re-running this cell registers the handler again on the same button.
genetrate_button.on_click(on_generate_button_clicked)

# Edit, Style and export stories with safety controls

In [None]:
!pip install -q gtts reportlab
!pip install -q reportlab

In [None]:
# Hard-coded four-chapter outline used as the sample input for the PDF export
# and text-to-speech cells in this section.
story_text ="""
**Chapter 1: Whispers of Autumn's Past**

*   **Summary:** Sunlight filters through the canopy of an autumn forest, illuminating a carpet of fallen leaves. Maya and Alex walk slowly, the crisp air doing little to lift Alex's quiet demeanor. Alex, once a promising soccer player, is lost in thought, a silent burden weighing on him. He occasionally glances at his phone, where a faded "graphic of a soccer player running with a ball in his hand" serves as a poignant reminder of a dream he once held tight, a dream that now feels distant. Maya senses his melancholic reflection, knowing the path ahead is as much about healing as it is about hiking.

**Chapter 2: The Ascent of Uncertainty**

*   **Summary:** The two friends begin their climb up a challenging mountain trail. The path is steep and winding, mirroring the emotional ascent Alex faces in coming to terms with his past. With every step, the majestic mountain peak looms larger in the background, a formidable symbol of the obstacles Alex believes stand between him and any future fulfillment. Maya, a steadfast companion, tries to engage him in conversation, gently nudging him to confront the disappointment of an injury that shattered his professional soccer ambitions, the very dream depicted in his memory.

**Chapter 3: The Dream's Lingering Embrace**

*   **Summary:** High on the trail, Alex pauses, momentarily lost in a vivid flashback or a powerful, almost dreamlike, vision. The "graphic of a soccer player running with a ball in his hand" comes alive in his mind – not a literal game scenario, but a symbolic image of his younger self, brimming with unadulterated passion, the soccer ball feeling like an extension of his very being. This raw, intimate connection to the sport, before the injury, surges through him, bringing both a pang of loss and a renewed sense of the profound joy the game once gifted him, intensifying his internal struggle amidst the arduous climb.

**Chapter 4: The Open Field of New Beginnings**

*   **Summary:** Finally, Alex and Maya break through the tree line, reaching a wide, "grassy field with a mountain in the background." The view is breathtaking, expansive and serene. Standing on the vast, open space, with the imposing peak now behind them, Alex feels a profound shift. He realizes that while his professional dream may have ended, the spirit of the game and the joy it brings are not lost. This tranquil field, rather than a battleground, becomes a canvas for new possibilities, a place where he can embrace a different path – perhaps coaching, or simply rediscovering the simple love for the game – with the wisdom gained from his journey up the mountain, a journey shared and supported by Maya.
"""

In [None]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def export_pdf(text, filename="story.pdf"):
  """Write `text` to a letter-size PDF with word wrapping and page breaks.

  Lines are wrapped on word boundaries, blank lines are kept as paragraph
  spacing, and a new page is started whenever the text cursor passes the
  bottom margin.

  Args:
    text: Text to render; newline characters separate paragraphs.
    filename: Path of the PDF file to create.
  """
  import textwrap  # local import keeps this cell runnable on its own

  margin = 40
  wrap_columns = 90
  _, height = letter
  c = canvas.Canvas(filename, pagesize=letter)

  def start_text_block():
    # Each page needs a fresh text object, which carries its own font.
    block = c.beginText(margin, height - margin)
    block.setFont("Helvetica", 12)
    return block

  text_object = start_text_block()
  for paragraph in text.strip().split("\n"):
    # wrap() returns [] for a blank line; emit "" so the blank line survives.
    for subline in textwrap.wrap(paragraph, wrap_columns) or [""]:
      if text_object.getY() < margin:
        # Page is full: flush what has been laid out and continue on a new page.
        c.drawText(text_object)
        c.showPage()
        text_object = start_text_block()
      text_object.textLine(subline)

  c.drawText(text_object)
  c.showPage()
  c.save()

export_pdf(story_text)

from google.colab import files
files.download("story.pdf")

In [None]:
from gtts import gTTS, gTTSError
from IPython.display import Audio
from google.colab import files

# Label -> gTTS keyword arguments. The accent is selected through the Google
# Translate domain (`tld`), so it has to be passed to gTTS to have any effect.
# NOTE(review): "com.pk" is Google's Pakistan domain but is not among the
# accents listed in the gTTS docs; the fallback below covers it if it fails.
voices = {
    "Default English (US Female)": {"lang": "en", "tld": "com"},
    "British Accent": {"lang": "en", "tld": "co.uk"},
    "Australian Accent": {"lang": "en", "tld": "com.au"},
    "Pakistani Accent": {"lang": "en", "tld": "com.pk"},
    # The story is English, so the slow voice must use "en" (was "es").
    "Slow Reading Voice": {"lang": "en", "tld": "com", "slow": True},
}

for label, option in voices.items():
  print(f"Generating Audio: {label}")

  filename = f"{label.replace(' ','_').lower()}.mp3"

  try:
    gTTS(text=story_text, **option).save(filename)
  except gTTSError as err:
    # A regional endpoint may be unavailable; fall back to the default domain
    # instead of aborting the remaining voices.
    print(f"Could not use domain '{option['tld']}' ({err}); using the default domain instead.")
    gTTS(text=story_text, **{**option, "tld": "com"}).save(filename)

  display(Audio(filename=filename, autoplay=False))

  files.download(filename)

# Build and showcase your AI StoryTeller app with Streamlit

In [None]:
%%writefile app_streamlit_story.py
import streamlit as st #web app framework
from PIL import Image
import io, requests, os
import textwrap
from gtts import gTTS #translate text to speech
from transformers import BlipProcessor, BlipForConditionalGeneration
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.utils import ImageReader
from pyngrok import ngrok
import tempfile
import google.generativeai as genai
import torch

# Authentication / configuration
# Secrets come from the environment so they are never written into the
# notebook or the generated app file. Set NGROK_AUTH_TOKEN and GEMINI_API_KEY
# before launching Streamlit.
# NOTE(review): the token and key previously hard-coded here should be
# treated as compromised and revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")
BACKGROUD_IMAGE_URL = "https://i.postimg.cc/NF8HqzgR/web-back.avif"
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")

# Streamlit page setup / style
st.set_page_config(page_title="StoryTeller",layout="wide")

# Custom CSS. Braces are doubled because this is an f-string. Two fixes over
# the previous version: the sidebar selector is `section[...]` (it was
# misspelled `selection`), and `rgba(` has no space before the parenthesis
# (with the space the declaration is invalid CSS and is ignored).
st.markdown(
    f"""
    <style>
    .stApp {{
        background-image: url("{BACKGROUD_IMAGE_URL}");
        background-size: cover;
        background-attachment: fixed;
    }}
    section[data-testid="stSidebar"] {{
        background: rgba(0,0,0,0.3);
        backdrop-filter: blur(10px);
        border-radius: 12px;
        padding: 10px;
    }}
    div[data-testid="stFileUploader"] {{
        background: rgba(255,255,255,0.2);
        border-radius: 10px;
        padding: 10px;
    }}
    html, body, h1, h2, h3, h4, h5, h6, p, div, span, label, li, input, textarea {{
        color: #93ABAC !important;
    }}
    .stButton>button, .stDownloadButton>button {{
        color: #93ABAC !important;
        border-color: #93ABAC;
    }}
    </style>
    """,
    unsafe_allow_html=True,
)

st.title("Multi-Image AI StoryTeller")
st.markdown("Upload Images → Generate Story → Export as PDF and MP3")


@st.cache_resource
def _start_ngrok_tunnel(port=8501):
  """Open one ngrok tunnel to `port` and return its description string.

  Cached because Streamlit re-runs this script on every widget interaction;
  without the cache each rerun with the box ticked opened another tunnel.
  """
  ngrok.set_auth_token(NGROK_AUTH_TOKEN)
  return str(ngrok.connect(port))


with st.sidebar:
  tone = st.selectbox("Tone", ["Wholesome", "Adventurous", "Suspenseful", "Romantic", "Sci-fi", "Mystery"])
  length_label = st.selectbox("Length", ["Short (200-300 words)", "Medium (300-600 words)", "Long (600-1000 words)"])
  strat_ngrok = st.checkbox("Start ngrok tunnel")

  if strat_ngrok:
    if NGROK_AUTH_TOKEN:
      url = _start_ngrok_tunnel(8501)
      st.success(f"Public URL: {url}")
    else:
      st.error("No ngrok authtoken configured; cannot start a tunnel.")

uploaded_images = st.file_uploader("Upload multiple images", type=["jpg", "jpeg", "png"], accept_multiple_files=True)

# Caption model
@st.cache_resource
def load_models():
  """Load the BLIP processor and captioning model.

  The model is moved to CUDA when torch reports it as available, otherwise it
  stays on the CPU.

  Returns:
    A (processor, model) tuple.
  """
  checkpoint = "Salesforce/blip-image-captioning-large"
  device = "cuda" if torch.cuda.is_available() else "cpu"
  blip_processor = BlipProcessor.from_pretrained(checkpoint)
  captioner = BlipForConditionalGeneration.from_pretrained(checkpoint).to(device)
  return blip_processor, captioner

processor, blip_model = load_models()

# Configure Gemini with the API key before any model handle is created.
genai.configure(api_key=GEMINI_API_KEY)

@st.cache_resource
def load_gemini_model():
  """Return the Gemini generative model handle used for story writing."""
  model_name = "models/gemini-2.5-flash"
  return genai.GenerativeModel(model_name=model_name)

gemini_model = load_gemini_model()

# Captioning the images
def get_captions(images):
  """Return one BLIP caption per image, in input order.

  Args:
    images: Iterable of PIL images; any that are not RGB are converted first.

  Returns:
    List of caption strings.
  """
  def caption_one(picture):
    rgb = picture if picture.mode == "RGB" else picture.convert("RGB")
    # Inputs must live on the same device as the model.
    tensors = processor(images=rgb, return_tensors="pt").to(blip_model.device)
    token_ids = blip_model.generate(**tensors)
    return processor.decode(token_ids[0], skip_special_tokens=True)

  return [caption_one(picture) for picture in images]


def generate_story(captions, tone, length_label):
  """Ask Gemini for a story based on the image captions.

  Args:
    captions: Caption strings, one per uploaded image.
    tone: Tone name from the sidebar; lower-cased into the prompt.
    length_label: Length option from the sidebar; unknown labels fall back to
      the medium setting.

  Returns:
    The stripped story text, or a message starting with
    "Error generating story:" if the request fails. This function never raises
    for API failures; callers check for that prefix.
  """
  # label -> (min words, max words, max output tokens)
  length_map = {
      "Short (200-300 words)": (200, 300, 800),
      "Medium (300-600 words)": (300, 600, 1200),
      "Long (600-1000 words)": (600, 1000, 1600)
  }
  min_words, max_words, max_tokens = length_map.get(length_label, (300, 600, 1200))

  # The caption list starts on its own line; previously the first bullet was
  # glued to the end of the instruction sentence.
  caption_lines = "\n".join(f"- {cap}" for cap in captions)
  prompt = (
      f"you are a creative writer. Write a {tone.lower()} story based on the following image captions:\n"
      f"{caption_lines}"
      "\n\nThe story should be vivid, engaging, and emotionally rich, with a coherent beginning, middle, and end."
      f"\nMake it approximately between {min_words} and {max_words} words long."
  )

  try:
    # NOTE(review): if the model spends output tokens on internal reasoning,
    # these max_output_tokens budgets may truncate the story; confirm.
    response = gemini_model.generate_content(
      contents=prompt,
      generation_config=genai.GenerationConfig(
          temperature=0.9,
          top_p=0.95,
          max_output_tokens=max_tokens,
      )
    )
    return response.text.strip()
  except Exception as e:
    # UI boundary: surface the failure as text rather than crashing the app.
    return f"Error generating story: {e}"


# PDF generation
def create_pdf(story_text, images):
  """Render the story and thumbnails of the uploaded images into an A4 PDF.

  Args:
    story_text: Story to print; newlines are kept as paragraph breaks.
    images: PIL images shown as thumbnails after the story (may be empty).

  Returns:
    io.BytesIO holding the PDF, positioned at the start.
  """
  buffer = io.BytesIO()
  c = canvas.Canvas(buffer, pagesize=A4)
  w, h = A4

  # The background is decorative: if it cannot be fetched or decoded, carry on
  # without it. Only network and image-decoding errors are ignored, and the
  # request has a timeout so PDF creation cannot hang.
  try:
    bg_response = requests.get(BACKGROUD_IMAGE_URL, timeout=10)
    bg_response.raise_for_status()
    bg = ImageReader(Image.open(io.BytesIO(bg_response.content)).convert("RGB"))
  except (requests.RequestException, OSError):
    bg = None

  def start_page(heading=None):
    # Draw the background and optional heading, then select the body font.
    # Needed on every page because the font is set again after showPage().
    if bg is not None:
      c.drawImage(bg, 0, 0, width=w, height=h)
    if heading:
      c.setFont("Helvetica-Bold", 16)
      c.drawString(50, h - 50, heading)
    c.setFont("Helvetica", 11)

  start_page("Generated Story")
  y = h - 80
  for paragraph in story_text.split("\n"):
    # wrap() returns [] for a blank line; emit "" to keep paragraph spacing.
    for line in textwrap.wrap(paragraph, 95) or [""]:
      if y < 80:
        c.showPage()
        start_page()
        y = h - 80
      c.drawString(50, y, line)
      y -= 15

  if images:
    c.showPage()
    start_page("Uploaded Images")
    x, row_top = 50, h - 80
    for img in images:
      img_thumbnail = img.copy()
      img_thumbnail.thumbnail((200, 200))
      if x + 200 > w - 50:
        # Row is full: move to the start of the next row.
        x = 50
        row_top -= 220
      if row_top - 200 < 50:
        # No room for another row: continue on a new page.
        c.showPage()
        start_page("Uploaded Images")
        x, row_top = 50, h - 80
      # drawImage positions the lower-left corner, so subtract the thumbnail
      # height to hang the image from the top of its row.
      c.drawImage(
          ImageReader(img_thumbnail),
          x,
          row_top - img_thumbnail.height,
          width=img_thumbnail.width,
          height=img_thumbnail.height,
      )
      x += 220

  c.save()
  buffer.seek(0)
  return buffer

# Audio generation
def create_audio(story):
  """Synthesise `story` with gTTS and return the audio as an in-memory buffer.

  Returns:
    io.BytesIO containing the synthesised audio, rewound to the start.
  """
  mp3_buffer = io.BytesIO()
  gTTS(story).write_to_fp(mp3_buffer)
  mp3_buffer.seek(0)
  return mp3_buffer

# Processing
if st.button("Generate Story") and uploaded_images:
  pil_images = [Image.open(img) for img in uploaded_images]
  with st.spinner("Generating captions..."):
    captions = get_captions(pil_images)
    for i, cap in enumerate(captions, start=1):
      st.write(f"**Image {i}**: {cap}")

  with st.spinner("Generating story..."):
    story = generate_story(captions, tone, length_label)

  # generate_story reports failures by returning a message with this prefix
  # instead of raising. Do not present that (or an empty result) as a story,
  # and do not feed it to the PDF and text-to-speech steps.
  if not story or story.startswith("Error generating story:"):
    st.error(story or "The model returned an empty story.")
  else:
    st.success("Story generated!")
    st.write(story)

    with st.spinner("Creating PDF..."):
      pdf_file = create_pdf(story, pil_images)
      st.download_button("Download PDF", data=pdf_file, file_name="story.pdf", mime="application/pdf")

    with st.spinner("Creating Audio..."):
      audio = create_audio(story)
      st.audio(audio)
      st.download_button("Download Audio as MP3", data=audio, file_name="story.mp3", mime="audio/mpeg")

elif not uploaded_images:
  st.warning("Upload at least one image to begin.")

In [None]:
# Stop any ngrok process started from this notebook kernel. `ngrok` is not
# imported anywhere earlier in the kernel (only inside the app file written
# above), so import it here; pyngrok itself is installed in the next cell.
try:
  from pyngrok import ngrok
except ImportError:
  print("pyngrok is not installed yet; there is no tunnel to stop.")
else:
  ngrok.kill()

In [None]:
!pip install -q streamlit pyngrok transformers torch gtts reportlab pillow

!streamlit run app_streamlit_story.py --server.port 8501 &>/content/log.txt &

from pyngrok import ngrok
ngrok.set_auth_token("33T8ek51WYsEozKcynmWiITi3Er_89HiwrnWpjJqcEYGeTPPo")
url = ngrok.connect(8501)
print("Public URL:", url)