In [14]:
import os
import torch
import torchvision.models as models
from torchvision import transforms
from dotenv import load_dotenv
from PIL import Image
import requests
import json
import google.generativeai as genai
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from groq import Groq
import re

In [25]:
# Read the API credentials from the environment (after loading any .env file).
# Each value is None when its variable is not set.
load_dotenv()
gemini_api_key = os.environ.get("GEMINI_API_KEY")
# Note: this key is the one handed to the Groq client further down the notebook.
deepseek_api_key = os.environ.get("DEEPSEEK_API_KEY")

### Extracting Image captions

In [16]:
# Load the BLIP captioning checkpoint. The processor preprocesses images and
# decodes token ids; the model generates the caption token ids.
BLIP_CHECKPOINT = "Sof22/image-caption-large-copy"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

In [17]:
# Function to generate image captions
def generate_caption(image_path):
    """Generate a text caption for the image stored at ``image_path``.

    Uses the notebook-level ``processor`` and ``model`` objects, so both must
    be loaded before this function is called.

    Args:
        image_path: Location of the image, in any form ``PIL.Image.open``
            accepts.

    Returns:
        The decoded caption (special tokens removed) for the first generated
        sequence.
    """
    # Image.open keeps the underlying file open; the context manager releases
    # it as soon as convert() has produced an independent RGB copy.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")  # Preprocess the image
    with torch.no_grad():  # Inference only, no gradients needed
        caption_ids = model.generate(**inputs)  # Generate caption IDs
    return processor.decode(caption_ids[0], skip_special_tokens=True)  # Decode to text

In [18]:
# Caption a sample image; the result is reused by the story prompts below.
image_path = "img.jpg"
caption = generate_caption(image_path)

In [19]:
# Show the caption produced for the sample image
print(f"Generated Caption: {caption}")

Generated Caption: araffe standing in front of a church with a sky background and a person holding a cell phone


# Integrating LLM for story generation

# Google Gemini

In [23]:
# Configure the Gemini client. The Gemini model gets its own name so that the
# BLIP captioning `model` used by generate_caption() is not overwritten.
genai.configure(api_key=gemini_api_key)
gemini_model = genai.GenerativeModel("models/gemini-1.5-flash")

# Story settings, entered interactively
# Length (in words)
n = input("Enter the lenght of story to generate: ")
# Theme
theme = input("Enter the theme of the story: ")

# Build the prompt from the image caption and the user's settings
prompt = (f"Write a {n}-word long story about {caption} and give it a touch of {theme} theme. "
          "Start your response from the beginning of the story and conclude it at the end.")

response = gemini_model.generate_content(prompt)

# Fail with a clear message instead of an IndexError when the reply carries
# no candidates or no text parts.
if not response.candidates or not response.candidates[0].content.parts:
    raise RuntimeError(
        "Gemini returned no story text for this prompt (the response may have been blocked)."
    )

# `result` holds the generated story text
result = response.candidates[0].content.parts[0].text
print(result)

The giraffe stood silently before the weathered church, its long neck bowed.  A bruised purple sky mirrored the sorrow in its large, gentle eyes.  Below, a lone figure, hunched against the chill wind, held a cell phone, a blurry photo displayed on the screen – a younger giraffe, playful and vibrant, beside a smiling child.  The phone slipped from numb fingers; the image, a painful reminder of a life lost, a bond severed, flickered before fading to black, reflecting the emptiness in the giraffe’s soulful gaze, and in the heart of the mourner.



# DeepSeek R1 (via Groq)


In [24]:
# Story settings, entered interactively
# Length (in words)
n = input("Enter the lenght of story to generate: ")
# Theme
theme = input("Enter the theme of the story: ")

# The DeepSeek distill model is requested through the Groq client, which is
# authenticated with the key stored in `deepseek_api_key`.
client = Groq(api_key=deepseek_api_key)
completion = client.chat.completions.create(
    model="deepseek-r1-distill-llama-70b",
    messages=[
        {
            "role": "user",
            "content": (f"Write a {n}-word long story about {caption} and give it a touch of {theme} theme. "
                        "Start your response from the beginning of the story and conclude it at the end.")
        }
    ],
    temperature=0.6,
    max_completion_tokens=1024,
    top_p=0.95,
    stream=True,
    reasoning_format="raw"
)

# Collect the streamed text deltas and join them once at the end.
story_chunks = []
for chunk in completion:
    if not chunk.choices:  # skip chunks that carry no choices
        continue
    story_chunks.append(chunk.choices[0].delta.content or "")
story_output = "".join(story_chunks)

# Remove text between <think> and </think>. If generation stopped at the token
# cap before the closing tag arrived, drop everything from <think> to the end
# so unfinished reasoning is never printed as the story.
clean_story = re.sub(r"<think>.*?(?:</think>|\Z)", "", story_output, flags=re.DOTALL)

# Print the final story
print(clean_story.strip())

The giraffe stood alone in front of the old church, its tall silhouette contrasting against the vast, cloudy sky. A tourist approached, phone in hand, but instead of excitement, their eyes carried sadness. They had come to capture the moment, yet the giraffe’s stillness felt like a mirror to their own loneliness. The phone remained lowered as they thought of someone they once shared such sights with, now gone. The giraffe, unaware, gazed at the heavens, its reflection in a nearby puddle rippling like unspoken tears. The tourist sighed, turned, and walked away, leaving the giraffe to its quiet vigil.


# Qwen 2.5