In [None]:
# Installs: third-party packages used by the cells below (run once per environment).
# NOTE(review): `openai` is requested twice - unpinned on the first line and as
# `openai==0.28` on the last line. Because the pinned install runs last, 0.28 is
# presumably the version that ends up in the environment; confirm that the other
# packages installed here are compatible with that pin.
# NOTE(review): none of the other packages are version-pinned, so a fresh
# environment may resolve to different releases than the ones this was written against.
!pip install langchain openai stability-sdk pillow
!pip install langchain-community langchain-core
!pip install wikipedia-api transformers
!pip install wikipedia
!pip install openai==0.28

In [None]:
#imports
import wikipediaapi
from transformers import pipeline, CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
import math
import json
import io
import os
import warnings
import random
import torch
import re
from PIL import Image, ImageDraw, ImageFont
from stability_sdk import client
import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
import langchain
import openai
import stability_sdk
import wikipedia

In [None]:
# Pretrained checkpoints used for evaluation: BLIP produces image captions,
# CLIP provides the text embeddings used to compare captions with descriptions.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

# Each processor is loaded before its model, from the same checkpoint id.
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)

def generate_caption(image, prompt, max_new_tokens=30, max_prompt_tokens=256):
    """Caption ``image`` with BLIP, conditioned on ``prompt`` when one is given.

    BlipProcessor receives the conditioning text through its ``text`` argument.
    Passing it under any other keyword (e.g. ``prompt=``) does not tokenize it,
    so the model would caption the image unconditionally.

    Args:
        image: Image accepted by ``blip_processor`` (e.g. a PIL image).
        prompt: Text the caption should continue. ``None`` or ``""`` requests
            an unconditional caption.
        max_new_tokens: Upper bound on the number of tokens generated after the
            prompt. Needed because the default generation length budget counts
            the prompt tokens too, and a long prompt would exhaust it.
        max_prompt_tokens: Prompts are truncated to this many tokens so that
            prompt + generated tokens stay inside the text decoder's position
            limit (assumed 512 for the base checkpoint - TODO confirm against
            ``blip_model.config.text_config.max_position_embeddings``).

    Returns:
        str: The generated caption. For conditional captioning only the text
        produced after the prompt is returned, so the prompt itself does not
        leak into downstream comparisons.
    """
    if prompt:
        inputs = blip_processor(
            images=image,
            text=prompt,
            return_tensors="pt",
            truncation=True,
            max_length=max_prompt_tokens,
        )
    else:
        inputs = blip_processor(images=image, return_tensors="pt")

    out = blip_model.generate(**inputs, max_new_tokens=max_new_tokens)
    caption = blip_processor.decode(out[0], skip_special_tokens=True)

    if prompt:
        # The generated sequence starts with the prompt tokens. Decode the prompt
        # the same way (same tokenizer normalisation) and drop it as a prefix.
        prompt_text = blip_processor.decode(inputs["input_ids"][0], skip_special_tokens=True)
        if caption.startswith(prompt_text):
            caption = caption[len(prompt_text):].strip()

    return caption

def calculate_similarity(text1, text2):
    """Return the cosine similarity of the CLIP text embeddings of two texts.

    Each text is tokenized on its own (truncated to at most 77 tokens) and
    embedded with ``clip_model.get_text_features``. The result is converted
    with ``.item()``, so each argument must yield exactly one embedding
    (i.e. a single string).

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        float: Cosine similarity between the two text embeddings.
    """
    # Tokenize both inputs first, in argument order, with identical settings.
    encoded_pair = [
        clip_processor(text=text, return_tensors="pt", padding=True, max_length=77, truncation=True)
        for text in (text1, text2)
    ]

    # Embeddings are only used for scoring, so no gradients are tracked.
    with torch.no_grad():
        features = [clip_model.get_text_features(**encoded) for encoded in encoded_pair]

    first_features, second_features = features
    score = torch.nn.functional.cosine_similarity(first_features, second_features, dim=-1)
    return score.item()

def combine_with_context(description, context):
    """Build a prompt by placing ``context`` before ``description``.

    Args:
        description: Text describing a single panel.
        context: Background text to prepend.

    Returns:
        str: ``context`` and ``description`` joined by a single space.
    """
    return "{} {}".format(context, description)

def load_panels(file_path):
    """Load the panel definitions from a JSON file.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The parsed JSON content. The loops in this notebook iterate over it
        and read the ``'number'`` and ``'description'`` keys of each entry.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification. Being explicit avoids mis-decoding
    # non-ASCII descriptions on platforms whose default encoding is not UTF-8.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# NOTE(review): `summary` is not defined in this cell or in the cells shown above;
# it must already exist in the kernel (presumably a Wikipedia summary produced by
# an earlier cell - confirm), otherwise this line raises NameError on a fresh run.
wikipedia_context = summary

panel_file_path = 'panels.json'
original_panels = load_panels(panel_file_path)

# Character budget for the text prompt handed to the captioning model.
MAX_PROMPT_CHARS = 512

generated_captions = []
for panel in original_panels:
    image_path = f"panel-{panel['number']}.png"
    description = panel['description']

    # Trim the context, not the combined string: the context comes first, so
    # cutting the combined prompt at the budget would drop the panel description
    # whenever the context alone is at least MAX_PROMPT_CHARS long.
    context_budget = max(MAX_PROMPT_CHARS - len(description) - 1, 0)
    combined_prompt = combine_with_context(description, wikipedia_context[:context_budget])

    # strip() removes the separator left behind when no context fits; the final
    # slice still enforces the budget for over-long descriptions.
    combined_prompt = combined_prompt.strip()[:MAX_PROMPT_CHARS]

    # Image.open keeps the file handle open until the image is closed; the
    # context manager releases it once the caption has been generated.
    with Image.open(image_path) as image:
        caption = generate_caption(image, combined_prompt)
    generated_captions.append(caption)

# Score every generated caption against the description it was generated for.
similarities = []
for original, generated in zip(original_panels, generated_captions):
    similarity = calculate_similarity(original['description'], generated)
    similarities.append(similarity)
    print(f"Original: {original['description']}\nGenerated: {generated}\nSimilarity: {similarity}\n")

# With no scored panels the mean is undefined; report that instead of failing
# with ZeroDivisionError.
if similarities:
    average_similarity = sum(similarities) / len(similarities)
    print(f"Average similarity: {average_similarity}")
else:
    average_similarity = float("nan")
    print("Average similarity: n/a (no panels were scored)")
