In [1]:
import pandas as pd
import numpy as np
import torch
torch.classes.__path__ = []
from diffusers import StableDiffusionPipeline
from transformers import AutoTokenizer, AutoModel
import faiss
import networkx as nx
import os
import gdown
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load Bible Data
df = pd.read_csv("t_bbe.csv")  # Ensure CSV has a "text", "book", "chapter", and "verse" column
book_names = {1:'Genesis',2:'Exodus',3:'Leviticus',4:'Numbers',5:'Deuteronomy',6:'Joshua',7:'Judges',8:'Ruth',9:'1 Samuel',10:'2 Samuel',11:'1 Kings',12:'2 Kings',13:'1 Chronicles',14:'2 Chronicles',15:'Ezra',16:'Nehemiah',17:'Esther',18:'Job',19:'Psalms',20:'Proverbs',21:'Ecclesiastes',22:'Song of Solomon',23:'Isaiah',24:'Jeremiah',25:'Lamentations',26:'Ezekiel',27:'Daniel',28:'Hosea',29:'Joel',30:'Amos',31:'Obadiah',32:'Jonah',33:'Micah',34:'Nahum',35:'Habakkuk',36:'Zephaniah',37:'Haggai',38:'Zechariah',39:'Malachi',40:'Matthew',41:'Mark',42:'Luke',43:'John',44:'Acts',45:'Romans',46:'1 Corinthians',47:'2 Corinthians',48:'Galatians',49:'Ephesians',50:'Philippians',51:'Colossians',52:'1 Thessalonians',53:'2 Thessalonians',54:'1 Timothy',55:'2 Timothy',56:'Titus',57:'Philemon',58:'Hebrews',59:'James',60:'1 Peter',61:'2 Peter',62:'1 John',63:'2 John',64:'3 John',65:'Jude',66:'Revelation'}

# Map book names to dataframe
df['Book Name'] = df['b'].map(book_names)

In [3]:
# get stopwords from NLTK library but remove some to keep positional words in the text for image generation purposes
stop_words = ['to', 'any', "he'd", "we've", 'this', 'have', 'whom', "isn't", "wasn't", 'own', 'now', 'do', "mightn't", 'but', 'yourselves', "i've", 'is', "haven't", "he's", 'your', "you've", 'the', "she'll", 'did', "you'll", 'until', "wouldn't", 'than', "didn't", 'then', 'with', 'and', "should've", 'few', "it'll", 'which', 'why', "we're", 'should', 'other', "i'll", 'an', 'been', "needn't", "hasn't", 'will', 'only', "we'll", "we'd", 'what', "you'd", "shouldn't", 'me', "i'd", 'were', "aren't", 'so', "she's", "hadn't", 'o', 'ours', "they've", 'very', "don't", 'further', 'it', 'by', 'once', 'if', 'doing', 'are', 'no', 'i', 'yours', 'about', "she'd", 'most', 'how', "mustn't", 'as', 'myself', 'being', 'was', 'or', 'when', "they're", "couldn't", 'who', 'my', "doesn't", 'where', 'yourself', 'for', 'its', "won't", 'such', "he'll", 'be', 'after', 'these', 'that', "shan't", "they'll", 'nor', 'they', 'having', 'too', 'himself', 'those', "i'm", 'itself', 'just', 'while', 'does', "that'll", 'theirs', "they'd", 'can', 'of', 'am', 'because', "it'd", 'more', 'you', "weren't", 'we', 'themselves', 'ourselves', 'a', "you're", 'our']
df['corpus'] = df['t'].astype(str).str.lower().apply(
    lambda x: ' '.join([word for word in x.split() if word not in stop_words])
)

In [5]:
def load_embeddings_and_index():
    """Download and load embeddings and FAISS index."""
    
    # Define file paths
    emb_file = "bible_embeddings.npy"
    index_file = "bible_faiss.index"
    
    # Download embeddings if not present
    if not os.path.exists(emb_file):
        gdown.download("https://drive.google.com/uc?id=1-z5RDrWKn13t65PmsWb4FhOGyRcJbOpB", emb_file, quiet=False)
    
    # Download FAISS index if not present
    if not os.path.exists(index_file):
        gdown.download("https://drive.google.com/uc?id=1I7sqgWmMjFcjqDVic73IMPXK8tehcX-A", index_file, quiet=False)

    # Load files
    embeddings = np.load(emb_file, allow_pickle=True)
    index = faiss.read_index(index_file)

    return embeddings, index

embeddings, index = load_embeddings_and_index()


In [6]:
print(os.path.exists("bible_embeddings.npy"))

True


In [7]:
print(os.path.exists("bible_faiss.index")) 

True


In [None]:
# Load Sentence-BERT model
def load_model():
    """Load Sentence-BERT model."""
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return tokenizer, model

tokenizer, model = load_model()

## Create image from words/phrases using BERT model

In [None]:
def find_nearest_text(embedding, embeddings, df, top_k=1):
    """Find the most similar verse to a given embedding using cosine similarity."""
    scores = cosine_similarity(embedding, embeddings)[0]
    top_indices = scores.argsort()[-top_k:][::-1]
    return df.iloc[top_indices]['t'].values[0]  # Return most relevant verse text

# Step 1: Get BERT embedding from input text (already in your code)
embedding = get_embedding("Your input verse here")

# Step 2: Retrieve the closest verse in the dataset
nearest_text = find_nearest_text(embedding, embeddings, df)

# Step 3: Generate image using Stable Diffusion
generate_image_from_text(nearest_text)

## Create image from selected bible verse

In [9]:
# Function to get embedding for a new query
def get_embedding(text):
    """Generate embedding for the given text using Sentence-BERT."""
    tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        output = model(**tokens)
    return output.last_hidden_state[:, 0, :].numpy().astype(np.float32)

In [10]:
text = "A woman clothed with the sun, and the moon under her feet, and upon her head a crown of twelve stars"

In [15]:
# Initialize the Stable Diffusion pipeline (text-to-image)
def load_image_generator():
    model_id = "runwayml/stable-diffusion-v1-5"
    pipe = StableDiffusionPipeline.from_pretrained(model_id)#, torch_dtype=torch.float16)
    #pipe = pipe.to("cuda")  # Ensure CUDA is available
    pipe = pipe.to("cpu")
    return pipe

image_generator = load_image_generator()

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [16]:
# Function to generate image from verse text
def generate_image_from_text(text, output_path="bible_verse_image.png"):
    image = image_generator(text).images[0]
    image.save(output_path)
    print(f"Image saved to {output_path}")

In [17]:
generate_image_from_text(text)

  0%|          | 0/50 [00:00<?, ?it/s]

Image saved to bible_verse_image.png


In [19]:
prompt = f"{text}, digital painting, concept art, highly detailed, epic lighting"

In [20]:
# Load model
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = pipe.to("cpu")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [21]:
image = pipe(prompt).images[0]
image.show()

  0%|          | 0/50 [00:00<?, ?it/s]