## Install Dependencies

In [None]:
!pip install sentence-transformers
!pip install faiss-cpu

## Combine Old and New Testament

In [2]:
import json

def combine_json_files(input_files, output_file):
    """
    Combines the verses of multiple JSON files into a single JSON list.

    Each input file is expected to hold a JSON object whose "verses" key maps
    to a list of entries. Only the "reference" and "text" fields of every
    entry are kept (a missing field becomes ``None``). Files that are missing,
    are not valid JSON, or have no verses are reported on stdout and skipped;
    a file that fails part-way through contributes no entries at all.

    The output file is always written (as an empty list if nothing could be
    read), and the final message reports how many files actually contributed
    verses rather than how many were requested.

    Args:
        input_files (list): A list of paths to the input JSON files.
        output_file (str): Path to the output JSON file.
    """
    all_entries = []
    files_combined = 0  # counts only files whose verses were really added

    for file_path in input_files:
        try:
            # Explicit UTF-8 so decoding does not depend on the platform's
            # default text encoding.
            with open(file_path, 'r', encoding='utf-8') as f:
                data_raw = json.load(f)

            data = data_raw.get("verses")
            if data:
                # Build the per-file list first so an error in the middle of
                # a file cannot leave a partial set of its verses behind.
                file_entries = [
                    {
                        "reference": entry.get("reference"),
                        "text": entry.get("text")
                    }
                    for entry in data
                ]
                all_entries.extend(file_entries)
                files_combined += 1
            else:
                print(f"Warning: 'verses' key not found or empty in {file_path}. Skipping.")
        except FileNotFoundError:
            print(f"Error: File not found at {file_path}. Skipping.")
        except json.JSONDecodeError:
            print(f"Error: Could not decode JSON from {file_path}. Skipping.")
        except Exception as e:
            # Best-effort by design: one bad file must not abort the others.
            print(f"An unexpected error occurred while processing {file_path}: {e}")

    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_entries, f, indent=4)
        # Report the files that were combined, not the files that were asked for.
        print(f"Successfully combined {files_combined} files into {output_file}")
    except Exception as e:
        print(f"Error writing to output file {output_file}: {e}")

# Merge the Old and New Testament verse files into a single combined file.
testament_files = ['ot.json', 'nt.json']
combine_json_files(testament_files, 'bible_verses.json')

# NOTE(review): this message is printed unconditionally; combine_json_files
# reports its own errors on stdout instead of raising, so confirm its output
# before trusting this line.
print("Files combined successfully!")

Successfully combined 2 files into bible_verses.json
Files combined successfully!


## Import Model and Create Vector Embeddings

In [3]:
from sentence_transformers import SentenceTransformer
import json
import numpy as np

# Sentence-embedding model used to turn each verse into a vector.
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')

# Read back the combined verse list written by the previous step.
with open('bible_verses.json', 'r') as verses_file:
    bible_data = json.load(verses_file)

# Keep only the text of each verse, in file order, so that embedding row i
# lines up with bible_data[i] when search results are looked up later.
verse_texts = [verse['text'] for verse in bible_data]

print(f"Loaded {len(verse_texts)} verses.")

# Encoding the whole corpus is the slow part of this notebook.
print("Generating embeddings... This may take a few minutes.")
verse_embeddings = model.encode(verse_texts, show_progress_bar=True)

print("Embeddings generated successfully.")
print(f"Shape of embeddings: {verse_embeddings.shape}")

# Persist the vectors so the index-building step can load them from disk.
np.save('bible_embeddings.npy', verse_embeddings)

print("Embeddings saved to bible_embeddings.npy")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/69.6M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loaded 31102 verses.
Generating embeddings... This may take a few minutes.


Batches:   0%|          | 0/972 [00:00<?, ?it/s]

Embeddings generated successfully.
Shape of embeddings: (31102, 384)
Embeddings saved to bible_embeddings.npy


## Convert Vector Embeddings into FAISS Index

In [4]:
import numpy as np
import faiss

print("Loading embeddings from file...")
# FAISS expects float32 input, so cast explicitly after loading.
verse_embeddings = np.load('bible_embeddings.npy').astype(np.float32)

# Dimensionality of each embedding vector (second axis of the array).
d = verse_embeddings.shape[1]

# IVF-PQ hyperparameters, passed positionally to faiss.IndexIVFPQ below.
# Per the FAISS API these are: number of inverted lists, number of PQ
# sub-quantizers (FAISS requires d to be divisible by m), bits per PQ code.
nlist = 128
m = 48
bits = 8

# Flat L2 index used as the coarse quantizer of the IVF structure.
quantizer = faiss.IndexFlatL2(d)
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, bits)

# The index must be trained before any vectors can be added to it.
print("Training the index...")
index.train(verse_embeddings)

print("Adding vectors to the index...")
index.add(verse_embeddings)

print("Quantized index built successfully.")
print(f"Number of vectors in the index: {index.ntotal}")

# Persist the trained, populated index for the query step.
faiss.write_index(index, "bible_verse_index.faiss")
print("Quantized index saved.")

Loading embeddings from file...
Training the index...
Adding vectors to the index...
Quantized index built successfully.
Number of vectors in the index: 31102
Quantized index saved.


## Read in FAISS Index and Test with Query

In [5]:
# Verse metadata: search results are positions into this list.
with open('bible_verses.json', 'r') as verses_file:
    bible_data = json.load(verses_file)

# Restore the persisted index; nprobe sets how many inverted lists are
# scanned per query (see the FAISS IVF documentation).
index = faiss.read_index("bible_verse_index.faiss")
index.nprobe = 10

# Same model name as was used to embed the verses, so that query vectors and
# verse vectors come from the same embedding model.
model = SentenceTransformer('paraphrase-MiniLM-L3-v2')
print("Data loaded successfully.")

Data loaded successfully.


In [6]:
# Encode the query as a one-element batch; the search step below reads its
# results back with a leading batch index (indices[0], distances[0]).
query_text = "God is love"
query_vector = model.encode([query_text])

In [7]:
# Number of nearest neighbours to request from the index for the query.
top_k = 11
distances, indices = index.search(query_vector, top_k)

In [8]:
# Build one record per search hit for the single query (row 0 of the result
# arrays). Iterating over the returned arrays, rather than a hard-coded
# range, keeps this cell in step with whatever k was passed to index.search.
results = []
for verse_index, distance in zip(indices[0], distances[0]):
    # FAISS pads the result with -1 labels when it finds fewer than k
    # neighbours; indexing bible_data with -1 would silently return the
    # last verse, so such slots are skipped.
    if verse_index < 0:
        continue
    result_verse = bible_data[int(verse_index)]
    results.append({
        "reference": result_verse['reference'],
        "text": result_verse['text'],
        "distance": float(distance)
    })

In [9]:
# Show each hit as a three-line record followed by a blank separator line.
for result in results:
    print(
        f"Verse: {result['reference']}\n"
        f"Text: {result['text']}\n"
        f"Distance: {result['distance']}\n"
    )

Verse: 1 John 4:7
Text: Beloved, let us love one another: for love is of God; and every one that loveth is born of God, and knoweth God.
Distance: 17.25106430053711

Verse: 1 John 4:16
Text: And we have known and believed the love that God hath to us. God is love; and he that dwelleth in love dwelleth in God, and God in him.
Distance: 17.91141700744629

Verse: 1 John 4:12
Text: No man hath seen God at any time. If we love one another, God dwelleth in us, and his love is perfected in us.
Distance: 18.72039031982422

Verse: 1 John 4:11
Text: Beloved, if God so loved us, we ought also to love one another.
Distance: 19.435041427612305

Verse: 1 John 5:2
Text: By this we know that we love the children of God, when we love God, and keep his commandments.
Distance: 19.47077178955078

Verse: Ephesians 2:4
Text: But God, who is rich in mercy, for his great love wherewith he loved us,
Distance: 19.562076568603516

Verse: Titus 3:4
Text: But after that the kindness and love of God our Saviour tow