In [None]:
!pip install elasticsearch
from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch, helpers
import json
from tqdm import tqdm

# Load the SentenceTransformer model once at module level; generate_embedding()
# below reads this global on every call. The index mapping further down declares
# 384 dims for the vectors this model produces.
model = SentenceTransformer("all-MiniLM-L6-v2")

def generate_embedding(text):
    """
    Generate an embedding for ``text`` using the module-level SentenceTransformer model.

    Args:
        text: The text to embed.

    Returns:
        The embedding converted to a plain Python list via ``.tolist()``, or
        ``None`` if encoding failed for any reason (the error is printed, not raised).
    """
    try:
        return model.encode(text, convert_to_numpy=True).tolist()
    except Exception as e:
        # Build the preview through str(): ``text`` may be None or another
        # non-sliceable value, and slicing it directly here would raise a second
        # exception out of the handler instead of returning None as documented.
        preview = str(text)[:30]
        print(f"Error generating embedding for text '{preview}...': {e}")
        return None

def add_embeddings_and_upload_to_elasticsearch(input_file, es, index_name):
    """
    Embed every record of the Gita English JSON file and bulk-index the results.

    The function reads ``input_file`` as JSON, creates ``index_name`` with an
    explicit mapping when it does not exist yet, attaches an ``embedding`` to
    each record (computed from its ``description``), and sends all successfully
    embedded records to Elasticsearch in one bulk request.

    Args:
        input_file: Path to the JSON file holding the records to index.
        es: Elasticsearch client used for the index check, creation and upload.
        index_name: Name of the target index.
    """
    with open(input_file, "r", encoding="utf-8") as source:
        records = json.load(source)  # the original Gita English data

    # Create the index with an explicit mapping only when it is missing.
    index_missing = not es.indices.exists(index=index_name)
    if index_missing:
        field_types = {
            "authorName": {"type": "text"},
            "description": {"type": "text"},
            "id": {"type": "integer"},
            "lang": {"type": "keyword"},
            "language_id": {"type": "integer"},
            "verseNumber": {"type": "integer"},
            "verse_id": {"type": "integer"},
            # dims has to match the length of the vectors being stored.
            "embedding": {"type": "dense_vector", "dims": 384},
        }
        index_definition = {"mappings": {"properties": field_types}}
        es.indices.create(index=index_name, body=index_definition)
        print(f"Created index: {index_name}")

    # Embed each record; records that fail are reported and left out.
    bulk_actions = []
    for record in tqdm(records, desc="Processing entries", unit="entry"):
        try:
            vector = generate_embedding(record["description"])
            if not vector:
                print(f"Skipping entry due to embedding failure: {record.get('id', 'Unknown ID')}")
                continue
            record["embedding"] = vector
            bulk_actions.append({"_index": index_name, "_source": record})
        except Exception as exc:
            print(f"Error processing entry {record.get('id', 'Unknown ID')}: {exc}")

    # Single bulk request for everything that was embedded.
    helpers.bulk(es, bulk_actions)
    print(f"Uploaded {len(bulk_actions)} entries to Elasticsearch index '{index_name}'")

# Example usage
if __name__ == "__main__":
    import getpass
    import os

    input_file = "gita_english.json"
    index_name = "gita_english_semantic"

    # Fail fast, before prompting for credentials or contacting Elasticsearch,
    # when the data file has not been uploaded to the working directory.
    if not os.path.exists(input_file):
        raise FileNotFoundError(
            f"Input file '{input_file}' not found in {os.getcwd()}; "
            "upload it or adjust input_file before running this cell."
        )

    # Credentials must never be stored in the notebook. Read them from the
    # environment and fall back to an interactive, non-echoing prompt.
    # NOTE(review): any key that was previously committed in this cell should
    # be treated as leaked and rotated.
    ELASTIC_CLOUD_ID = os.environ.get("ELASTIC_CLOUD_ID") or getpass.getpass("Elastic Cloud ID: ")
    ELASTIC_API_KEY = os.environ.get("ELASTIC_API_KEY") or getpass.getpass("Elastic API key: ")

    # Create the client instance for the Elastic Cloud deployment
    es = Elasticsearch(
        cloud_id=ELASTIC_CLOUD_ID,
        api_key=ELASTIC_API_KEY,
    )
    add_embeddings_and_upload_to_elasticsearch(input_file, es, index_name)



Collecting elasticsearch
  Downloading elasticsearch-8.16.0-py3-none-any.whl.metadata (8.8 kB)
Collecting elastic-transport<9,>=8.15.1 (from elasticsearch)
  Downloading elastic_transport-8.15.1-py3-none-any.whl.metadata (3.7 kB)
Downloading elasticsearch-8.16.0-py3-none-any.whl (543 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m543.1/543.1 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading elastic_transport-8.15.1-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.4/64.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: elastic-transport, elasticsearch
Successfully installed elastic-transport-8.15.1 elasticsearch-8.16.0


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

FileNotFoundError: [Errno 2] No such file or directory: 'gita_english.json'

In [26]:
import getpass
import json
import os

import elasticsearch
import openai
import urllib3
from elasticsearch import Elasticsearch, helpers
from openai import OpenAI
from sentence_transformers import SentenceTransformer
# Silences urllib3's InsecureRequestWarning for the whole session.
# NOTE(review): this also hides warnings about unverified TLS connections;
# confirm it is still wanted for this deployment.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Keys and Auth.
# Secrets must never be stored in the notebook. Each value is read from the
# environment and falls back to an interactive, non-echoing prompt.
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and rotated.
ELASTIC_CLOUD_ID = os.environ.get("ELASTIC_CLOUD_ID") or getpass.getpass("Elastic Cloud ID: ")
ELASTIC_API_KEY = os.environ.get("ELASTIC_API_KEY") or getpass.getpass("Elastic API key: ")

# Create the client instance for the Elastic Cloud deployment
es = Elasticsearch(
    cloud_id=ELASTIC_CLOUD_ID,
    api_key=ELASTIC_API_KEY,
)
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

# Index queried by vector_search() in the example run below.
index = "gita_english_semantic"

# Load the SentenceTransformer model used to embed search queries
model = SentenceTransformer("all-MiniLM-L6-v2")

def generate_embedding(text):
    """Encode ``text`` with the module-level SentenceTransformer model.

    Returns the encoded vector as a plain Python list. Any error raised by
    the model propagates to the caller.
    """
    vector = model.encode(text, convert_to_numpy=True)
    return vector.tolist()

def vector_search(es, index, query, top_k=5):
    """Run a script-scored vector search against ``index``.

    The query text is embedded with generate_embedding() and every document
    (``match_all``) is scored by the cosine similarity between that vector and
    the document's ``embedding`` field, plus 1.0.

    Args:
        es: Elasticsearch client used to run the search.
        index: Name of the index to search.
        query: Free-text query to embed.
        top_k: Number of hits requested (``size``).

    Returns:
        The list found under ``["hits"]["hits"]`` in the search response; each
        hit's ``_source`` is limited to authorName, verseNumber and description.
    """
    query_vector = generate_embedding(query)

    # NOTE(review): the + 1.0 offset presumably keeps scores non-negative, as
    # script_score requires; confirm against the Elasticsearch docs.
    similarity_script = {
        "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
        "params": {"query_vector": query_vector},
    }
    request_body = {
        "size": top_k,
        "_source": ["authorName", "verseNumber", "description"],
        "query": {
            "script_score": {
                "query": {"match_all": {}},
                "script": similarity_script,
            }
        },
    }

    hits = es.search(index=index, body=request_body)["hits"]["hits"]
    return hits

def format_results_for_gpt(results):
    """Render search hits as a numbered, quoted list for the GPT prompt.

    Args:
        results: Iterable of Elasticsearch hits; each hit's ``_source`` must
            contain ``authorName`` and ``description``.

    Returns:
        A string starting with a fixed header, followed by one numbered block
        per hit (author on the first line, quoted description on the second).
    """
    header = "Relevant verses from the Gita:\n\n"
    blocks = []
    for position, hit in enumerate(results, start=1):
        doc = hit["_source"]
        blocks.append(f'{position}. {doc["authorName"]}:\n   "{doc["description"]}"\n\n')
    return header + "".join(blocks)

def send_to_gpt(prompt):
    """Ask OpenAI's chat completion API to interpret the formatted verses.

    Args:
        prompt: Text sent as the user message (the formatted search results).

    Returns:
        The content of the first choice in the response, or ``None`` when the
        request or response handling raises (the error is printed).
    """
    persona ="""You are a motivational speaker that offers spiritual guidance.
                      - You are given a spiritual leader's insights from the Gita.
                      - assume that the verses given to you were generated by yourself after reading the Gita
                      - The user came to you to seek spiritual guidance
                      - determine the verses that are highly relevant to the user's scenario
                      - using these verses as inspiration, interpret them and write an inspiring message to the user about what gita has to say about their situation
                      """
    conversation = [
        {"role": "system", "content": persona},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = openai.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            temperature=0.7,
            max_tokens=500,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        # Best effort: report the failure and let the caller deal with None.
        print(f"Error with GPT API: {err}")
        return None

if __name__ == "__main__":
    # Example Query
    #query = "What is the essence of duty in Gita?"
    query = "I have faced a lot of failures in my life and I am feeling low."

    # Step 1: Perform Semantic Search
    print("Performing semantic search...")
    search_results = vector_search(es, index, query, top_k=30)

    # Step 2: Format Results for GPT
    formatted_results = format_results_for_gpt(search_results)
    print("Formatted Results:\n", formatted_results)

    # Step 3: Send to GPT for Interpretation
    # The system prompt asks the model to address "the user's scenario", so the
    # user's own words must be part of the message; sending only the verses
    # leaves the model with nothing to tailor its answer to.
    gpt_prompt = f"User's situation: {query}\n\n{formatted_results}"
    print("Sending results to GPT for summarization...")
    human_readable_output = send_to_gpt(gpt_prompt)

    # Step 4: Display GPT's Response
    if human_readable_output is None:
        # send_to_gpt() has already printed the underlying error.
        print("\nNo response was returned by GPT.")
    else:
        print("\nGPT's Response:\n", human_readable_output)

Performing semantic search...
Formatted Results:
 Relevant verses from the Gita:

1. Swami Sivananda:
   "My limbs fail, my mouth is parched, my body quivers, and my hair stands on end."

2. Shri Purohit Swami:
   "My limbs are failing me, my throat is parched, my body is trembling, and my hair is standing on end."

3. Shri Purohit Swami:
   "This inner severance from the affliction of misery is spirituality. It should be practiced with determination and with a heart that refuses to be depressed."

4. Dr. S. Sankaranarayan:
   "Clinging fast to egotism, force, pride, craving, and anger, these envious people hate Me in the bodies of their own and of others."

5. Shri Purohit Swami:
   "Poring anxiously over evil resolutions that only end in death, seeking only the gratification of desire as the highest goal, and seeing nothing beyond—"

6. Swami Sivananda:
   "Entering into demoniacal wombs and deluded, birth after birth, they do not attain Me, thus falling, O Arjuna, into a condition s