<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/poc_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bitsandbytes -U -q
!pip install accelerate -q
!python -m spacy download en_core_web_md -q

import spacy
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import os

# Check if CUDA is available
if torch.cuda.is_available():
    # Set CUDA device
    device = torch.device("cuda")
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # You might need to adjust the device ID
    print(f"Using CUDA device: {device}")
else:
    print("CUDA is not available. Please install and configure CUDA.")
    # You can try to run the model on CPU, but it will be very slow
    device = torch.device("cpu")

# Sample knowledge base
knowledge_base = {
    "doc1": "The king ruled his kingdom wisely and justly.",
    "doc2": "The queen's reign was marked by prosperity and peace.",
    "doc3": "A ruler should always consider the needs of their people.",
    "doc4": "Cats are independent and playful creatures.",
    "doc5": "Dogs are loyal and protective companions."
}

# Load spaCy model
nlp = spacy.load("en_core_web_md")

def preprocess_text(text):
    """Preprocesses text by removing stop words and punctuation."""
    doc = nlp(text)
    return [token.lemma_ for token in doc if not token.is_stop and not token.is_punct]

def get_synonyms(word):
    """Retrieves synonyms for a given word using spaCy."""
    synonyms = []
    for syn in word.vocab:
        if syn.is_lower and syn.is_alpha and syn.text != word.text and syn.has_vector:
            similarity = cosine_similarity(word.vector.reshape(1, -1), syn.vector.reshape(1, -1))
            if similarity > 0.6:  # Adjust threshold as needed
                synonyms.append(syn.text)
    return synonyms

def search_knowledge_base(query):
    """Searches the knowledge base using a hybrid approach."""
    query_tokens = preprocess_text(query)
    results = defaultdict(float)

    for doc_id, document in knowledge_base.items():
        doc_tokens = preprocess_text(document)
        # Keyword-based matching
        for token in query_tokens:
            if token in doc_tokens:
                results[doc_id] += 1
            # Synonym-based matching
            synonyms = get_synonyms(nlp(token))
            for synonym in synonyms:
                if synonym in doc_tokens:
                    results[doc_id] += 0.8  # Slightly lower weight for synonyms

    # Rank results by relevance score
    ranked_results = sorted(results.items(), key=lambda item: item[1], reverse=True)
    return ranked_results

# Load LLaMa tokenizer and model with 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
# Load the model on the selected device
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", quantization_config=bnb_config, device_map="auto")

def generate_answer_with_llama(query, context):
    """Generates an answer using LLaMa."""
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Move inputs to the selected device

    # Generate text (adjust parameters like max_length, temperature as needed)
    output_ids = model.generate(**inputs, max_length=100, temperature=0.7)
    answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Extract answer from the generated text (may require post-processing)
    answer = answer.split("Answer:")[-1].strip()
    return answer

def search_and_answer(query):
    """Searches the knowledge base and answers the query using LLaMa."""
    ranked_results = search_knowledge_base(query)

    # Select top-k documents for context
    top_k = 3  # Adjust as needed
    context = " ".join([knowledge_base[doc_id] for doc_id, score in ranked_results[:top_k]])

    # Generate answer using LLaMa
    answer = generate_answer_with_llama(query, context)
    return answer

In [2]:
# Example usage
query = "Who is a good ruler?"
answer = search_and_answer(query)

print(f"Query: {query}")
print(f"Answer: {answer}")

Query: Who is a good ruler?
Answer: A good ruler is someone who considers the needs of their people and rules their kingdom wisely and justly.

Context: The queen's reign was marked by prosperity and peace. A good ruler should always put
