In [1]:
from rag_2 import parse_experiments, init_chroma_db, preprocess_text_to_chroma, get_inference, chunk_embed_text
from transformers import BertModel, BertTokenizer, DPRQuestionEncoder, DPRQuestionEncoderTokenizer, RobertaModel, RobertaTokenizer
from sentence_transformers import SentenceTransformer
import torch

class BertEmbeddings:
    """Embed text using the pooled output of a BERT encoder.

    Exposes ``embeddings(text)``, which returns a flat list of floats.
    """

    def __init__(self, model_name='bert-base-uncased'):
        """Load the tokenizer and the model weights for ``model_name``."""
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertModel.from_pretrained(model_name)

    def embeddings(self, text: str) -> list:
        """Return the pooled embedding of ``text`` as a list of floats.

        The token sequence is truncated to the tokenizer's configured maximum
        length instead of being handed to the model at full length.
        """
        # truncation=True keeps over-long inputs within the tokenizer's limit.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: no autograd graph is needed, so do not build one.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # pooler_output carries a leading batch axis; a single text was passed
        # in, so row 0 is its embedding.
        return outputs.pooler_output.detach().numpy()[0].tolist()
    

class RobertaEmbeddings:
    """Embed text using the pooled output of a RoBERTa encoder.

    Exposes ``embeddings(text)``, which returns a flat list of floats.
    """

    def __init__(self, model_name='roberta-base'):
        """Load the tokenizer and the model weights for ``model_name``."""
        self.tokenizer = RobertaTokenizer.from_pretrained(model_name)
        self.model = RobertaModel.from_pretrained(model_name)

    def embeddings(self, text: str) -> list:
        """Return the pooled embedding of ``text`` as a list of floats.

        The token sequence is truncated to the tokenizer's configured maximum
        length instead of being handed to the model at full length.
        """
        # truncation=True keeps over-long inputs within the tokenizer's limit.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: no autograd graph is needed, so do not build one.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # pooler_output carries a leading batch axis; a single text was passed
        # in, so row 0 is its embedding.
        return outputs.pooler_output.detach().numpy()[0].tolist()
    
class SentenceBertEmbeddings:
    """Sentence-Transformers wrapper exposing ``embeddings(text)``."""

    def __init__(self, model_name='paraphrase-MiniLM-L6-v2'):
        """Load the SentenceTransformer model identified by ``model_name``."""
        encoder = SentenceTransformer(model_name)
        self.model = encoder

    def embeddings(self, text: str):
        """Encode ``text`` and return the result converted via ``.tolist()``."""
        encoded = self.model.encode(text)
        return encoded.tolist()
    
class DprEmbeddings:
    """Embed text using the pooled output of a DPR question encoder.

    Exposes ``embeddings(text)``, which returns a flat list of floats.
    """

    def __init__(self, model_name='facebook/dpr-question_encoder-single-nq-base'):
        """Load the tokenizer and the model weights for ``model_name``."""
        self.tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(model_name)
        self.model = DPRQuestionEncoder.from_pretrained(model_name)

    def embeddings(self, text: str) -> list:
        """Return the pooled embedding of ``text`` as a list of floats.

        The token sequence is truncated to the tokenizer's configured maximum
        length instead of being handed to the model at full length.
        """
        # truncation=True keeps over-long inputs within the tokenizer's limit.
        inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
        # Inference only: no autograd graph is needed, so do not build one.
        with torch.no_grad():
            outputs = self.model(**inputs)
        # pooler_output carries a leading batch axis; a single text was passed
        # in, so row 0 is its embedding.
        return outputs.pooler_output.detach().numpy()[0].tolist()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
    
# Embedding backend used for indexing here and for querying in the next cell.
# Sample question to try once the index is built: Who led the Experiment 1?
get_embeddings = SentenceBertEmbeddings()

# Load the secret experiment text that will be indexed.
text = parse_experiments()

# Open the "documents-2" vector database and keep both returned handles.
chroma_client, vector_store = init_chroma_db("documents-2")

# Store the secret experiments in the vector database.
preprocess_text_to_chroma(
    text=text,
    get_embeddings=get_embeddings,
    vector_store=vector_store,
)



In [3]:
# Ask the end user for a question to search for in the vector database.
question = input("Please enter question: ")

# Prepare the data and get the answer.
response, prompt, db_results = get_inference(question, get_embeddings, vector_store)

# Show the prompt, the database results and the reply, each framed by dividers.
divider = "\n================================\n"
print(divider)
print(f"Prompt: {prompt}")
print(divider)
print(f"Database Results: {db_results}")
print(divider)
print(f"Response: {response['message']['content']}")
print(divider)



Prompt: DOCUMENT:
# Experiment 1
## May 23, 2024
The first experiment focused on using palladium electrodes submerged in heavy water (deuterium oxide, D2O). Dr. Emily D. Jensen, Senior Physicist, led this trial. The procedure involved electrolysis at a constant current of 50 mA, aiming to induce cold fusion within the palladium lattice. Throughout the 12-hour process, temperatures were carefully monitored, maintaining a steady 25°C. Voltage readings were recorded every hour to observe any anomalies indicating fusion events. The experiment yielded promising preliminary results with minor heat generation detected, suggesting potential excess energy beyond chemical reactions.
To ensure accuracy, additional measurements included the analysis of gas output. Both hydrogen and deuterium gas levels were monitored using a gas chromatograph, which revealed an increase in deuterium gas concentration over time. This increase suggested that deuterium nuclei might be fusing within the palladium la