In [1]:
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load the filtered forum export from a CSV file in the working directory.
# `loader.load()` returns the documents that the rest of the notebook indexes and searches
# (presumably one document per CSV row — confirm against the CSVLoader version in use).
loader = CSVLoader(file_path='all_data_filtered.csv')
data = loader.load()

In [12]:
# Display the text of the first loaded document to check how the CSV fields were rendered
data[0].page_content

"Title: ['ftp server - My Community']\nDescription: ['the ftp server to download code and potential data bases is up again?  ']\nAnswers: ['[yes]']\nlink to forum: ['https://www.vasp.at/forum/viewtopic.php?t=1']"

In [13]:
# Sentence-transformers checkpoint used to embed both the documents and the queries
modelPath = "sentence-transformers/all-MiniLM-l6-v2"

# Model options: run the computations on the CPU
model_kwargs = dict(device='cpu')

# Encoding options: leave the embedding vectors un-normalized
encode_kwargs = dict(normalize_embeddings=False)

# Build the embedding wrapper from the three settings defined above
embeddings = HuggingFaceEmbeddings(model_name=modelPath, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)



In [14]:
# Smoke-test the embedding model on a short sentence.
# The cell's last expression displays the length of the returned embedding vector.
text = "This is a test document."
query_result = embeddings.embed_query(text)
len(query_result)

384

In [15]:
# Build the FAISS vector store from the loaded documents, using `embeddings` to vectorize them
db = FAISS.from_documents(data, embeddings)

In [16]:
# Query the vector store: request the 5 documents most similar to the question (k=5)
question = "How to download vasp?"
searchDocs = db.similarity_search(question, k=5)

In [17]:
# Print the text of every retrieved document.
# Iterating over the results directly (rather than indexing a fixed range(5))
# avoids an IndexError when the search returns fewer than 5 documents and
# keeps this cell correct if `k` is changed in the similarity search.
for doc in searchDocs:
    print(doc.page_content)

Title: ['How to download the VASP package? - My Community']
Description: ['Hi to all.\n  At our institution we recently got one licence for the VASP package. I received the email with the passwords for the program and the potentials with no further instructions as how/where do I download the VASP package?\n\n  I apologize for the (maybe) silly question.\n\nThanks in advance and best regards,\nzee']
Answers: ['[please have a look into the "installation of VASP" chapter in the online VASP manual]']
link to forum: ['https://www.vasp.at/forum/viewtopic.php?t=3031']
Title: ['where can I get VASP 4.6.31? - My Community']
Description: ['Dear VASP Master &amp; Fellow Users: \n\n   I will add EFIELD ,but I have heard that only vasp.4.6.31 can do this work.\n   My question is where can I get VASP 4.6.31?\n\nRegards,']
Answers: ['[please ask the head of your vasp-group to download it for you from our ftp server.]']
link to forum: ['https://www.vasp.at/forum/viewtopic.php?t=5037']
Title: ['how to 