# Huggingface 

## Embeddings

In [1]:
import os
from dotenv import find_dotenv, load_dotenv

# Locate the local .env file and read it before any provider is created.
# Presumably this is where the OpenAI key comes from -- to confirm, inspect:
# os.environ["OPENAI_API_KEY"]
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Embedding model used to build the vector store and to embed queries later on.
from langchain.embeddings import OpenAIEmbeddings
embedding_provider = OpenAIEmbeddings()

# Alternative embedding backend (Hugging Face sentence-transformer):
# from langchain.embeddings import HuggingFaceEmbeddings
# embedding_provider = HuggingFaceEmbeddings(model_name='T-Systems-onsite/cross-en-de-roberta-sentence-transformer')

## LLM

In [3]:
from langchain.chat_models import ChatOpenAI

# Chat model handed to the multi-query retriever and the QA chains further down.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Alternative LLM backend (local Hugging Face text-generation pipeline):
# from langchain.llms import HuggingFacePipeline
# llm_hf = HuggingFacePipeline.from_model_id(
#     model_id="gpt2",
#     task="text-generation",
#     pipeline_kwargs={"max_new_tokens": 128},
# )

# Load

In [4]:
from langchain.document_loaders import WebBaseLoader

# Web page that serves as the knowledge source for the rest of the notebook.
SOURCE_URL = "https://en.wikipedia.org/wiki/Angela_Merkel"

# Fetch the page; `data` is what the splitter consumes in the next step.
loader = WebBaseLoader(SOURCE_URL)
data = loader.load()

# Split

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitting parameters: chunks of size 500 with no overlap between neighbours.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 0

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break the loaded page into the chunks that get embedded and indexed.
all_splits = text_splitter.split_documents(data)

In [6]:
# Show one sample chunk as a sanity check on the splitter output.
# The page is fetched live, so the number of chunks can differ between runs:
# clamp the index so a shorter page prints its last chunk instead of raising
# IndexError, and report an empty result explicitly.
SAMPLE_INDEX = 196
if all_splits:
    print(all_splits[min(SAMPLE_INDEX, len(all_splits) - 1)])
else:
    print("No chunks were produced - check the loader and splitter cells.")

page_content="Merkel is a fervent football fan and has been known to listen to games while in the Bundestag and to attend games of the national team in her official capacity, including Germany's 1–0 victory against Argentina in the 2014 World Cup Final.[328][329][330]\nMerkel has stated that her favorite movie is The Legend of Paul and Paula, an East German movie released in 1973.[331]" metadata={'source': 'https://en.wikipedia.org/wiki/Angela_Merkel', 'title': 'Angela Merkel - Wikipedia', 'language': 'en'}


# Store

In [7]:
from langchain.vectorstores import Chroma

# Index all chunks in a Chroma vector store, using `embedding_provider`
# as the embedding model.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_provider,
)

# Retrieve

In [8]:
# Question reused by the retrieval and generation cells below.
question = "Is Angela Merkel married, if so to whom?"

# Retrieve the chunks most similar to the question (the store's default
# number of hits, since no `k` is passed).
docs = vectorstore.similarity_search(question)

# A bare `len(docs)` in the middle of a cell is evaluated and discarded;
# print it so the number of hits is actually shown alongside the documents.
print(len(docs))
docs

[Document(page_content="In 1977, at the age of 23, Merkel, then Angela Kasner, married physics student Ulrich Merkel (born 1953)[319] and took his surname. The couple divorced in 1982.[320] Her second and current husband is quantum chemist and professor Joachim Sauer, who has largely avoided media attention during and after Merkel's political career.[321][322] They first met in 1981[323] and married in 1998.[324] Merkel has no children, but Sauer has two adult sons from a previous marriage.[325]", metadata={'source': 'https://en.wikipedia.org/wiki/Angela_Merkel', 'title': 'Angela Merkel - Wikipedia', 'language': 'en'}),
 Document(page_content='Personal life\nMain article: Family of Angela Merkel\nU.S. president Barack Obama, Michelle Obama, Merkel, and her husband Joachim Sauer, 2009', metadata={'source': 'https://en.wikipedia.org/wiki/Angela_Merkel', 'title': 'Angela Merkel - Wikipedia', 'language': 'en'}),
 Document(page_content='^ "Angela Merkel\'s journey from \'Mädchen\' to \'Mutt

In [9]:
# Same search as above, but done in two explicit steps: embed the query text,
# then look up the nearest chunks by vector and keep their scores.
query = "Is Angela Merkel married, if so to whom?"
TOP_K = 10

embedding_vector = embedding_provider.embed_query(query)

# Each result is a (Document, score) pair. In the captured output the first hit
# carries the smallest score, so the score is presumably a distance
# (lower = closer) -- confirm against the vector store documentation.
docs = vectorstore.similarity_search_by_vector_with_relevance_scores(
    embedding_vector,
    k=TOP_K,
)
docs

[(Document(page_content="In 1977, at the age of 23, Merkel, then Angela Kasner, married physics student Ulrich Merkel (born 1953)[319] and took his surname. The couple divorced in 1982.[320] Her second and current husband is quantum chemist and professor Joachim Sauer, who has largely avoided media attention during and after Merkel's political career.[321][322] They first met in 1981[323] and married in 1998.[324] Merkel has no children, but Sauer has two adult sons from a previous marriage.[325]", metadata={'source': 'https://en.wikipedia.org/wiki/Angela_Merkel', 'title': 'Angela Merkel - Wikipedia', 'language': 'en'}),
  0.2417650818824768),
 (Document(page_content='Personal life\nMain article: Family of Angela Merkel\nU.S. president Barack Obama, Michelle Obama, Merkel, and her husband Joachim Sauer, 2009', metadata={'source': 'https://en.wikipedia.org/wiki/Angela_Merkel', 'title': 'Angela Merkel - Wikipedia', 'language': 'en'}),
  0.2607648968696594),
 (Document(page_content='^ "An

## Multiquery retriever

In [11]:
import logging

from langchain.retrievers.multi_query import MultiQueryRetriever

# Turn on INFO logging for the multi-query module so the generated query
# variants are printed when the retriever runs.
logging.basicConfig()
multi_query_logger = logging.getLogger('langchain.retrievers.multi_query')
multi_query_logger.setLevel(logging.INFO)

# Wrap the plain vector-store retriever; the LLM is used to produce the
# alternative phrasings of the question reported in the log output.
base_retriever = vectorstore.as_retriever()
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=llm,
)

unique_docs = retriever_from_llm.get_relevant_documents(query=question)
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Who is Angela Merkel married to, if she is married?', "2. Can you provide information about Angela Merkel's marital status and her spouse?", '3. If Angela Merkel is married, could you please tell me the name of her spouse?']


6

# Generate

In [12]:
from langchain.chains import RetrievalQA


In [13]:
# Build a retrieval QA chain on top of the vector store and ask the question.
qa_retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=qa_retriever)
# Variant using the Hugging Face pipeline LLM instead:
# qa_chain = RetrievalQA.from_chain_type(llm=llm_hf,retriever=vectorstore.as_retriever())

# The chain takes a dict with a "query" key; the returned dict holds the
# query together with the generated "result".
qa_inputs = {"query": question}
out = qa_chain(qa_inputs)
out

{'query': 'Is Angela Merkel married, if so to whom?',
 'result': 'Yes, Angela Merkel is married. Her current husband is Joachim Sauer, a quantum chemist and professor. They got married in 1998.'}

In [16]:
# Rebuilds the retrieval QA chain (the LLM is passed positionally here) and
# asks the same question once more.
# NOTE(review): this repeats the preceding QA cell and issues another LLM
# call -- consider dropping one of the two.
qa_retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm, retriever=qa_retriever)

qa_inputs = {"query": question}
out = qa_chain(qa_inputs)
out

{'query': 'Is Angela Merkel married, if so to whom?',
 'result': 'Yes, Angela Merkel is married. Her current husband is Joachim Sauer, a quantum chemist and professor. They got married in 1998.'}