Import libraries

In [None]:
import ollama
from langchain_community.llms.ollama import Ollama
import bs4
import yaml
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings

Create a method that reads YAML data from a file, parses it, and returns a Sites instance populated with data from the YAML file.

In [None]:
from pydantic import BaseModel
from typing import List, Dict

class Site(BaseModel):
    """Scrape configuration for a single site."""

    # CSS class names passed to bs4.SoupStrainer(class_=...) when pages are loaded.
    blocks: List[str]
    # Page URLs handed to WebBaseLoader as web_paths.
    links: List[str]

class Sites(BaseModel):
    """Collection of per-site scrape configurations, keyed by site name."""

    sites: Dict[str, Site]

    @classmethod
    def from_yaml_file(cls, file_path: str) -> 'Sites':
        """
        Class method to create a Sites instance from a YAML file.

        :param file_path: Path to the YAML file.
        :return: An instance of Sites populated with the YAML data from the file.
        :raises TypeError: If the top level of the YAML document is not a
            mapping (for example, the file is empty or contains a list).
        """
        # Decode explicitly as UTF-8 so parsing does not depend on the
        # platform's default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = yaml.safe_load(file)
        # safe_load returns None for an empty document and a list/scalar for
        # non-mapping content; fail with a readable message instead of the
        # opaque error that ``cls(**data)`` would produce.
        if not isinstance(data, dict):
            raise TypeError(
                f"Expected a YAML mapping at the top level of {file_path!r}, "
                f"got {type(data).__name__}"
            )
        return cls(**data)

# Load the site configuration from links.yaml (path is relative to the working directory).
sites_instance = Sites.from_yaml_file('links.yaml')


Set up model and host

In [None]:
# Ollama model tag from the library at https://ollama.com/library.
# In this notebook the same model is used for text generation and for embeddings.
USE_MODEL = "llama3:8b" # ollama model from library https://ollama.com/library
# Base URL of the host that runs the Ollama server app and serves the model.
OLLAMA_HOST = "http://galactica.lan:11434" # host that runs OLLAMA server app and serves the model

If the model is unavailable on the target host, download it first by uncommenting and running the cell below.

In [None]:
# client = ollama.Client(host=OLLAMA_HOST)
# client.pull(USE_MODEL)

In [None]:
# LLM client for the configured model, served by the Ollama instance at OLLAMA_HOST.
llm = Ollama(model=USE_MODEL, base_url=OLLAMA_HOST)

In [None]:
# Gather the documents of every configured site into one flat list.
docs = []
for site_name, site in sites_instance.sites.items():
    # Restrict HTML parsing to elements carrying one of this site's CSS classes.
    strainer = bs4.SoupStrainer(class_=site.blocks)
    loader = WebBaseLoader(web_paths=site.links, bs_kwargs={"parse_only": strainer})
    docs += loader.load()

Next, split the loaded documents into 1,000-character chunks with 200-character overlaps using the RecursiveCharacterTextSplitter. This method helps maintain context and facilitates efficient retrieval. We'll also keep track of each chunk's starting index for reference.

To search text chunks, we'll embed each one and store these embeddings in a vector database. When querying, we embed the search text and use cosine similarity to find chunks with similar embeddings.

In [None]:
# Split the loaded pages into overlapping chunks. add_start_index=True stores
# each chunk's character offset within its source document in the chunk's
# "start_index" metadata, so a retrieved chunk can be traced back to its origin.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with the Ollama-served model and store the vectors in a
# Chroma collection persisted under ./chroma_storage.
# NOTE(review): re-running this cell against an existing ./chroma_storage
# presumably adds the same chunks again — confirm, and clear the directory
# first if duplicates matter.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OllamaEmbeddings(model=USE_MODEL, base_url=OLLAMA_HOST),
    persist_directory="./chroma_storage",
)

Convert your vector store into a retriever with vectorstore.as_retriever(). This will allow you to search and retrieve relevant documents based on a query. 
Use hub.pull() to get a specific prompt from a repository. In this case, "rlm/rag-prompt" is the identifier for the prompt.
Define a function format_docs() that takes a list of documents and concatenates their content into a single string, separated by double newlines. This formatted text will be used as context for the model.

In [None]:
# Wrap the vector store in a retriever, using its default search settings.
retriever = vectorstore.as_retriever()
# Fetch the prompt published under the identifier "rlm/rag-prompt".
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Merge the text of the given documents into a single context string.

    :param docs: Iterable of objects exposing a ``page_content`` string.
    :return: The page contents joined with a blank line between documents;
        an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

Create a RAG chain to process the input question, search and format relevant documents, apply the prompt, and generate the final output.

In [None]:
# Map the incoming question onto the two prompt inputs: "context" is the
# retrieved documents rendered as text, "question" is the question unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
# Fill the prompt, run the model, and parse the model output into a string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

Example requests

In [None]:
# Sample query: run the question through the RAG chain; the parsed answer is the cell's output.
rag_chain.invoke("What kind of skin care is best? ")

In [None]:
# Sample query: run the question through the RAG chain; the parsed answer is the cell's output.
rag_chain.invoke("When are oral medications considered?  ")

In [None]:
# Sample query: run the question through the RAG chain; the parsed answer is the cell's output.
rag_chain.invoke("Do topical medications help? ")

In [None]:
# Sample query: run the question through the RAG chain; the parsed answer is the cell's output.
rag_chain.invoke("What are some examples of exposome factors?")

In [None]:
# Sample query: run the question through the RAG chain; the parsed answer is the cell's output.
rag_chain.invoke("What specific type of professionals comprised the board that held the meetings?")