In [1]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnableParallel

In [2]:
# Collect every file under data/md that matches the glob "**/*.md" and parse
# each one with UnstructuredMarkdownLoader.
loader = DirectoryLoader(
    'data/md',
    glob="**/*.md",
    show_progress=True,
    loader_cls=UnstructuredMarkdownLoader,
)

# Break the loaded documents into smaller chunks (chunk_size=1024, chunk_overlap=100).
# NOTE(review): an earlier version of this comment recommended chunk_size = 2000,
# which disagrees with the 1024 actually used -- confirm the intended value.
splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
docs = loader.load_and_split(splitter)

# Embed each chunk with the Ollama "mxbai-embed-large" model and index the
# resulting vectors in a Chroma vector store.
# NOTE(review): no persist_directory is passed, so presumably the index is
# rebuilt (and every chunk re-embedded) on each run -- confirm this is intended.
embeddings = OllamaEmbeddings(model="mxbai-embed-large", show_progress=True)
db = Chroma.from_documents(docs, embeddings)

100%|██████████| 49/49 [00:07<00:00,  6.19it/s]
OllamaEmbeddings: 100%|██████████| 251/251 [00:05<00:00, 45.04it/s]


In [3]:
# Name of the local Ollama model used to generate answers.
# Alternatives listed by the author: Phi 3, Mistral, "Gamma" (presumably Gemma).
local_model = "HPCBot-LLama3-1-8B-with-QA-only"

# temperature = 0 asks for the most deterministic, repeatable output; larger
# values make generations more random / creative.
llm = ChatOllama(model=local_model, temperature=0)

# Turn the vector store into a retriever that performs a similarity search
# with k = 4 for each query.
retriever = db.as_retriever(search_type="similarity", search_kwargs=dict(k=4))

# Prompt for the question-answering chain. The text is hand-written with
# Llama-3-style header tokens: a system turn carrying the instructions, a user
# turn carrying the question and the retrieved context, then an opened
# assistant turn for the model to complete.
# NOTE(review): this text is sent through ChatOllama as a chat message; if the
# Ollama model's own template also wraps messages in these special tokens they
# would be applied twice -- confirm against the model's Modelfile.
# The continuation lines keep their four leading spaces on purpose: they are
# part of the template string.
rag_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

prompt = PromptTemplate(
    template=rag_template,
    input_variables=["question", "context"],
)

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in iteration order, separated by a blank line (``"\\n\\n"``).
        An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# End-to-end RAG pipeline. The incoming value (the question) is fanned out to:
#   "context"  -> retriever, whose documents are flattened to text by format_docs
#   "question" -> passed through unchanged
# The resulting mapping fills the prompt, the prompt goes to the LLM, and the
# model reply is parsed down to a plain string.
rag_inputs = RunnableParallel(
    context=retriever | format_docs,
    question=RunnablePassthrough(),
)
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Variant of the RAG pipeline for inputs that already carry the retrieved
# documents: the input mapping's "context" entry is replaced by the text
# produced by format_docs, then prompt -> LLM -> string parsing as usual.
context_as_text = RunnablePassthrough.assign(
    context=lambda inputs: format_docs(inputs["context"])
)
rag_chain_from_docs = context_as_text | prompt | llm | StrOutputParser()

# Pipeline that keeps the sources next to the answer: run the retriever and
# pass the question through in parallel, then add an "answer" entry computed
# by rag_chain_from_docs from that mapping.
retrieve_with_question = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)
rag_chain_with_source = retrieve_with_question.assign(answer=rag_chain_from_docs)


In [4]:
# Query 1: ask for a job-submission walkthrough plus an example script.
question = "How can I run a job on Polaris? Write an example job submission script?"
print(question)  # echo the query so the output is self-describing
answer = rag_chain.invoke(question)
print(answer)

How can I run a job on Polaris? Write an example job submission script?


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 88.60it/s]


:// To run a job on Polaris using DeepSpeed, you can follow these steps:

1. Load the conda module and activate the base environment by running `module load conda` followed by `conda activate base`.
2. Clone or use the provided batch submission script for the example.
3. Request an interactive job from `polaris-login` using `qsub -A <project> -q debug-scaling -l select=2 -l walltime=01:00:00 -I`.
4. Modify the job parameters as needed, such as the number of processes, nodes, and walltime.
5. Save your changes by clicking "Save Settings" under the Options menu.

For running a DeepSpeed job on Polaris, you can use the following example job submission script:

```bash
#!/bin/sh

PBS -l select=1:system=polaris
PBS -l walltime=0:30:00
PBS -q debug
PBS -A Catalyst
PBS -l filesystems=home:eagle

cd ${PBS_O_WORKDIR}
mpiexec -n 1 ./executable
```

This script submits the job to one node in the debug queue on Polaris, requesting 30 minutes and the eagle and home filesystems. It will charge proje

In [5]:
# Query 2: ask about logging in.
question = "How to login to polaris"
print(question)  # echo the query so the output is self-describing
answer = rag_chain.invoke(question)
print(answer)

How to login to polaris


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


:// To log into Polaris, follow these steps: ssh <username>@polaris.alcf.anl.gov Then, type in the password from your CRYPTOCard/MobilePASS+ token.


In [6]:
# Query 3: ask a general "what is" question about Gromacs.
question = "What is Gromacs"
print(question)  # echo the query so the output is self-describing
answer = rag_chain.invoke(question)
print(answer)

What is Gromacs


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


://manual.gromacs.org/documentation/2022.1/download.html
tar -xzf gromacs-20.2.tar.gz
module swap PrgEnv-nvhpc PrgEnv-gnu
module load cudatoolkit-standalone/11.2.2
module load gcc/10.3.0
module load cmake
cd gromacs-20.2
mkdir build
module load gcc/10.3.0
module load cmake
cd gromacs-20.2
mkdir build
cmake -DCMAKE_C_COMPILER=cc -DCMAKE_CXX_COMPILER=CC \ -DBUILD_SHARED_LIBS=OFF -DGMX_BUILD_OWN_FFTW=ON \ -DCMAKE_INSTALL_PREFIX=/path-to/gromacs-20.2/build \ -DGMX_MPI=ON -DGMX_OPENMP=ON -DGMX_GPU=CUDA \ -DCUDA_TOOLKIT_ROOT_DIR=/soft/compilers/cudatoolkit/cuda-11.2.2
make –j 8
make install
The installed binary is build/bin/gmx_mpi.
Prebuilt Gromacs binaries can be found in the directory /soft/applications/Gromacs/gromacs-20.2.
A sample pbs script follows that will run GROMACS on two nodes, using 4 MPI ranks per node, and each rank with four OpenMP threads. The PME kernel owns one MPI rank and one GPU per node, while the nonbonded kernel uses 3 MPI ranks and 3 GPUs per node.

```

!/bin/sh
P

In [7]:
# Query 4: ask how to use GROMACS on Polaris.
question = "How to use GROMACS on Polaris"
print(question)  # echo the query so the output is self-describing
answer = rag_chain.invoke(question)
print(answer)

How to use GROMACS on Polaris


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.00it/s]


://manual.gromacs.org/documentation/2022.1/download.html
tar -xzf gromacs-20

module swap PrgEnv-nvhpc PrgEnv-gnu
module load cudatoolkit-standalone/11.2.2
module load gcc/10.3.0
module load cmake
cd gromacs-20
mkdir build
module load gcc/10.3.0
module load cmake
cd gromacs-20
mkdir build
cmake -DCMAKE_C_COMPILER=cc -DCMAKE_CXX_COMPILER=CC \ -DBUILD_SHARED_LIBS=OFF -DGMX_BUILD_OWN_FFTW=ON \ -DCMAKE_INSTALL_PREFIX=/path-to/gromacs-20
make –j 8
make install
The installed binary is build/bin/gmx_mpi.
Prebuilt Gromacs binaries can be found in the directory /soft/applications/Gromacs/gromacs-20
A sample pbs script follows that will run GROMACS on two nodes, using 4 MPI ranks per node, and each rank with four OpenMP threads. The PME kernel owns one MPI rank and one GPU per node, while the nonbonded kernel uses 3 MPI ranks and 3 GPUs per node.

```

!/bin/sh
PBS -l select=2:system=polaris
PBS -l place=scatter
PBS -l walltime=0:30:00
PBS -q debug
PBS -A PROJECT
PBS -l filesystems=home:grand:ea