In [12]:
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [8]:
# Source document, chunking and embedding configuration.
PDF_PATH = "data/MD_review.pdf"
CHUNK_SIZE = 2000      # chunk length passed to the text splitter
CHUNK_OVERLAP = 100    # overlap between neighbouring chunks
EMBED_MODEL = "mxbai-embed-large"
# On-disk location of the vector store. Delete this directory whenever the PDF,
# the chunking parameters or the embedding model change; otherwise stale vectors
# from the previous configuration are silently reused.
PERSIST_DIR = "chroma_db/MD_review"

# Load the PDF file and break it into smaller chunks.
# This is cheap compared to embedding, so it is redone on every run.
loader = PyPDFLoader(PDF_PATH)
pages = loader.load_and_split(
    RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
)

# Open (or create) the local vector database on disk.
# Embedding all chunks took ~16 minutes in the recorded run, so the chunks are only
# embedded when the store is still empty; later runs reuse the persisted vectors.
# This also keeps a re-run of this cell from adding the same chunks a second time.
db = Chroma(
    persist_directory=PERSIST_DIR,
    embedding_function=OllamaEmbeddings(model=EMBED_MODEL, show_progress=True),
)
if not db.get(limit=1)["ids"]:
    db.add_documents(pages)

# Show the first chunk as a sanity check of the load/split step.
pages[0]

OllamaEmbeddings: 100%|██████████| 212/212 [16:32<00:00,  4.68s/it] 


Document(page_content='CHAPTER 6\nReactivity and Dynamics at Liquid\nInterfaces\nIlan Benjamin\nDepartmentofChemistryandBiochemistry,Universityof\nCalifornia,SantaCruz,CA95064,USA\nINTRODUCTION\nMany phenomena of interest in science and technology take place at the\ninterface between a liquid and a second phase. Corrosion, the operation of\nsolar cells, and the water splitting reaction are examples of chemical processesthat take place at the liquid/solid interface.\n1,2Electron transfer, ion transfer,\nand proton transfer reactions at the interface between two immiscible liquids\nare important for understanding processes such as ion extraction,3,4phase\ntransfer catalysis,5,6drug delivery,7and ion channel dynamics in membrane\nbiophysics.8The study of reactions at the water liquid/vapor interface is of\ncrucial importance in atmospheric chemistry.9,10Understanding the behavior\nof solute molecules adsorbed at these interfaces and their reactivity is also\nof fundamental theoretical int

In [9]:
# Turn the vector store into a retriever that returns the 4 most similar chunks per query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

# Name of the local Ollama model to chat with (alternatives: Phi 3, Mistral, Gemma).
local_model = "llama3"

# Higher temperatures give more creative/random output, lower temperatures give more
# deterministic/repetitive output; 0 is used here.
llm = ChatOllama(model=local_model, temperature=0)

# Prompt for the RAG chain, expressed as role-tagged chat messages.
# The chat model wrapper sends messages to Ollama, which applies the selected model's
# own chat template. Model-specific control tokens (e.g. Llama 3's
# <|start_header_id|> markers) must therefore NOT be written into the text: doing so
# wraps the prompt twice and breaks as soon as `local_model` is switched to a
# different model family.
# Input variables: {question} (the user's question) and {context} (retrieved text).
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know.",
        ),
        ("human", "Question: {question}\nContext: {context}\nAnswer:"),
    ]
)

def format_docs(docs):
    """Concatenate the text of the given documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values, in order, separated by a blank line.
    """
    chunk_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(chunk_texts)

# Inputs to the prompt: "context" is the retriever's hits for the question, flattened
# to text by format_docs; "question" is the incoming question via RunnablePassthrough.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
answer_parser = StrOutputParser()

# Full pipeline: build prompt inputs -> fill the prompt -> query the LLM -> parse to str.
rag_chain = rag_inputs | prompt | llm | answer_parser


In [10]:
# Ask about the radial distribution function; echo the question, then print the chain's answer.
question = "given radial distribution function g(r), how do we find number of particle in a spherical shell of thickness dr?"
print(question)
answer = rag_chain.invoke(question)
print(answer)

given radial distribution function g(r), how do we find number of particle in a spherical shell of thickness dr?


OllamaEmbeddings: 100%|██████████| 1/1 [00:01<00:00,  1.17s/it]


According to the context, given the radial distribution function g(r), you can find the number of particles in a spherical shell of thickness dr and radius r centered on a given particle by multiplying the probability density by the volume element:

4πr2g(r)ρLdr

This is the number of particles in a spherical shell of thickness dr and radius r centered on a given particle.


In [11]:
# Ask for a definition of a force field; echo the question, then print the chain's answer.
question = "What is a force field?"
print(question)
answer = rag_chain.invoke(question)
print(answer)

What is a force field?


OllamaEmbeddings: 100%|██████████| 1/1 [00:03<00:00,  3.05s/it]


A force field is a type of potential energy function used in molecular simulations to describe the interactions between particles. In the context of polarizable force fields, it refers to a subset of many-body potentials that are important for simulating interfacial systems. These force fields take into account the induced dipoles and local electric fields within the system, which can affect the behavior of particles at liquid interfaces.


In [14]:
# Ask how to set up an air/water interface simulation; echo the question, then print the answer.
question = "How to set up a simulation for air/water interface?"
print(question)
answer = rag_chain.invoke(question)
print(answer)

How to set up a simulation for air/water interface?


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]


Based on the provided context, it seems that setting up a simulation for an air/water interface involves using molecular dynamics (MD) simulations and considering various factors such as the type of dye molecule used, the solvation environment, and the roughness of the interface.

To set up a simulation, you would need to:

1. Choose a suitable MD software package, such as GROMACS or AMBER.
2. Define the system: air/water interface with a specific dye molecule (e.g., DCE).
3. Set up the initial conditions: temperature, pressure, and solvent properties (e.g., water density, surface tension).
4. Use polarizable potentials to account for the interactions between the dye molecules and the surrounding environment.
5. Run the simulation to generate the imaginary χ(2) spectra at the air/water interface.

Keep in mind that the specific details of the simulation setup may vary depending on the research question or goal. If you're looking for more detailed guidance, I recommend consulting releva