In [1]:
import os
import re
import tempfile
from monty.tempfile import ScratchDir

from dotenv import load_dotenv
from langchain import hub
from langchain.agents import load_tools
from langchain_experimental.tools import PythonREPLTool
from langchain.agents import create_react_agent, AgentExecutor
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_google_genai import GoogleGenerativeAI
from langchain.tools import ArxivQueryRun, WikipediaQueryRun
from langchain.utilities import ArxivAPIWrapper, WikipediaAPIWrapper

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from llamp.mp.agents import (
    MPSummaryExpert,
    MPThermoExpert,
    MPElasticityExpert,
    MPDielectricExpert,
    MPPiezoelectricExpert,
    MPMagnetismExpert,
    MPElectronicExpert,
    MPSynthesisExpert,
    MPStructureRetriever,
)
from llamp.atomate2.agents import MLFFMDAgent
# from llamp.arxiv.agents import ArxivAgent

# Load variables from a local `.env` file (python-dotenv) before the
# `os.getenv` lookups that follow read the API keys from the environment.
load_dotenv()

# Credentials are read from the environment; each one is None when unset.
MP_API_KEY = os.environ.get("MP_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_ORGANIZATION = os.environ.get("OPENAI_ORGANIZATION")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# OpenAI chat model used by both the top-level agent and its assistants.
# Alternatives kept for reference: "gpt-4-0125-preview",
# "gpt-3.5-turbo-1106", "gpt-4-turbo".
OPENAI_GPT_MODEL = "gpt-4-1106-preview"


In [2]:
# Constructor arguments shared by the top-level and the assistant LLMs.
openai_common_kwargs = dict(
    model=OPENAI_GPT_MODEL,
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
    streaming=True,
)

# Top-level (orchestrating) model: slightly non-zero temperature, streams to
# stdout through its own handler instance.
top_llm = ChatOpenAI(
    temperature=0.2,
    callbacks=[StreamingStdOutCallbackHandler()],
    **openai_common_kwargs,
)

# Assistant model handed to every tool/expert: temperature 0, up to 5 retries,
# streaming through a named handler so it can be referenced later.
bottom_callback_handler = StreamingStdOutCallbackHandler()
bottom_llm = ChatOpenAI(
    temperature=0.0,
    max_retries=5,
    callbacks=[bottom_callback_handler],
    **openai_common_kwargs,
)


# Stand-alone lookup tools (appended to the tool list last, arXiv first).
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())

# Usage notes for the Python REPL tool; runs of whitespace are collapsed to
# single spaces so the description becomes one line.
python_repl_description = re.sub(
    r"\s+",
    " ",
    """A Python shell. Use this to execute python commands.
        Input should be a valid python command.
        
        TIPS:
        - If you want to see the output of a value, you should print it out
        with `print(...)`.
        - If you write any file to a local file system, you should ALWAYS 
        print the paths to the files as well.
        - If you plot any graph, you should show the graph, save it to local 
        and print the path to the image file.
        - Combine all your output string together and print once together in
        JSON format at the end of the code.
        """
)

# Materials Project experts, in the order they are registered as tools.
# Each one is built with the assistant LLM and the MP API key.
mp_expert_classes = (
    MPThermoExpert,
    MPElasticityExpert,
    MPDielectricExpert,
    MPMagnetismExpert,
    MPElectronicExpert,
    MPPiezoelectricExpert,
    MPSummaryExpert,
    MPSynthesisExpert,
    MPStructureRetriever,
)

# Assemble the tool list: math, Python REPL, MP experts, MD agent, arXiv,
# Wikipedia.
tools = load_tools(["llm-math"], llm=bottom_llm)
tools.append(PythonREPLTool(description=python_repl_description))
tools.extend(
    [
        # A fresh agent_kwargs dict is passed to every expert.
        expert_cls(llm=bottom_llm, mp_api_key=MP_API_KEY).as_tool(
            agent_kwargs=dict(return_intermediate_steps=False)
        )
        for expert_cls in mp_expert_classes
    ]
)
# The MD agent is constructed without an MP API key.
tools.append(
    MLFFMDAgent(llm=bottom_llm).as_tool(
        agent_kwargs=dict(return_intermediate_steps=False)
    )
)
tools.extend([arxiv, wikipedia])

# Instructions for the top-level agent. Runs of whitespace (including the
# line breaks of the literal below) are collapsed to single spaces, so the
# prompt is injected as one line.
# Fixes two typos in the prompt text: "You name" -> "Your name" and
# "CRITQUE" -> "CRITIQUE".
instructions = re.sub(
    r"\s+",
    " ",
    """Your name is LLaMP and you are a helpful agent that can consult 
    materials-related data through Materials Project (MP) database, arXiv, 
    Wikipedia, and run calculations/simulations via a python REPL and MP atomate2 
    computational workflow.     
        
    If you get an error, debug your code and try again. Only use 
    the output of your code to answer the question. Ask user to clarify their 
    queries if needed. Please note that you don't have direct control over MP but 
    through multiple assistant agents to help you. You need to provide complete 
    context in the input for assistants to do their job. REFINE and CRITIQUE the 
    output of the assistants if needed. You can end the conversation by saying "Bye".
    """,
)
# Fetch the ReAct prompt template from the LangChain hub and bind the agent
# instructions defined above into it.
base_prompt = hub.pull("langchain-ai/react-agent-template")
prompt = base_prompt.partial(instructions=instructions)

# Windowed conversation buffer (k=5) exposed to the prompt under the
# "chat_history" key, returned as message objects.
conversational_memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    return_messages=True,
)

# ReAct agent driven by the top-level LLM, wrapped in an executor that uses
# the memory above and is configured with handle_parsing_errors=True.
agent = create_react_agent(llm=top_llm, tools=tools, prompt=prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=conversational_memory,
    handle_parsing_errors=True,
    verbose=True,
)

In [3]:


# Matches the stand-alone word "Bye" plus any trailing '.', '!' or '?'.
# The instructions tell the agent it can end the conversation with this word.
pattern = re.compile(rf"\b{re.escape('Bye')}\b[.!?]*")

# Benchmark question posed to the agent.
query = (
    "What are the bandgaps of the following materials in their most stable "
    "bulk form at 0K: Si, Ge, and GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3?"
)

# Single-shot run. An interactive variant (working inside a ScratchDir /
# temporary directory and looping until the reply matches `pattern`) was
# sketched here but is disabled.
if not query:
    # Only prompt on stdin when no query has been set.
    query = input("Query:")

print(f"Query: {query}")
response = agent_executor.invoke({"input": query})


Query: What are the bandgaps of the following materials in their most stable bulk form at 0K: Si, Ge, and GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3?


[1m> Entering new AgentExecutor chain...[0m
Thought: Do I need to use a tool? Yes
Action: MPSummaryExpert
Action Input: What are the bandgaps of Si, Ge, GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3 in their most stable bulk form at 0K?[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: MPSummaryExpert
Action Input: What are the bandgaps of Si, Ge, GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3 in their most stable bulk form at 0K?[0m

[1m> Entering new AgentExecutor chain...[0m
Action:
```json
{
  "action": "search_materials_summary__get",
  "action_input": {
    "formula": "Si,Ge,GaAs,GaN,SiC,BN,InSe,MoS2,BaTiO3,CsPbI3",
    "is_stable": true,
    "fields": "material_id,formula_pretty,band_gap"
  }
}
```[32;1m[1;3mAction:
```json
{
  "action": "search_materials_summary__get",
  "action_input": {
    "formula": "Si,G

Retrieving SummaryDoc documents:   0%|          | 0/10 [00:00<?, ?it/s]

[36;1m[1;3m[{'formula_pretty': 'SiC', 'material_id': 'mp-1204356', 'band_gap': 1.8389000000000006}, {'formula_pretty': 'MoS2', 'material_id': 'mp-1434', 'band_gap': 1.2042000000000002}, {'formula_pretty': 'Si', 'material_id': 'mp-149', 'band_gap': 0.6105}, {'formula_pretty': 'InSe', 'material_id': 'mp-20485', 'band_gap': 0.4590000000000001}, {'formula_pretty': 'GaAs', 'material_id': 'mp-2534', 'band_gap': 0.18610000000000015}, {'formula_pretty': 'Ge', 'material_id': 'mp-32', 'band_gap': 0.0}, {'formula_pretty': 'CsPbI3', 'material_id': 'mp-540839', 'band_gap': 2.5181}, {'formula_pretty': 'BaTiO3', 'material_id': 'mp-5777', 'band_gap': 2.2929999999999997}, {'formula_pretty': 'BN', 'material_id': 'mp-604884', 'band_gap': 4.3683}, {'formula_pretty': 'GaN', 'material_id': 'mp-804', 'band_gap': 1.7264999999999997}][0mAction:
```json
{
  "action": "Final Answer",
  "action_input": {
    "Si": 0.6105,
    "Ge": 0.0,
    "GaAs": 0.1861,
    "GaN": 1.7265,
    "SiC": 1.8389,
    "BN": 4.3683

In [4]:
# Second message held in the agent's conversation memory; the printed output
# is the agent's final answer to the query.
agent_reply = agent_executor.memory.chat_memory.messages[1]
print(agent_reply.content)

The bandgaps of the materials in their most stable bulk form at 0K are as follows:
- Silicon (Si): 0.6105 eV
- Germanium (Ge): 0.0 eV (Note: Germanium is a semiconductor with a small bandgap; however, the value of 0.0 eV suggests it might be considered as a metal in the database, which could be an error or due to the specific structure considered)
- Gallium arsenide (GaAs): 0.1861 eV
- Gallium nitride (GaN): 1.7265 eV
- Silicon carbide (SiC): 1.8389 eV
- Boron nitride (BN): 4.3683 eV
- Indium selenide (InSe): 0.459 eV
- Molybdenum disulfide (MoS2): 1.2042 eV
- Barium titanate (BaTiO3): 2.293 eV
- Cesium lead iodide (CsPbI3): 2.5181 eV


In [5]:

# Baseline: send the stored first message directly to GPT-3.5, no tools.
model = "gpt-3.5-turbo-1106"
llm = ChatOpenAI(
    model=model,
    temperature=0.7,
    openai_organization=OPENAI_ORGANIZATION,
    openai_api_key=OPENAI_API_KEY,
)  # streaming is left off for this comparison

# The prompt is the content of the first message in the agent's memory.
first_message = agent_executor.memory.chat_memory.messages[0]
response = llm.invoke(first_message.content)

# Model name on the first line, the reply text after it.
print(model, response.content, sep="\n")

gpt-3.5-turbo-1106
The bandgaps of the following materials in their most stable bulk form at 0K are as follows:

Si: 1.17 eV
Ge: 0.66 eV
GaAs: 1.52 eV
GaN: 3.39 eV
SiC: 3.26 eV
BN: 6.0 eV
InSe: 1.26 eV
MoS2: 1.29 eV
BaTiO3: 3.2 eV
CsPbI3: 1.73 eV


In [6]:

# Baseline: send the stored first message directly to GPT-4, no tools.
model = "gpt-4"
llm = ChatOpenAI(
    model=model,
    temperature=0.7,
    openai_organization=OPENAI_ORGANIZATION,
    openai_api_key=OPENAI_API_KEY,
)  # streaming is left off for this comparison

# The prompt is the content of the first message in the agent's memory.
first_message = agent_executor.memory.chat_memory.messages[0]
response = llm.invoke(first_message.content)

# Model name on the first line, the reply text after it.
print(model, response.content, sep="\n")

gpt-4
1. Silicon (Si): 1.17 eV
2. Germanium (Ge): 0.66 eV
3. Gallium Arsenide (GaAs): 1.43 eV
4. Gallium Nitride (GaN): 3.4 eV
5. Silicon Carbide (SiC): 3.0 eV
6. Boron Nitride (BN): 6.4 eV
7. Indium Selenide (InSe): 1.3 eV
8. Molybdenum Disulfide (MoS2): 1.29 eV
9. Barium Titanate (BaTiO3): 3.2 eV
10. Cesium Lead Iodide (CsPbI3): 1.73 eV

Please note that these values are approximate and can vary slightly depending on the source.


In [7]:

# Baseline: send the stored first message directly to Gemini Pro, no tools.
model = "gemini-pro"
llm = GoogleGenerativeAI(
    model="models/" + model,
    temperature=0.7,
    google_api_key=GOOGLE_API_KEY,
)

# The prompt is the content of the first message in the agent's memory.
first_message = agent_executor.memory.chat_memory.messages[0]
response = llm.invoke(first_message.content)

# The response object itself is printed here (no `.content` access).
print(model, response, sep="\n")

gemini-pro
- Si: 1.17 eV (indirect)
- Ge: 0.66 eV (indirect)
- GaAs: 1.42 eV (direct)
- GaN: 3.4 eV (direct)
- SiC: 3.26 eV (indirect)
- BN: 5.5 eV (direct)
- InSe: 1.25 eV (direct)
- MoS2: 1.9 eV (direct)
- BaTiO3: 3.2 eV (indirect)
- CsPbI3: 1.73 eV (direct)


In [8]:
# Baseline: send the stored first message directly to Llama 3 via Ollama.
model = "llama3"
llm = ChatOllama(
    temperature=0.7,
    model=model,
    # NOTE(review): per Ollama's parameter docs, -2 asks for generation until
    # the context is filled -- confirm for the installed version.
    num_predict=-2,
)  # stdout streaming callback intentionally not attached

# The prompt is the content of the first message in the agent's memory.
first_message = agent_executor.memory.chat_memory.messages[0]
response = llm.invoke(first_message.content)

# Model name on the first line, the reply text after it.
print(model, response.content, sep="\n")

llama3
Here are the bandgaps for each material in their most stable bulk form at 0 K (absolute zero):

1. **Silicon (Si)**: Indirect bandgap of approximately 1.12 eV.
2. **Germanium (Ge)**: Indirect bandgap of approximately 0.66 eV.
3. **Gallium Arsenide (GaAs)**: Direct bandgap of approximately 1.42 eV.
4. **Gallium Nitride (GaN)**: Wide-bandgap semiconductor with a direct bandgap of approximately 3.44 eV.
5. **Silicon Carbide (SiC)**: Wide-bandgap semiconductor with a direct bandgap of approximately 3.23 eV.
6. **Boron Nitride (BN)**: Wide-bandgap insulator with a bandgap of approximately 6.4 eV.
7. **Indium Selenide (InSe)**: Direct bandgap of approximately 1.15 eV.
8. **Molybdenum Disulfide (MoS2)**: Indirect bandgap of approximately 1.88 eV.
9. **Barium Titanate (BaTiO3)**: Ferroelectric material with a bandgap of approximately 3.2-3.5 eV, depending on the specific crystal structure.
10. **Cesium Lead Iodide (CsPbI3)**: Direct bandgap of approximately 1.8 eV.

Please note that the