In [1]:
import os
import re
import tempfile
from monty.tempfile import ScratchDir

from dotenv import load_dotenv
from langchain import hub
from langchain.agents import load_tools
from langchain_experimental.tools import PythonREPLTool
from langchain.agents import create_react_agent, AgentExecutor
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_openai import ChatOpenAI
from langchain_google_genai import GoogleGenerativeAI
from langchain.tools import ArxivQueryRun, WikipediaQueryRun
from langchain.utilities import ArxivAPIWrapper, WikipediaAPIWrapper

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from llamp.mp.agents import (
    MPSummaryExpert,
    MPThermoExpert,
    MPElasticityExpert,
    MPDielectricExpert,
    MPPiezoelectricExpert,
    MPMagnetismExpert,
    MPElectronicExpert,
    MPSynthesisExpert,
    MPStructureRetriever,
)
from llamp.atomate2.agents import MLFFMDAgent
# from llamp.arxiv.agents import ArxivAgent

# Pull variables from a local .env file (if any) into the process environment
# before the keys below are read.
load_dotenv()

# Credentials are looked up from the environment; a missing variable yields None.
MP_API_KEY = os.environ.get("MP_API_KEY")

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_ORGANIZATION = os.environ.get("OPENAI_ORGANIZATION")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# OpenAI chat model shared by the top-level agent and the assistant agents.
# Alternatives that were tried are kept below for quick switching.
OPENAI_GPT_MODEL = "gpt-4-1106-preview"
# OPENAI_GPT_MODEL = "gpt-4-0125-preview"
# OPENAI_GPT_MODEL = "gpt-3.5-turbo-1106"
# OPENAI_GPT_MODEL = "gpt-4-turbo"

In [2]:
# Chat model for the top-level agent (passed to create_react_agent further
# down). Tokens are streamed to stdout as they arrive.
top_llm = ChatOpenAI(
    model=OPENAI_GPT_MODEL,
    temperature=0.2,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
)

# The assistant-level model gets its own handler object, kept under a name so
# it can be referenced separately from the top-level one.
bottom_callback_handler = StreamingStdOutCallbackHandler()

# Chat model handed to the tools / expert agents: temperature 0.0 and up to
# five retries on failed requests.
bottom_llm = ChatOpenAI(
    model=OPENAI_GPT_MODEL,
    temperature=0.0,
    max_retries=5,
    streaming=True,
    callbacks=[bottom_callback_handler],
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
)


# General-purpose lookup tools; they are appended to `tools` after the
# Materials Project experts.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())

# Start the tool list with the built-in math tool, driven by the assistant LLM.
tools = load_tools(["llm-math"], llm=bottom_llm)

# Python REPL with a custom description. The triple-quoted text is written
# for readability; every whitespace run (including newlines) is collapsed to a
# single space before it is handed to the tool.
tools.append(
    PythonREPLTool(
        description=re.sub(
            r"\s+",
            " ",
            """A Python shell. Use this to execute python commands.
        Input should be a valid python command.
        
        TIPS:
        - If you want to see the output of a value, you should print it out
        with `print(...)`.
        - If you write any file to a local file system, you should ALWAYS 
        print the paths to the files as well.
        - If you plot any graph, you should show the graph, save it to local 
        and print the path to the image file.
        - Combine all your output string together and print once together in
        JSON format at the end of the code.
        """,
        )
    )
)
# Register the assistant agents as tools. Every Materials Project expert is
# built the same way (assistant LLM + MP API key) and wrapped with
# `as_tool`, with intermediate steps switched off. The tuple order below is
# the order in which the tools are appended. The whole extension happens in a
# single `+=`, so `tools` is only modified if every constructor succeeds.
tools += [
    *[
        expert_cls(llm=bottom_llm, mp_api_key=MP_API_KEY).as_tool(
            agent_kwargs={"return_intermediate_steps": False}
        )
        for expert_cls in (
            MPThermoExpert,
            MPElasticityExpert,
            MPDielectricExpert,
            MPMagnetismExpert,
            MPElectronicExpert,
            MPPiezoelectricExpert,
            MPSummaryExpert,
            MPSynthesisExpert,
            MPStructureRetriever,
        )
    ],
    # The MD agent is constructed without an MP API key.
    MLFFMDAgent(llm=bottom_llm).as_tool(
        agent_kwargs={"return_intermediate_steps": False}
    ),
    arxiv,
    wikipedia,
]

# System instructions for the top-level agent. The text is laid out over
# several lines for readability; re.sub collapses every whitespace run
# (newlines and indentation included) into a single space so the prompt is
# sent as one paragraph.
# Fixes relative to the previous prompt text: "You name" -> "Your name" and
# "CRITQUE" -> "CRITIQUE"; the wording is otherwise unchanged.
instructions = re.sub(
    r"\s+",
    " ",
    """Your name is LLaMP and you are a helpful agent that can consult
    materials-related data through Materials Project (MP) database, arXiv,
    Wikipedia, and run calculations/simulations via a python REPL and MP atomate2
    computational workflow.

    If you get an error, debug your code and try again. Only use
    the output of your code to answer the question. Ask user to clarify their
    queries if needed. Please note that you don't have direct control over MP but
    through multiple assistant agents to help you. You need to provide complete
    context in the input for assistants to do their job. REFINE and CRITIQUE the
    output of the assistants if needed. You can end the conversation by saying "Bye".
    """,
)

# Pull the shared ReAct prompt template from the LangChain Hub and pre-fill
# its `instructions` variable with the text above.
base_prompt = hub.pull("langchain-ai/react-agent-template")
prompt = base_prompt.partial(instructions=instructions)

# Windowed conversation memory exposed to the prompt under "chat_history".
# k=5 sets the window size; history is returned as message objects.
conversational_memory = ConversationBufferWindowMemory(
    k=5,
    memory_key="chat_history",
    return_messages=True,
)

# Top-level ReAct agent: orchestrating LLM + full tool list + customised prompt.
agent = create_react_agent(llm=top_llm, tools=tools, prompt=prompt)

# The executor runs the agent loop with verbose logging, feeds parsing errors
# back to the agent instead of raising, and records each exchange in the
# conversation memory.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=conversational_memory,
    handle_parsing_errors=True,
    verbose=True,
)

In [3]:


# Matches the sign-off word "Bye" (whole word, optional trailing punctuation).
# Only the disabled multi-turn loop sketched in the comments below uses it.
pattern = re.compile(r'\b' + re.escape("Bye") + r'\b[.!?]*')

query = """What are the bandgaps of the following materials in their most stable bulk form at 0K: Si, Ge, and GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3?"""

# Disabled scaffold for an interactive, multi-turn session run inside a
# scratch directory (kept for reference):
# with ScratchDir(rootpath=os.getcwd()):
# with tempfile.TemporaryDirectory(dir=os.getcwd()) as tmpdir:
#     os.chdir(tmpdir)
#     while True:

# Fall back to prompting the user only when no query has been preset.
if not query:
    query = input("Query:")
print(f"Query: {query}")

# Single-turn run of the top-level agent on the query.
response = agent_executor.invoke({"input": query})

# Disabled loop tail: stop once the agent says "Bye", else ask for a new query.
# if re.search(pattern, response["output"]):
#     break
# query = None


Query: What are the bandgaps of the following materials in their most stable bulk form at 0K: Si, Ge, and GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3?


[1m> Entering new AgentExecutor chain...[0m
Thought: Do I need to use a tool? Yes
Action: MPSummaryExpert
Action Input: What are the bandgaps of Si, Ge, GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3 in their most stable bulk form at 0K?[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: MPSummaryExpert
Action Input: What are the bandgaps of Si, Ge, GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3 in their most stable bulk form at 0K?[0m

[1m> Entering new AgentExecutor chain...[0m
Action:
```
{
  "action": "search_materials_summary__get",
  "action_input": {
    "formula": "Si,Ge,GaAs,GaN,SiC,BN,InSe,MoS2,BaTiO3,CsPbI3",
    "is_stable": true,
    "fields": "material_id,formula_pretty,band_gap"
  }
}
```[32;1m[1;3mAction:
```
{
  "action": "search_materials_summary__get",
  "action_input": {
    "formula": "Si,Ge,GaAs,G

Retrieving SummaryDoc documents:   0%|          | 0/10 [00:00<?, ?it/s]

[36;1m[1;3m[{'formula_pretty': 'InSe', 'material_id': 'mp-20485', 'band_gap': 0.4590000000000001}, {'formula_pretty': 'MoS2', 'material_id': 'mp-1434', 'band_gap': 1.2042000000000002}, {'formula_pretty': 'BN', 'material_id': 'mp-604884', 'band_gap': 4.3683}, {'formula_pretty': 'SiC', 'material_id': 'mp-1204356', 'band_gap': 1.8389000000000006}, {'formula_pretty': 'GaAs', 'material_id': 'mp-2534', 'band_gap': 0.18610000000000015}, {'formula_pretty': 'GaN', 'material_id': 'mp-804', 'band_gap': 1.7264999999999997}, {'formula_pretty': 'Ge', 'material_id': 'mp-32', 'band_gap': 0.0}, {'formula_pretty': 'Si', 'material_id': 'mp-149', 'band_gap': 0.6105}, {'formula_pretty': 'BaTiO3', 'material_id': 'mp-5777', 'band_gap': 2.2929999999999997}, {'formula_pretty': 'CsPbI3', 'material_id': 'mp-540839', 'band_gap': 2.5181}][0mAction:
```
{
  "action": "Final Answer",
  "action_input": "The band gaps of the most stable bulk forms at 0K for the requested materials are as follows: Si (Silicon) - 0.6

In [4]:
# Print the second message held in the agent's conversation memory. In the
# recorded run this is the agent's final answer to the bandgap query
# (presumably messages[0] is the user query and messages[1] the reply).
print(agent_executor.memory.chat_memory.messages[1].content)

The bandgaps of the most stable bulk forms at 0K for the requested materials are as follows:
- Si (Silicon): 0.6105 eV
- Ge (Germanium): 0.0 eV
- GaAs (Gallium Arsenide): 0.1861 eV
- GaN (Gallium Nitride): 1.7265 eV
- SiC (Silicon Carbide): 1.8389 eV
- BN (Boron Nitride): 4.3683 eV
- InSe (Indium Selenide): 0.459 eV
- MoS2 (Molybdenum Disulfide): 1.2042 eV
- BaTiO3 (Barium Titanate): 2.293 eV
- CsPbI3 (Cesium Lead Iodide): 2.5181 eV.


In [5]:

# Baseline: send the first message stored in the agent's memory straight to
# gpt-3.5-turbo, with no tools, and print the model name followed by its reply.
model = "gpt-3.5-turbo-1106"
llm = ChatOpenAI(
    model=model,
    temperature=0.7,
    openai_organization=OPENAI_ORGANIZATION,
    openai_api_key=OPENAI_API_KEY,
    # streaming=True
)

response = llm.invoke(agent_executor.memory.chat_memory.messages[0].content)

# One call, two lines of output: the model name, then the reply text.
print(model, response.content, sep="\n")

gpt-3.5-turbo-1106
The bandgaps of the following materials at 0K are:

- Si: 1.17 eV
- Ge: 0.66 eV
- GaAs: 1.52 eV
- GaN: 3.39 eV
- SiC: 3.23 eV
- BN: 6.0 eV
- InSe: 1.26 eV
- MoS2: 1.29 eV
- BaTiO3: 3.2 eV
- CsPbI3: 1.73 eV


In [6]:

# Baseline: send the first message stored in the agent's memory straight to
# gpt-4, with no tools, and print the model name followed by its reply.
model = "gpt-4"
llm = ChatOpenAI(
    model=model,
    temperature=0.7,
    openai_organization=OPENAI_ORGANIZATION,
    openai_api_key=OPENAI_API_KEY,
    # streaming=True
)

response = llm.invoke(agent_executor.memory.chat_memory.messages[0].content)

# One call, two lines of output: the model name, then the reply text.
print(model, response.content, sep="\n")

gpt-4
1. Silicon (Si): 1.12 eV
2. Germanium (Ge): 0.66 eV
3. Gallium Arsenide (GaAs): 1.42 eV
4. Gallium Nitride (GaN): 3.4 eV
5. Silicon Carbide (SiC): 2.36 eV
6. Boron Nitride (BN): 6.4 eV (for hexagonal BN which is the most stable bulk form)
7. Indium Selenide (InSe): 1.3 eV
8. Molybdenum Disulfide (MoS2): 1.29 eV
9. Barium Titanate (BaTiO3): 3.2 eV
10. Cesium Lead Iodide (CsPbI3): 1.73 eV

Please note that the exact bandgap can vary slightly depending on the measurement technique and sample quality. Also, temperature can affect the bandgap value. In addition, for some materials, the bandgap can be direct or indirect which is also an important factor for their optoelectronic properties.


In [7]:

# Baseline: send the first message stored in the agent's memory straight to
# Gemini Pro, with no tools. The result of `invoke` is printed directly here
# (no `.content` access, unlike the OpenAI chat cells).
model = "gemini-pro"
llm = GoogleGenerativeAI(
    model=f"models/{model}",
    temperature=0.7,
    google_api_key=GOOGLE_API_KEY,
)

response = llm.invoke(agent_executor.memory.chat_memory.messages[0].content)

# One call, two lines of output: the model name, then the reply.
print(model, response, sep="\n")

gemini-pro
| Material | Bandgap (eV) |
|---|---|
| Si | 1.17 |
| Ge | 0.66 |
| GaAs | 1.42 |
| GaN | 3.4 |
| SiC | 3.26 |
| BN | 6.4 |
| InSe | 1.3 |
| MoS2 | 1.9 |
| BaTiO3 | 3.2 |
| CsPbI3 | 1.73 |
