In [1]:
import os
import re
import tempfile
from monty.tempfile import ScratchDir

from dotenv import load_dotenv
from langchain import hub
from langchain.agents import load_tools
from langchain_experimental.tools import PythonREPLTool
from langchain.agents import create_react_agent, AgentExecutor, create_openai_functions_agent
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_google_genai import GoogleGenerativeAI
from langchain.tools import ArxivQueryRun, WikipediaQueryRun
from langchain.utilities import ArxivAPIWrapper, WikipediaAPIWrapper

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from llamp.mp.agents import (
    MPSummaryExpert,
    MPThermoExpert,
    MPElasticityExpert,
    MPDielectricExpert,
    MPPiezoelectricExpert,
    MPMagnetismExpert,
    MPElectronicExpert,
    MPSynthesisExpert,
    MPStructureRetriever,
)
from llamp.atomate2.agents import MLFFMDAgent
# from llamp.arxiv.agents import ArxivAgent

# Load variables from a local .env file (if any) before reading credentials.
load_dotenv()

# Credentials for the external services used below; each is None when unset.
MP_API_KEY = os.getenv("MP_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_ORGANIZATION = os.getenv("OPENAI_ORGANIZATION")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# OpenAI model used for the agent when `llama` is False.
# Other candidates that were tried:
#   "gpt-4-0125-preview", "gpt-3.5-turbo-1106", "gpt-4-turbo"
OPENAI_GPT_MODEL = "gpt-4-1106-preview"

# Experiment switches read by the agent-construction cell.
llama = True  # TODO: True runs an Ollama Llama backbone instead of the GPT backbone
react = True  # True builds a ReAct agent; False builds an OpenAI-functions agent

In [2]:
# Two model tiers: `top_llm` drives the supervising agent, `bottom_llm` is
# handed to the tool-level expert agents. Both stream tokens to stdout.
# NOTE(review): `bottom_callback_handler` is only attached in the OpenAI
# branch; the Ollama branch creates its own handler and uses temperature 0.1
# for the bottom tier (vs 0.0 for OpenAI) — confirm this is intentional.
bottom_callback_handler = StreamingStdOutCallbackHandler()

if llama:
    # Local Ollama backbone; the same model name serves both tiers.
    model = "llama3.1"
    top_llm = ChatOllama(
        model=model,
        temperature=0.1,
        callbacks=[StreamingStdOutCallbackHandler()],
        num_predict=-2,  # presumably "generate until the context is full" — verify against Ollama docs
    )
    bottom_llm = ChatOllama(
        model=model,
        temperature=0.1,
        callbacks=[StreamingStdOutCallbackHandler()],
        num_predict=-2,
    )
else:
    # Hosted OpenAI backbone.
    top_llm = ChatOpenAI(
        model=OPENAI_GPT_MODEL,
        temperature=0.1,
        streaming=True,
        callbacks=[StreamingStdOutCallbackHandler()],
        openai_api_key=OPENAI_API_KEY,
        openai_organization=OPENAI_ORGANIZATION,
    )
    bottom_llm = ChatOpenAI(
        model=OPENAI_GPT_MODEL,
        temperature=0.0,
        max_retries=5,
        streaming=True,
        callbacks=[bottom_callback_handler],
        openai_api_key=OPENAI_API_KEY,
        openai_organization=OPENAI_ORGANIZATION,
    )


# General-purpose lookup tools.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
arxiv = ArxivQueryRun(api_wrapper=ArxivAPIWrapper())

# Start from the built-in math tool, backed by the bottom-tier model.
tools = load_tools(["llm-math"], llm=bottom_llm)

# Python REPL with a custom description (whitespace collapsed to single
# spaces) telling the agent how to surface results and file paths.
python_repl_tool = PythonREPLTool(
    description=re.sub(
        r"\s+",
        " ",
        """A Python shell. Use this to execute python commands.
        Input should be a valid python command.
        
        TIPS:
        - If you want to see the output of a value, you should print it out
        with `print(...)`.
        - If you write any file to a local file system, you should ALWAYS 
        print the paths to the files as well.
        - If you plot any graph, you should show the graph, save it to local 
        and print the path to the image file.
        - Combine all your output string together and print once together in
        JSON format at the end of the code.
        """
    )
)
tools.append(python_repl_tool)

# Materials Project experts, registered in this exact order. Each one is
# built on the bottom-tier model and exposed to the top agent via `as_tool`.
mp_expert_classes = (
    MPThermoExpert,
    MPElasticityExpert,
    MPDielectricExpert,
    MPMagnetismExpert,
    MPElectronicExpert,
    MPPiezoelectricExpert,
    MPSummaryExpert,
    MPSynthesisExpert,
    MPStructureRetriever,
)
mp_expert_tools = [
    expert_cls(llm=bottom_llm, mp_api_key=MP_API_KEY).as_tool(
        # A fresh dict per tool, as in the hand-written list this replaces.
        agent_kwargs={"return_intermediate_steps": False}
    )
    for expert_cls in mp_expert_classes
]

# The MD agent takes no MP API key.
md_tool = MLFFMDAgent(llm=bottom_llm).as_tool(
    agent_kwargs={"return_intermediate_steps": False}
)

tools += [*mp_expert_tools, md_tool, arxiv, wikipedia]

# Instructions injected into the top-level agent prompt. The text is written
# as an indented block for readability and then collapsed so every run of
# whitespace becomes a single space.
instructions = re.sub(
    r"\s+",
    " ",
    """Your name is LLaMP and you are a helpful agent that can consult
    materials-related data through Materials Project (MP) database, arXiv,
    Wikipedia, and run calculations/simulations via a python REPL and MP atomate2
    computational workflow.

    If you get an error, debug your code and try again. Only use
    the output of your code to answer the question. Ask user to clarify their
    queries if needed. Please note that you don't have direct control over MP but
    through multiple assistant agents to help you. You need to provide complete
    context in the input for assistants to do their job. REFINE and CRITIQUE the
    output of the assistants if needed. You can end the conversation by saying "Bye".
    """,
)

# Pick the hub template matching the agent style. Both templates expose an
# `{instructions}` slot. The previously used "hwchase17/openai-functions-agent"
# has a fixed system message and no such slot, so `.partial(instructions=...)`
# was accepted but the instructions never reached the model.
if react:
    base_prompt = hub.pull("langchain-ai/react-agent-template")
else:
    base_prompt = hub.pull("langchain-ai/openai-functions-template")
prompt = base_prompt.partial(instructions=instructions)

# Sliding-window memory: the last k=5 exchanges are fed back to the agent
# under the `chat_history` prompt variable.
conversational_memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    k=5,
    # The ReAct template is a plain-text prompt that interpolates
    # {chat_history}, so it needs the transcript as a string; passing message
    # objects would render their Python repr into the prompt. The
    # function-calling template takes message objects instead.
    return_messages=not react,
)

# Build the top-level agent in the style selected by `react`.
if react:
    agent = create_react_agent(top_llm, tools, prompt)
else:
    agent = create_openai_functions_agent(top_llm, tools, prompt)

# Executor that runs the agent/tool loop. Unparseable model output is fed
# back to the model instead of raising.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=conversational_memory,
    verbose=True,
    handle_parsing_errors=True,
)

In [3]:


# Matches the stand-alone word "Bye" plus any trailing ., ! or ? characters.
pattern = re.compile(rf"\b{re.escape('Bye')}\b[.!?]*")

# Benchmark question reused by every agent and baseline model below.
query = (
    "What are the bandgaps of the following materials in their most "
    "stable bulk form at 0K: Si, Ge, and GaAs, GaN, SiC, BN, InSe, MoS2, "
    "BaTiO3, CsPbI3?"
)

# Single-shot run of the top-level agent.
# A disabled prototype wrapped this in a scratch directory
# (ScratchDir / tempfile.TemporaryDirectory under os.getcwd()) and looped,
# prompting for a new query each turn until the agent's reply matched
# `pattern`.
if not query:
    # No preset question: ask interactively.
    query = input("Query:")
print(f"Query: {query}")
response = agent_executor.invoke({"input": query})


Query: What are the bandgaps of the following materials in their most stable bulk form at 0K: Si, Ge, and GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3?


[1m> Entering new AgentExecutor chain...[0m
Thought: Do I need to use a tool? Yes
Action: MPSummaryExpert
Action Input: What are the bandgaps of Si, Ge, GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3 in their most stable bulk form at 0K?[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: MPSummaryExpert
Action Input: What are the bandgaps of Si, Ge, GaAs, GaN, SiC, BN, InSe, MoS2, BaTiO3, CsPbI3 in their most stable bulk form at 0K?[0m

[1m> Entering new AgentExecutor chain...[0m
To find the bandgaps of the specified materials in their most stable bulk form at 0K, I will use the `search_materials_summary__get` tool with the `formula` argument set to the chemical formulas of the materials and the `is_stable` argument set to `true` to ensure we are looking at the most stable forms.

Action:
```
{
  "action": "search_materi

Retrieving SummaryDoc documents:   0%|          | 0/10 [00:00<?, ?it/s]

[36;1m[1;3m[{'formula_pretty': 'SiC', 'material_id': 'mp-1204356', 'band_gap': 1.8389000000000006}, {'formula_pretty': 'MoS2', 'material_id': 'mp-1434', 'band_gap': 1.2042000000000002}, {'formula_pretty': 'Si', 'material_id': 'mp-149', 'band_gap': 0.6105}, {'formula_pretty': 'InSe', 'material_id': 'mp-20485', 'band_gap': 0.4590000000000001}, {'formula_pretty': 'GaAs', 'material_id': 'mp-2534', 'band_gap': 0.18610000000000015}, {'formula_pretty': 'Ge', 'material_id': 'mp-32', 'band_gap': 0.0}, {'formula_pretty': 'CsPbI3', 'material_id': 'mp-540839', 'band_gap': 2.5181}, {'formula_pretty': 'BaTiO3', 'material_id': 'mp-5777', 'band_gap': 2.2929999999999997}, {'formula_pretty': 'BN', 'material_id': 'mp-604884', 'band_gap': 4.3683}, {'formula_pretty': 'GaN', 'material_id': 'mp-804', 'band_gap': 1.7264999999999997}][0mAction:
```
{
  "action": "Final Answer",
  "action_input": "The band gaps of the most stable bulk forms of the specified materials at 0K are as follows: Si (Silicon) - 0.61

In [4]:
# The streamed output above is truncated, so print the agent's full final
# answer from memory. Index -1 (not a fixed 1) keeps showing the latest
# answer when the query cell has been run more than once.
print(agent_executor.memory.chat_memory.messages[-1].content)

The bandgaps of the most stable bulk forms of the specified materials at 0K are as follows: 
- Si (Silicon): 0.6105 eV
- Ge (Germanium): 0.0 eV (metallic)
- GaAs (Gallium Arsenide): 0.1861 eV
- GaN (Gallium Nitride): 1.7265 eV
- SiC (Silicon Carbide): 1.8389 eV
- BN (Boron Nitride): 4.3683 eV
- InSe (Indium Selenide): 0.459 eV
- MoS2 (Molybdenum Disulfide): 1.2042 eV
- BaTiO3 (Barium Titanate): 2.293 eV
- CsPbI3 (Cesium Lead Iodide): 2.5181 eV.


In [6]:
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_openai import ChatOpenAI

from langchain_community.tools.you import YouSearchTool
from langchain_community.utilities.you import YouSearchAPIWrapper

# Baseline: GPT-4 ReAct agent with web search instead of the MP experts.
# Note this cell rebinds `tools`, `prompt`, `agent` and `agent_executor`
# from the earlier agent cell.
model = "gpt-4"
llm = ChatOpenAI(
    model=model,
    temperature=0.1,
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
)

# NOTE(review): the text advertises PythonREPL, but only the math and
# SerpAPI tools are registered below — confirm which one is intended.
instructions = """You are an assistant that can consult SerpAPI to answer materials related queries.
Use your judgement to return one estimated value if multiple values are retrieved. You have access to PythonREPL if calculation is needed."""

# ReAct template; the function-calling alternative would be
# hub.pull("langchain-ai/openai-functions-template").
base_prompt = hub.pull("langchain-ai/react-agent-template")
prompt = base_prompt.partial(instructions=instructions)

# NOTE(review): the You.com search tool is constructed but not added to
# `tools` (the `tools += [you_tool]` line was disabled).
you_tool = YouSearchTool(api_wrapper=YouSearchAPIWrapper(num_web_results=1))

tools = load_tools(["llm-math"], llm=llm) + load_tools(["serpapi"])

agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=50,
)

# Repeat the same question five times to see run-to-run variation.
for attempt in range(5):
    response = agent_executor.invoke({"input": query})

    print(f"{model}+Serp")
    print(response["output"])





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: Search
Action Input: Bandgap of Si at 0K[0m[33;1m[1;3m[{'link': 'https://physics.stackexchange.com/questions/552296/question-on-semiconductors-and-band-gap-at-absolute-zero', 'source': 'Stack Exchange', 'question': 'Question on semiconductors and band gap at absolute zero', 'answer': 'It would be useful to understand the difference between conductivity in metals and in semiconductors/insulators. The electrical conductivity depends on the concentration of electrons in the conduction band and the mobility of the electrons. In semiconductors and insulators, at zero K the conduction band is empty. So teher are no free electrons and the conductivity is zero. As the temperature increases some electrons from the valence band are excited into the conduction band. So the conductivity becomes non-zero but it is still much lower than in metals. This is due to the fact that the concentrati

In [12]:

# Baseline: send the raw query to GPT-3.5 with no tools or agent scaffolding.
model = "gpt-3.5-turbo-1106"
llm = ChatOpenAI(
    model=model,
    temperature=0.1,
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
)  # streaming left off; the whole reply is printed below

response = llm.invoke(query)

# Model name on the first line, reply text after it.
print(model, response.content, sep="\n")

gpt-3.5-turbo-1106
The bandgaps of the following materials in their most stable bulk form at 0K are:

- Si: 1.17 eV
- Ge: 0.66 eV
- GaAs: 1.42 eV
- GaN: 3.4 eV
- SiC: 3.2 eV
- BN: 6 eV
- InSe: 1.2 eV
- MoS2: 1.9 eV
- BaTiO3: 3.2 eV
- CsPbI3: 1.7 eV


In [15]:

# Baseline: send the raw query to GPT-4 with no tools or agent scaffolding.
model = "gpt-4"
llm = ChatOpenAI(
    model=model,
    temperature=0.1,
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
)  # streaming left off; the whole reply is printed below

response = llm.invoke(query)

# Model name on the first line, reply text after it.
print(model, response.content, sep="\n")

gpt-4
1. Silicon (Si): 1.12 eV
2. Germanium (Ge): 0.66 eV
3. Gallium Arsenide (GaAs): 1.43 eV
4. Gallium Nitride (GaN): 3.4 eV
5. Silicon Carbide (SiC): 3.26 eV
6. Boron Nitride (BN): 6.4 eV
7. Indium Selenide (InSe): 1.3 eV
8. Molybdenum Disulfide (MoS2): 1.29 eV
9. Barium Titanate (BaTiO3): 3.2 eV
10. Cesium Lead Iodide (CsPbI3): 1.73 eV

Please note that these values are approximate and can vary slightly depending on the source. Also, the bandgap can change with temperature and pressure.


In [17]:

# Baseline: send the raw query to Gemini Pro with no tools.
model = "gemini-pro"
llm = GoogleGenerativeAI(
    model=f"models/{model}",  # the API model id carries a "models/" prefix
    temperature=0.1,
    google_api_key=GOOGLE_API_KEY,
)

response = llm.invoke(query)

# The response is printed as-is (no `.content` access), unlike the chat
# model cells.
print(model, response, sep="\n")

gemini-pro
| Material | Bandgap (eV) |
|---|---|
| Si | 1.12 |
| Ge | 0.66 |
| GaAs | 1.42 |
| GaN | 3.44 |
| SiC | 3.26 |
| BN | 5.1 |
| InSe | 1.27 |
| MoS2 | 1.89 |
| BaTiO3 | 3.2 |
| CsPbI3 | 1.73 |


In [20]:
# Baseline: send the raw query to a local Llama 3 model through Ollama.
model = "llama3"
llm = ChatOllama(
    model=model,
    temperature=0.1,
    num_predict=-2,
)  # no streaming callback here; the whole reply is printed below

response = llm.invoke(query)

# Model name on the first line, reply text after it.
print(model, response.content, sep="\n")

llama3
Here are the bandgaps for each material in their most stable bulk form at 0 K:

1. Silicon (Si): 1.12 eV
2. Germanium (Ge): 0.66 eV
3. Gallium Arsenide (GaAs): 1.42 eV
4. Gallium Nitride (GaN): 3.44 eV
5. Silicon Carbide (SiC): 3.02 eV
6. Boron Nitride (BN): 6.2 eV
7. Indium Selenide (InSe): 1.15 eV
8. Molybdenum Disulfide (MoS2): 1.9 eV
9. Barium Titanate (BaTiO3): 3.5-4.0 eV (dependent on the specific crystal structure)
10. Cesium Lead Iodide (CsPbI3): 1.7-2.0 eV (dependent on the specific crystal structure)

Note that these values are for the most stable bulk form of each material at 0 K, which may not necessarily be the same as the value you would measure experimentally due to defects, impurities, or other factors.

Also, keep in mind that some of these materials have multiple polymorphs or phases with different bandgaps. For example, GaN has a wurtzite phase and a zincblende phase, each with its own bandgap value. Similarly, MoS2 has a 2H phase and a 1T' phase, each with it