In [1]:
import json
import os

import openai
from dotenv import load_dotenv
from mp_api.client import MPRester

from langchain.tools import APIOperation, OpenAPISpec

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)
MP_API_KEY = os.getenv("MP_API_KEY", None)
openai.api_key = OPENAI_API_KEY

In [3]:
import json
import os

import openai
from dotenv import load_dotenv
from mp_api.client import MPRester

from langchain.tools import APIOperation, OpenAPISpec

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", None)
MP_API_KEY = os.getenv("MP_API_KEY", None)
openai.api_key = OPENAI_API_KEY

spec = OpenAPISpec.from_url("https://api.materialsproject.org/openapi.json")
operation = APIOperation.from_openapi_spec(
    spec, "/materials/summary/{material_id}/", "get"
)

# from langchain.chat_models import ChatOpenAI, ChatAnthropic
# from langchain.schema import HumanMessage, AIMessage, ChatMessage
# from langchain.tools import format_tool_to_openai_function

# llm = ChatAnthropic(anthropic_api_key=)
# llm = ChatOpenAI(model='gpt-4-0613', temperature=0.5, openai_api_key=OPENAI_API_KEY, client=)


class LLMaterialsAgent:
    def __init__(self, mp_api_key=MP_API_KEY, openai_api_key=OPENAI_API_KEY):
        # Initialize the Materials Project API
        self.mpr = MPRester(mp_api_key)
        # Initialize the OpenAI API
        openai.api_key = openai_api_key

    def get_materials_data(self, query_params):
        # Retrieve data from the Materials Project using the MPRester class
        data = self.mpr.summary.search(**query_params)
        return data

    def run_conversation(self, user_input):
        # Step 1: send the conversation and available functions to GPT
        messages = [{"role": "user", "content": user_input}]
        functions = [
            {
                "name": "get_materials_data",
                "description": "Get materials data from the Materials Project",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "elements": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Elements to query, e.g., ['Si', 'O']",
                        },
                        "band_gap": {
                            "type": "array",
                            "items": {"type": "number"},
                            "description": "Range of band gap values, e.g., [0.5, 1.0]",
                        },
                        "fields": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": f"Fields to return, including {self.mpr.summary.available_fields}",
                        }
                        # "limit": {
                        #     "type": "integer",
                        #     "description": "Number of materials to retrieve, e.g., 5",
                        # },
                    },
                    "required": ["elements", "band_gap"],
                },
            }
        ]
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            messages=messages,
            functions=functions,
            function_call="auto",  # auto is default, but we'll be explicit
        )
        response_message = response["choices"][0]["message"]

        # Step 2: check if GPT wanted to call a function
        if response_message.get("function_call"):
            # Step 3: call the function
            # Note: the JSON response may not always be valid; be sure to handle errors
            available_functions = {
                "get_materials_data": self.get_materials_data,
            }  # only one function in this example, but you can have multiple
            function_name = response_message["function_call"]["name"]
            function_to_call = available_functions[function_name]
            function_args = json.loads(response_message["function_call"]["arguments"])
            function_response = function_to_call(query_params=function_args)

            # breakpoint()

            # Step 4: send the info on the function call and function response to GPT
            messages.append(
                response_message
            )  # extend conversation with assistant's reply
            # breakpoint()
            messages.append(
                {
                    "role": "function",
                    "name": function_name,
                    "content": json.dumps(function_response[0]),
                    # "content": function_response,
                }
            )  # extend conversation with function response
            # breakpoint()
            second_response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-0613",
                messages=messages,
            )  # get a new response from GPT where it can see the function response
            # breakpoint()
            return second_response

In [4]:
ll_agent = LLMaterialsAgent()
user_input = input("Please enter your natural language query: ")




In [5]:
messages = [{"role": "user", "content": user_input}]
functions = [
    {
        "name": "get_materials_data",
        "description": "Get materials data from the Materials Project",
        "parameters": {
            "type": "object",
            "properties": {
                "elements": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Elements to query, e.g., ['Si', 'O']",
                },
                "band_gap": {
                    "type": "array",
                    "items": {"type": "number"},
                    "description": "Range of band gap values, e.g., [0.5, 1.0]",
                },
                # "limit": {
                #     "type": "integer",
                #     "description": "Number of materials to retrieve, e.g., 5",
                # },
            },
            "required": ["elements", "band_gap"],
        },
    }
]
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    messages=messages,
    functions=functions,
    function_call="auto",  # auto is default, but we'll be explicit
)
response_message = response["choices"][0]["message"]

In [6]:
available_functions = {
    "get_materials_data": ll_agent.get_materials_data,
}  # only one function in this example, but you can have multiple
function_name = response_message["function_call"]["name"]
function_to_call = available_functions[function_name]
function_args = json.loads(response_message["function_call"]["arguments"])
function_response = function_to_call(query_params=function_args)

Retrieving SummaryDoc documents:   0%|          | 0/724 [00:00<?, ?it/s]

In [26]:
function_response[0].



In [24]:
from monty.json import MontyEncoder, MontyDecoder

# json.dumps(function_response[0], cls=MontyEncoder)
# json.dumps(function_response[0].dict(), indent=4, cls=MontyDecoder)
# function_response[0]
MontyDecoder().decode(function_response[0].__str__())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [29]:
function_response[0]

[4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mbuilder_meta[0;0m=EmmetMeta(emmet_version='0.38.6', pymatgen_version='2022.10.22', pull_request=825, database_version='2022.10.28', build_date=datetime.datetime(2022, 10, 28, 0, 56, 47, 317000)),
[1mnsites[0;0m=76,
[1melements[0;0m=[Element C, Element H, Element N, Element O, Element P],
[1mnelements[0;0m=5,
[1mcomposition[0;0m=Composition('P4 H36 C4 N16 O16'),
[1mcomposition_reduced[0;0m=Composition('P1 H9 C1 N4 O4'),
[1mformula_pretty[0;0m='PH9C(NO)4',
[1mformula_anonymous[0;0m='ABC4D4E9',
[1mchemsys[0;0m='C-H-N-O-P',
[1mvolume[0;0m=729.1804951382222,
[1mdensity[0;0m=1.5674917207978354,
[1mdensity_atomic[0;0m=9.594480199187135,
[1msymmetry[0;0m=SymmetryData(crystal_system=<CrystalSystem.tri: 'Triclinic'>, symbol='P-1', number=2, point_group='-1', symprec=0.1, version='1.16.2'),
[1mproperty_name[0;0m='summary',
[1mmaterial_id[0;0m=MPID(mp-604964),
[1mdeprecated[0;0m=False,
[1mdeprecation_reasons[0;0m=None,

In [27]:

messages.append(
    {
        "role": "function",
        "name": function_name,
        # "content": json.dumps(function_response[0]),
        "content": function_response[0].__str__(),
    }
)

In [28]:
second_response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    messages=messages,
)  # get a new response from GPT where it can see the function response

InvalidRequestError: This model's maximum context length is 4097 tokens. However, your messages resulted in 6107 tokens. Please reduce the length of the messages.

In [59]:
from emmet.core.summary import SummaryDoc
from monty.serialization import 


TypeError: __init__() takes exactly 1 positional argument (2 given)

In [8]:
from pymatgen.core import Structure, Element
import datetime
def serialize_nested_object(obj):
    if isinstance(obj, dict):
        serialized_dict = {}
        for key, value in obj.items():
            serialized_dict[key] = serialize_nested_object(value)
        return serialized_dict
    elif isinstance(obj, list):
        serialized_list = []
        for item in obj:
            serialized_list.append(serialize_nested_object(item))
        return serialized_list
    elif isinstance(obj, (datetime.datetime, Element)):
        return str(obj)
    elif isinstance(obj, (str, int, float)):
        return obj
    else:
        # Handle other data types or custom objects as needed
        return str(obj)


serialize_nested_object(function_response[0].dict())


{'builder_meta': {'emmet_version': '0.38.6',
  'pymatgen_version': '2022.10.22',
  'pull_request': 825,
  'database_version': '2022.10.28',
  'build_date': '2022-10-28 00:56:47.317000'},
 'nsites': 76,
 'elements': ['C', 'H', 'N', 'O', 'P'],
 'nelements': 5,
 'composition': 'P4 H36 C4 N16 O16',
 'composition_reduced': 'P1 H9 C1 N4 O4',
 'formula_pretty': 'PH9C(NO)4',
 'formula_anonymous': 'ABC4D4E9',
 'chemsys': 'C-H-N-O-P',
 'volume': 729.1804951382222,
 'density': 1.5674917207978354,
 'density_atomic': 9.594480199187135,
 'symmetry': {'crystal_system': 'Triclinic',
  'symbol': 'P-1',
  'number': 2,
  'point_group': '-1',
  'symprec': 0.1,
  'version': '1.16.2'},
 'property_name': 'summary',
 'material_id': MPID(mp-604964),
 'deprecated': False,
 'deprecation_reasons': 'None',
 'last_updated': '2022-10-28 00:56:47.317000',
 'origins': [],
 'structure': 'Full Formula (P4 H36 C4 N16 O16)\nReduced Formula: PH9C(NO)4\nabc   :   6.904773   7.560272  14.335972\nangles:  87.002792  80.674634

In [60]:
from monty.json import MontyEncoder

encoder = MontyEncoder()
a = encoder.encode(function_response[0])

TypeError: 'str' object does not support item assignment

In [63]:
from monty.json import MontyDecoder, MontyEncoder

decoder = MontyDecoder()
decoder.decode(function_response[0].dict())
# encoder = MontyEncoder()
# encoder.encode()
# json.dumps(function_response[0])

TypeError: the JSON object must be str, bytes or bytearray, not dict