In [None]:
import os
from langchain_openai import ChatOpenAI, AzureChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

def design_synthesis(api_key: str,
                      synthesis_text: str,
                      azure: bool = True,
                      model_name: str = None,
                      temp: float = 0,
                      azure_api_version: str = None,
                      azure_endpoint: str = None) -> str:
    """
    Ask a chat model to extract the synthesis procedure of each material in a text.

    The text is embedded in a fixed prompt that requests one JSON object per
    synthesized material (material name plus ordered steps with reagents,
    temperature and duration). The prompt is sent together with a short system
    message, and the model's reply is returned unchanged.

    Parameters:
        api_key (str): API key for the selected service. If falsy, the key is
            read from ``OPENAI_API_KEY`` (OpenAI) or ``AZURE_OPENAI_API_KEY``
            (Azure).
        synthesis_text (str): The text containing synthesis information.
        azure (bool): If True (default) use ``AzureChatOpenAI``; otherwise use
            ``ChatOpenAI``.
        model_name (str, optional): Azure deployment name or OpenAI model name.
            If falsy, read from ``AZURE_MODEL_DEPLOYMENT_NAME`` (Azure) or
            ``OPENAI_MODEL_NAME`` (OpenAI).
        temp (float, optional): Sampling temperature. Defaults to 0.
        azure_api_version (str, optional): Azure API version. If falsy, read
            from ``AZURE_OPENAI_API_VERSION``. Ignored when ``azure`` is False.
        azure_endpoint (str, optional): Azure endpoint URL. If falsy, read from
            ``AZURE_OPENAI_ENDPOINT``. Ignored when ``azure`` is False.

    Returns:
        str: The content of the model's reply. The prompt asks for a JSON list,
        but the reply is neither parsed nor validated here. For ``None``,
        empty, or whitespace-only ``synthesis_text`` the literal string
        ``"[]"`` is returned without contacting the model.
    """
    # The prompt defines "no synthesis information" as an empty list, so a
    # blank input has a known answer: skip client construction and the
    # network round-trip entirely.
    if synthesis_text is None or not str(synthesis_text).strip():
        return "[]"

    if not azure:
        llm = ChatOpenAI(
            model_name=model_name or os.getenv("OPENAI_MODEL_NAME"),
            temperature=temp,
            openai_api_key=api_key or os.getenv("OPENAI_API_KEY"),
        )
    else:
        llm = AzureChatOpenAI(
            azure_deployment=model_name or os.getenv("AZURE_MODEL_DEPLOYMENT_NAME"),
            api_version=azure_api_version or os.getenv("AZURE_OPENAI_API_VERSION"),
            temperature=temp or 0,
            openai_api_key=api_key or os.getenv("AZURE_OPENAI_API_KEY"),
            # Passed explicitly, with the environment variable as fallback.
            azure_endpoint=azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT"),
        )

    # Doubled braces are literal braces of the JSON template inside the f-string;
    # only {synthesis_text} is interpolated.
    synthesis_prompt = f"""
    You are a materials synthesis extraction assistant.

    Your task is to read the synthesis procedure from a scientific paper and extract the full synthesis process for **each distinct material synthesized**. If multiple materials are synthesized, return a list of JSON objects, one for each material.

    Only use information from the synthesis text provided in the Input section. If no synthesis information is present, return an empty list (`[]`).

    Each material's synthesis should be structured as follows:

    {{
      "material": "<exact formula or name as presented in the paper>",
      "synthesis": {{
        "steps": [
          {{
            "step": 1,
            "label": "<Short title of the step, e.g., 'Precursor Mixing'>",
            "details": {{
              "reagents": ["<chemical names>"],
              "temperature": "<value or null>",
              "duration": "<value or null>"
            }}
          }}
        ]
      }}
    }}

    ### Input:
    {synthesis_text}

    ### Output:
    <Only return a list of structured JSON objects corresponding to each material synthesized, and nothing else>
    """
    # The system message fixes the assistant's role; the human message carries
    # the extraction instructions together with the text to analyse.
    system_msg = SystemMessage(content="You are a materials synthesis extraction assistant.")

    response = llm.invoke([
        system_msg,
        HumanMessage(content=synthesis_prompt),
    ])
    return response.content


def extract_challenges(
                       synthesis_text: str,
                       api_key: str = None,
                       azure: bool = True,
                       model_name: str = None,
                       temp: float = 0,
                       azure_api_version: str = None,
                       azure_endpoint: str = None) -> str:
    """
    Ask a chat model to extract research challenges and their solutions from a text.

    The text is embedded in a fixed prompt that requests a JSON list with one
    object per challenge (material, stage, challenge, impact, solution,
    evidence). The prompt is sent together with a short system message, and
    the model's reply is returned unchanged.

    Parameters:
        synthesis_text (str): The text containing challenges information.
        api_key (str, optional): API key for the selected service. Defaults to
            None; if falsy, the key is read from ``OPENAI_API_KEY`` (OpenAI) or
            ``AZURE_OPENAI_API_KEY`` (Azure).
        azure (bool): If True (default) use ``AzureChatOpenAI``; otherwise use
            ``ChatOpenAI``.
        model_name (str, optional): Azure deployment name or OpenAI model name.
            If falsy, read from ``AZURE_MODEL_DEPLOYMENT_NAME`` (Azure) or
            ``OPENAI_MODEL_NAME`` (OpenAI).
        temp (float, optional): Sampling temperature. Defaults to 0.
        azure_api_version (str, optional): Azure API version. If falsy, read
            from ``AZURE_OPENAI_API_VERSION``. Ignored when ``azure`` is False.
        azure_endpoint (str, optional): Azure endpoint URL. If falsy, read from
            ``AZURE_OPENAI_ENDPOINT``. Ignored when ``azure`` is False.

    Returns:
        str: The content of the model's reply. The prompt asks for a JSON list,
        but the reply is neither parsed nor validated here. For ``None``,
        empty, or whitespace-only ``synthesis_text`` the literal string
        ``"[]"`` is returned without contacting the model.
    """
    # The prompt defines "no challenges mentioned" as an empty list, so a blank
    # input has a known answer: skip client construction and the network
    # round-trip entirely.
    if synthesis_text is None or not str(synthesis_text).strip():
        return "[]"

    if not azure:
        llm = ChatOpenAI(
            model_name=model_name or os.getenv("OPENAI_MODEL_NAME"),
            temperature=temp,
            openai_api_key=api_key or os.getenv("OPENAI_API_KEY"),
        )
    else:
        llm = AzureChatOpenAI(
            azure_deployment=model_name or os.getenv("AZURE_MODEL_DEPLOYMENT_NAME"),
            api_version=azure_api_version or os.getenv("AZURE_OPENAI_API_VERSION"),
            temperature=temp or 0,
            openai_api_key=api_key or os.getenv("AZURE_OPENAI_API_KEY"),
            # Passed explicitly, with the environment variable as fallback.
            azure_endpoint=azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT"),
        )

    # Doubled braces are literal braces of the JSON template inside the f-string;
    # only {synthesis_text} is interpolated.
    challenges_prompt = f"""

    Your task is to carefully read the provided text about the synthesis, characterization, testing, and application of one or more materials. For each material discussed, identify any key **challenges** encountered during the research and the corresponding **solutions** the authors proposed or used.

    Only use information from the provided text. If no challenges are described for a material, do not include it in the output. If the text does not mention any challenges at all, return an empty list (`[]`).

    Return the results as a list of JSON objects. Each object should represent a single challenge related to a specific material and follow this format:

    [
      {{
        "material": "<exact formula or name as presented in the paper>",
        "stage": "Synthesis" | "Characterization" | "Testing" | "Post-processing" | "Application",
        "challenge": "Describe the specific problem the authors encountered in a concise way.",
        "impact": "Explain why this problem matters — what negative effect it has.",
        "solution": "Summarize how the authors addressed or solved this challenge.",
        "evidence": "Quote or summarize where in the paper this information is discussed (e.g., page numbers, figures, tables, or sections)."
      }}
    ]

    ### Input:
    {synthesis_text}

    ### Output:
    Return ONLY the JSON list of dictionaries, and nothing else.
    """
    # The system message fixes the assistant's role; the human message carries
    # the extraction instructions together with the text to analyse.
    system_msg = SystemMessage(content="You are a helpful and precise materials research analysis assistant.")
    response = llm.invoke([
        system_msg,
        HumanMessage(content=challenges_prompt),
    ])
    return response.content