In [28]:
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

In [29]:
import yaml

# Read the API credentials from the local secrets file. safe_load is the
# shorthand for yaml.load(..., Loader=yaml.SafeLoader), so only plain YAML
# types are constructed.
with open('secrets.yml', mode='r') as f:
    secrets = yaml.safe_load(f)

In [30]:
from langchain_groq import ChatGroq
import os

# Publish the first entry stored under 'groq' in the secrets file as the
# GROQ_API_KEY environment variable (presumably where ChatGroq looks it up).
os.environ["GROQ_API_KEY"] = secrets['groq'][0]

# Chat model without any response-format constraint.
chat_model = ChatGroq(model="llama3-70b-8192")

# Same model, bound so that responses are requested as a JSON object.
json_model = ChatGroq(model="llama3-70b-8192").bind(
    response_format={"type": "json_object"},
)

## Model modifier

In [31]:
# Uncomment to install openpyxl, the engine pandas uses to read .xlsx workbooks:
#%pip install openpyxl

In [32]:
# Prompt that asks the model to extract, from the user's QUERY, the parameter
# to change, its new numeric value and the matching conversion subprocesses.
# The expected answer is a JSON object with the keys 'param', 'value' and
# 'cs_list'.
# NOTE(review): the template embeds Llama 3 header tokens by hand; confirm
# whether the chat client already applies its own chat template.
params_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at identifying the correct conversion subprocess and the correct parameter
    selected by the user in his QUERY. \n
    
    As a context, you will receive two data arrays. PARAMS provides you the name of the parameters
    available to be selected. CONVERSION_SUBPROCESSES provides you the combination of 'cp' (conversion process name),
    'cin' (commodity in), 'cout' (commodity out) and 'scen' (scenario) in the format 'cp@cin@cout@scen'.\n
    
    Your goal is to output a JSON object containing three keys: 'param', 'value', 'cs_list'.
    'param' must receive the name of the selected parameter;
    'value' is the new value selected by the user;
    'cs_list' is a list with all matching conversion subprocesses (idealy only one if possible); \n
    
    NEVER MAKE UP DATA, USE ONLY DATA FROM THE GIVEN LIST. If you can't find any match to the 'cp' name, leave the field
    'cs_list' empty. \n
    
    The field 'value' only accepts numeric input. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY: {query} \n
    PARAMS: {params} \n
    CONVERSION_SUBPROCESSES: {CSs} \n
    Answer:
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    # Placeholders that must be supplied when the prompt is invoked.
    input_variables=["query","params","CSs"],
)

# Pipeline: fill the prompt, call the JSON-bound model, parse the reply as JSON.
params_chain = params_prompt | json_model | JsonOutputParser()

In [33]:
import pandas as pd
import numpy as np

# Open the model workbook inside a context manager so the underlying file
# handle is released as soon as the sheet has been read (it was previously
# left open for the lifetime of the kernel).
with pd.ExcelFile('Models/DEModel.xlsx') as tmap:
    df = pd.read_excel(tmap,"ConversionSubProcess")

# Entries of the first column, without empty cells and without 'DEBUG' rows.
conversion_processes = np.asarray(df.iloc[:,0].dropna())
mask = np.where(conversion_processes != 'DEBUG')
conversion_processes = conversion_processes[mask]
# Every column from the fifth onward is offered as a selectable parameter.
parameters = np.asarray(df.columns[4:])

# First four columns (the prompt calls them cp, cin, cout, scen); rows with a
# missing cell in any of them are dropped, as are 'DEBUG' rows.
cs = np.asarray(df.iloc[:,0:4].dropna())
mask = np.where(cs[:,0] != 'DEBUG')
cs = cs[mask]
# One 'cp@cin@cout@scen' string per row, stored in an (n, 1) object array.
conversion_subprocesses = np.empty((len(cs),1),dtype=object)

for i in range(len(cs)):
    conversion_subprocesses[i] = f'{cs[i,0]}@{cs[i,1]}@{cs[i,2]}@{cs[i,3]}'

In [34]:
# Alternative query that spells out the input and output commodities:
#query = 'Modify the operational cost of biomass CHPs with help_biomass_chp as input and electricity as output to 1000'
# Query about a process that is presumably absent from the model, used to
# check that the chain returns an empty 'cs_list' instead of inventing one.
query = 'Modify the wtf factor of go kart generators to 100'

# Run the extraction chain directly (outside the graph) and display the parsed JSON.
params_chain.invoke({"query": query, "params": parameters, "CSs": conversion_subprocesses})

{'param': 'wtf_factor', 'value': 100, 'cs_list': []}

In [35]:
import pandas as pd
import numpy as np

def param_selector(state):
    """Graph node: ask the LLM which parameter and subprocess the query targets.

    The parameter names and the 'cp@cin@cout@scen' subprocess strings are
    rebuilt from the 'ConversionSubProcess' sheet of the model workbook on
    every call; the 'parameters' and 'CSs' entries of ``state`` are not
    consulted (the original code read them and immediately overwrote them).

    Args:
        state: graph state; must provide 'query' and 'num_steps'.

    Returns:
        dict with 'output' (the parsed result of ``params_chain``) and the
        incremented 'num_steps'.
    """
    print("---PARAM SELECTOR---")
    query = state['query']
    num_steps = state['num_steps'] + 1

    # The context manager closes the workbook handle once the sheet is loaded.
    with pd.ExcelFile('Models/DEModel.xlsx') as workbook:
        df = pd.read_excel(workbook, "ConversionSubProcess")

    # Columns from the fifth onward are the selectable parameters.
    parameters = np.asarray(df.columns[4:])

    # Complete rows of the first four columns, excluding 'DEBUG' entries.
    cs = np.asarray(df.iloc[:, 0:4].dropna())
    cs = cs[np.where(cs[:, 0] != 'DEBUG')]

    # Keep the (n, 1) object-array layout so the prompt text is rendered
    # exactly as before.
    conversion_subprocesses = np.empty((len(cs), 1), dtype=object)
    for i in range(len(cs)):
        conversion_subprocesses[i] = f'{cs[i,0]}@{cs[i,1]}@{cs[i,2]}@{cs[i,3]}'

    output = params_chain.invoke({"query": query, "params": parameters, "CSs": conversion_subprocesses})

    return {"output": output,
            "num_steps": num_steps}

In [36]:
from tabulate import tabulate

def confirm_selection(state):
    """Graph node: let the user confirm the subprocess and parameter picked by the LLM.

    Flow:
      1. Candidates in ``output['cs_list']`` that are strings with at least
         four '@'-separated fields are tabulated; anything else is ignored.
      2. With one candidate the user confirms it (Y/N); with several the user
         enters its index (0 means "none of these").
      3. The parameter name is checked against the columns (fifth onward) of
         the 'ConversionSubProcess' sheet and confirmed by the user (Y/N).

    Answers are compared case-insensitively after stripping whitespace; only
    'Y' / 'YES' count as confirmation. Non-numeric or out-of-range indices are
    treated as "none of these" instead of raising or selecting a wrong entry.

    Args:
        state: graph state; must provide 'output' (dict with 'cs_list' and
            'param') and 'num_steps'.

    Returns:
        dict with 'final_answer' (either 'No matching selection' or
        'CS: <subprocess>; Param: <param>') and the incremented 'num_steps'.
    """
    def _is_yes(answer):
        # Accept 'y', 'Y', ' yes ' and similar; everything else is a refusal.
        return answer.strip().upper() in ('Y', 'YES')

    print('---CONFIRM SELECTION---')
    output = state['output']
    num_steps = state['num_steps'] + 1
    no_match = {"final_answer": 'No matching selection', "num_steps": num_steps}

    param = output.get('param')
    # Drop malformed entries up front so splitting them cannot fail later.
    candidates = [
        entry for entry in (output.get('cs_list') or [])
        if isinstance(entry, str) and len(entry.split('@')) >= 4
    ]

    if not candidates:
        print('No matching conversion subprocess was found.')
        return no_match

    rows = [[position, *entry.split('@')[:4]]
            for position, entry in enumerate(candidates, start=1)]
    table = tabulate(rows, headers=["Index", "CP", "CIN", "COUT", "Scen"])

    if len(candidates) == 1:
        print('The following matching conversion subprocess was found:\n')
        print(table)
        cs_select = 0 if _is_yes(input('Is that correct? (Y or N)\n')) else None
    else:
        print('The following conversion subprocesses were found:\n')
        print(table)
        raw_choice = input('Input the number of the correct CS (or 0 if it\'s none of these):\n')
        try:
            cs_select = int(raw_choice) - 1
        except ValueError:
            cs_select = None
        else:
            # 0 ("none"), negative numbers and too-large indices all mean no selection.
            if not 0 <= cs_select < len(candidates):
                cs_select = None

    if cs_select is None:
        return no_match

    # The workbook is only needed for the parameter check, so it is read after
    # the subprocess has been confirmed; the context manager closes the handle.
    with pd.ExcelFile('Models/DEModel.xlsx') as workbook:
        df = pd.read_excel(workbook, "ConversionSubProcess")
    parameters = list(df.columns[4:])

    if param in parameters:
        param_answer = input(f'You want to modify the parameter {param}, is that correct? (Y or N)\n')
        if _is_yes(param_answer):
            return {"final_answer": f'CS: {candidates[cs_select]}; Param: {param}',
                    "num_steps": num_steps}
    else:
        print('No matching parameter was found.')

    return no_match

In [37]:
def print_final(state):
    """Print the 'final_answer' entry of the graph state and return None."""
    print(f"FINAL ANSWER: {state['final_answer']}")
    return None

In [38]:
def param_router(state):
    """Pick the next node name from the number of matched conversion subprocesses.

    The count is taken from ``output['found_cps']`` when that key exists.
    The prompt used by the params chain only asks for 'param', 'value' and
    'cs_list', so when 'found_cps' is absent the count falls back to the
    length of 'cs_list' instead of raising KeyError.

    Args:
        state: graph state; must provide 'output' (a dict).

    Returns:
        'confirm_selection' for exactly one match, 'select_from_found' for
        more than one, 'param_not_found' otherwise.
    """
    print("---PARAM ROUTER---")
    output = state['output']

    if 'found_cps' in output:
        found = output['found_cps']
    else:
        found = len(output.get('cs_list') or [])

    if found == 1:
        return 'confirm_selection'
    elif found > 1:
        return 'select_from_found'
    else:
        return 'param_not_found'

In [39]:
from langchain.schema import Document
from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict
from typing import List

### State

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        query: user input in natural language
        parameters: names of the parameters that can be selected
        CSs: conversion subprocesses formatted as 'cp@cin@cout@scen'
        num_steps: number of steps, incremented by the nodes
        user_answer: reserved for a user reply; no node in this notebook section writes it
        output: parsed JSON returned by the params chain ('param', 'value', 'cs_list')
        final_answer: summary string produced by the confirmation step
    """
    query : str
    parameters: List[str]
    CSs: List[str]
    num_steps : int
    user_answer: str
    # The nodes store and index the parsed JSON object here, so this is a dict, not a str.
    output: dict
    final_answer : str

# Build the graph on top of the GraphState schema.
workflow = StateGraph(GraphState)

# Define the nodes
# NOTE(review): param_router is defined in this notebook but is not registered
# here — confirm whether a conditional edge using it is still planned.
workflow.add_node("param_selector", param_selector)
workflow.add_node("confirm_selection", confirm_selection)
workflow.add_node("print_final", print_final)

In [40]:
# Wire a strictly linear pipeline:
# param_selector -> confirm_selection -> print_final -> END
workflow.set_entry_point("param_selector")
workflow.add_edge("param_selector", "confirm_selection")
workflow.add_edge("confirm_selection", "print_final")
workflow.add_edge("print_final", END)

In [41]:
# Compile the graph definition into the object that is streamed further down.
app = workflow.compile()

In [44]:
# Example request; it does not name the input commodity, so several
# subprocesses can match (see the recorded output of this cell).
query = 'Modify the operational cost of biomass CHPs with electricity output to 1000'

# Initial graph state: the query plus the arrays loaded from the workbook earlier.
inputs = {"query": query, "parameters": parameters, "CSs": conversion_subprocesses, "num_steps": 0,}
# Stream the run and report each finished node. Each streamed item is iterated
# as a mapping whose keys are printed as node names; the values (presumably
# the nodes' state updates) are not used.
for output in app.stream(inputs, {"recursion_limit": 50}):
    for key, value in output.items():
        print(f"Finished running: {key}:")

---PARAM SELECTOR---
Finished running: param_selector:
---CONFIRM SELECTION---
The following conversion subprocesses were found:

  Index  CP                          CIN          COUT                    Scen
-------  --------------------------  -----------  ----------------------  -----------
      1  PP_Biomass                  Biomass      Electricity             Base
      2  PP_Coal                     Coal         Electricity             Base
      3  PP_Gas                      Gas          Electricity             Base
      4  PP_Lignite                  Lignite      Electricity             Base
      5  PP_Nuclear                  Uranium      Electricity             Base
      6  PP_PV_Res                   Dummy        Electricity             Base
      7  PP_PV_New                   Dummy        Electricity             Base
      8  PP_Run_of_River             Dummy        Electricity             Base
      9  PP_WindOff_Res              Dummy        Electricity            