In [86]:
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.output_parsers import JsonOutputParser

In [87]:
import yaml

# Load credentials from the local secrets.yml file; the SafeLoader only builds plain YAML data types.
with open('secrets.yml', 'r') as f:
    secrets = yaml.load(f, Loader=yaml.SafeLoader)

In [88]:
from langchain_groq import ChatGroq
import os

# Publish the first entry stored under the 'groq' key of secrets.yml as the GROQ_API_KEY environment variable.
os.environ["GROQ_API_KEY"] = secrets['groq'][0]
# Plain chat model (no response-format constraint).
chat_model = ChatGroq(
            model="llama3-70b-8192",
        )
# Same model, bound with a JSON-object response format for the chains that parse JSON.
json_model = ChatGroq(
            model="llama3-70b-8192",
        ).bind(response_format={"type": "json_object"})

## Params identifier

In [89]:
#%pip install openpyxl

In [137]:
# Prompt asking the model to extract, from the user's QUERY, the parameter to change, its new
# value and the matching conversion subprocesses (each written as 'cp@cin@cout@scen').
# The requested reply is a JSON object with the keys 'param', 'value' and 'cs_list'.
# Template inputs: query, params, CSs.
params_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at identifying the correct conversion subprocess and the correct parameter
    selected by the user in his QUERY. \n
    
    As a context, you will receive two data arrays. PARAMS provides you the name of the parameters
    available to be selected. CONVERSION_SUBPROCESSES provides you the combination of 'cp' (conversion process name),
    'cin' (commodity in), 'cout' (commodity out) and 'scen' (scenario) in the format 'cp@cin@cout@scen'.\n
    
    Your goal is to output a JSON object containing three keys: 'param', 'value', 'cs_list'.
    'param' must receive the name of the selected parameter;
    'value' is the new value selected by the user;
    'cs_list' is a list with all matching conversion subprocesses (idealy only one if possible); \n
    
    NEVER MAKE UP DATA, USE ONLY DATA FROM THE GIVEN LIST. NEVER MODIFY THE SELECTED ENTRY, USE IT AS YOU
    FOUND IT IN THE LIST! \n
    
    If you can't find any match to the 'cp' name, leave the field 'cs_list' empty. If you can't find any match
    to the 'param' name, fill the field param with 'NOT_FOUND'. \n
    
    The field 'value' only accepts numeric input, unless the input given by the user contains [], in this
    case you should output it as a string. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY: {query} \n
    PARAMS: {params} \n
    CONVERSION_SUBPROCESSES: {CSs} \n
    Answer:
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["query","params","CSs"],
)

# Pipeline: fill the prompt, call the JSON-bound model, parse the reply with JsonOutputParser.
params_chain = params_prompt | json_model | JsonOutputParser()

In [138]:
import pandas as pd
import numpy as np

# Read the 'ConversionSubProcess' sheet of the DEModel workbook.
tmap = pd.ExcelFile('Models/DEModel.xlsx')
df = pd.read_excel(tmap,"ConversionSubProcess")

# First column without missing values, minus the entries equal to 'DEBUG'.
conversion_processes = np.asarray(df.iloc[:,0].dropna())
mask = np.where(conversion_processes != 'DEBUG')
conversion_processes = conversion_processes[mask]
# Parameter names are the column headers from the fifth column onwards.
parameters = np.asarray(df.columns[4:])

# First four columns, keeping only complete rows whose first column is not 'DEBUG'.
cs = np.asarray(df.iloc[:,0:4].dropna())
mask = np.where(cs[:,0] != 'DEBUG')
cs = cs[mask]
# One-column object array, one row per remaining entry of `cs`.
conversion_subprocesses = np.empty((len(cs),1),dtype=object)

# Join the four columns of each row with '@' (the 'cp@cin@cout@scen' format named in the prompt).
for i in range(len(cs)):
    conversion_subprocesses[i] = f'{cs[i,0]}@{cs[i,1]}@{cs[i,2]}@{cs[i,3]}'

In [139]:
# Earlier example request, kept disabled:
#query = 'Modify the operational cost of biomass CHPs with help_biomass_chp as input and electricity as output to 1000'
# NOTE: `query` is assigned twice in a row, so only the second request reaches the chain.
query = 'Modify the wtf factor of go kart generators to 100'
query = 'Modify the operational cost of biomass CHPs with electricity output to [2020 660;2050 555]'

# Ad-hoc run of the chain with the `parameters` and `conversion_subprocesses` arrays built in the previous cell.
params_chain.invoke({"query": query, "params": parameters, "CSs": conversion_subprocesses})

{'param': 'opex_cost_energy',
 'value': '[2020 660;2050 555]',
 'cs_list': ['Biomass_CHP@Biomass@Electricity@Base']}

In [126]:
import pandas as pd
import numpy as np
from tabulate import tabulate

def _load_cs_options(model):
    """Read the selectable parameters and conversion subprocesses of a model.

    Args:
        model (str): Model name; the workbook read is 'Models/<model>.xlsx'.

    Returns:
        tuple: ``(parameters, conversion_subprocesses)``. ``parameters`` is an
        array with the sheet's column names from the fifth column onwards.
        ``conversion_subprocesses`` is an (n, 1) object array of
        'cp@cin@cout@scen' strings built from the first four columns; rows with
        a missing value or with 'DEBUG' in the first column are skipped.
    """
    # Passing the path (instead of a pd.ExcelFile) lets pandas open and close the file itself.
    df = pd.read_excel(f'Models/{model}.xlsx', sheet_name="ConversionSubProcess")
    parameters = np.asarray(df.columns[4:])

    cs = np.asarray(df.iloc[:, 0:4].dropna())
    cs = cs[cs[:, 0] != 'DEBUG']
    # Same (n, 1) layout as before so the text injected into the prompt is unchanged.
    conversion_subprocesses = np.empty((len(cs), 1), dtype=object)
    for i in range(len(cs)):
        conversion_subprocesses[i] = f'{cs[i,0]}@{cs[i,1]}@{cs[i,2]}@{cs[i,3]}'
    return parameters, conversion_subprocesses


def _is_yes(reply):
    """Return True only for an explicit yes ('Y', 'y', 'yes', ...); anything else counts as no."""
    return reply.strip().upper() in ('Y', 'YES')


def _ask_cs_choice(n_options):
    """Ask for a listed conversion subprocess until the reply is a whole number in 0..n_options.

    Returns:
        int: zero-based index of the chosen entry, or -1 when the user answered 0 (none).
    """
    while True:
        reply = input('Input the number of the correct CS (or 0 if it\'s none of these):\n')
        try:
            choice = int(reply)
        except ValueError:
            choice = -1
        if 0 <= choice <= n_options:
            return choice - 1
        print(f'Please enter a whole number between 0 and {n_options}.')


def param_selector(state):
    """Identify and confirm the conversion subprocess and parameter the user wants to modify.

    The LLM chain proposes a parameter, a new value and candidate conversion
    subprocesses; the user then confirms them on the console.

    Args:
        state (dict): Graph state. Reads 'query' and 'num_steps', plus the
            optional 'model' (defaults to 'DEModel', the workbook that used to
            be hard-coded here).

    Returns:
        dict: State update with 'num_steps', 'cs', 'parameter' and
        'selection_is_valid'. A confirmed selection also carries 'new_value';
        a rejected one carries 'final_answer', because the graph routes
        rejected selections straight to the output generator, which reads it.
    """
    print("---PARAM SELECTOR---")
    query = state['query']
    num_steps = state['num_steps'] + 1
    model = state.get('model', 'DEModel')

    parameters, conversion_subprocesses = _load_cs_options(model)

    output = params_chain.invoke({"query": query, "params": parameters, "CSs": conversion_subprocesses})
    if not isinstance(output, dict):
        # The parser can hand back any JSON value; treat a non-object as "nothing found".
        output = {}

    print('---CONFIRM SELECTION---')

    param = output.get('param')
    new_value = output.get('value')

    # Keep only candidates that really exist in the workbook: the model is told not to
    # invent entries, but nothing guarantees it, and later code indexes into these strings.
    proposed = output.get('cs_list') or []
    if isinstance(proposed, str):
        proposed = [proposed]
    elif not isinstance(proposed, list):
        proposed = []
    known_cs = conversion_subprocesses[:, 0].tolist()
    cs_list = [entry for entry in proposed if entry in known_cs]

    cs_select = None  # stays None until the user confirms one entry
    if not cs_list:
        print('No matching conversion subprocess was found.')
    else:
        data = [[i + 1] + entry.split('@')[:4] for i, entry in enumerate(cs_list)]
        table = tabulate(data, headers=["Index", "CP", "CIN", "COUT", "Scen"])
        if len(cs_list) == 1:
            print('The following matching conversion subprocess was found:\n')
            print(table)
            if _is_yes(input('Is that correct? (Y or N)\n')):
                cs_select = 0
        else:
            print('The following conversion subprocesses were found:\n')
            print(table)
            choice = _ask_cs_choice(len(cs_list))
            if choice != -1:
                cs_select = choice

    if cs_select is None:
        print('FINAL ANSWER: No matching selection.')
        return {"num_steps": num_steps,
                "cs": 'NO_MATCH',
                "selection_is_valid": False,
                "parameter": 'NO_MATCH',
                "final_answer": 'No matching conversion subprocess was selected.'}

    if param in parameters.tolist():
        param_confirmed = _is_yes(
            input(f'You want to modify the parameter {param}, is that correct? (Y or N)\n'))
    else:
        print('No matching parameter was found.')
        param_confirmed = False

    if not param_confirmed:
        print('FINAL ANSWER: No matching selection.')
        return {"num_steps": num_steps,
                "cs": cs_list[cs_select],
                "selection_is_valid": False,
                "parameter": 'NO_MATCH',
                "final_answer": 'No matching parameter was selected.'}

    print(f'FINAL ANSWER: CS: {cs_list[cs_select]}; Param: {param}')
    return {"num_steps": num_steps,
            "cs": cs_list[cs_select],
            "new_value": new_value,
            "selection_is_valid": True,
            "parameter": param}
    
    

## Scenario identifier

In [94]:
# Prompt asking the model to pick, from the SCENARIOS list, the scenario named in the user's QUERY.
# The requested reply is a JSON object with the single key 'scenario_name' ('NOT_FOUND' when nothing matches).
# Template inputs: query, scenarios.
scenario_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at identifying the correct scenario choosen by the user
    in his QUERY to have the simulation run. \n
    
    As a context, you will receive a data array called SCENARIOS, which contains
    all of the scenarios that are available to be simulated. \n
    
    Your goal is to output a JSON object containing one key called 'scenario_name' that contains
    the name of the scenario selected by the user. \n
    
    NEVER MAKE UP DATA, USE ONLY DATA FROM THE GIVEN LIST. If you can't find any match to the asked scenario,
    simply fill the key 'scenario_name' with 'NOT_FOUND'. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY: {query} \n
    SCENARIOS: {scenarios} \n
    Answer:
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["query","scenarios"],
)
# Pipeline: fill the prompt, call the JSON-bound model, parse the reply with JsonOutputParser.
scenario_chain = scenario_prompt | json_model | JsonOutputParser()

# Ad-hoc check with a hand-written scenario list (not read from the workbook).
query = 'Run 8760h for DEModel'
scenarios = ['Base', 'Test', 'Base4twk', 'Base8760h', 'Base8twk']
scenario_chain.invoke({"query": query, "scenarios": scenarios})

{'scenario_name': 'Base8760h'}

In [95]:
import pandas as pd
import numpy as np

def _ask_scenario_choice(scenarios):
    """List the scenarios and ask for one until the reply is a whole number in 0..len(scenarios).

    Returns:
        int: zero-based index of the chosen scenario, or -1 when the user answered 0 (none).
    """
    for i in range(len(scenarios)):
        print(f'{i+1} - {scenarios[i]}')
    while True:
        reply = input('Select the desired scenario to be run (select 0 if none of these):\n')
        try:
            choice = int(reply)
        except ValueError:
            choice = -1
        if 0 <= choice <= len(scenarios):
            return choice - 1
        print(f'Please enter a whole number between 0 and {len(scenarios)}.')


def scenario_selector(state):
    """Identify the scenario requested in the query, asking the user when the LLM finds none.

    Args:
        state (dict): Graph state. Reads 'query' and 'num_steps', plus the
            optional 'model' (defaults to 'DEModel', the workbook that used to
            be hard-coded here).

    Returns:
        dict: State update with 'num_steps', 'scenario' ('NOT_FOUND' when none
        was selected), 'selection_is_valid' and 'final_answer'.
    """
    print('---SCENARIO SELECTOR---')
    num_steps = state['num_steps'] + 1
    query = state['query']
    model = state.get('model', 'DEModel')

    # Passing the path (instead of a pd.ExcelFile) lets pandas open and close the file itself.
    df = pd.read_excel(f'Models/{model}.xlsx', sheet_name="Scenario")
    scenarios = np.asarray(df.iloc[:,0].dropna())
    known_scenarios = scenarios.tolist()

    output = scenario_chain.invoke({'query': query, 'scenarios': scenarios})
    # A reply that is not an object, or lacks the key, is handled like an explicit 'NOT_FOUND'.
    identified_scenario = output.get('scenario_name', 'NOT_FOUND') if isinstance(output, dict) else 'NOT_FOUND'
    print(f'IDENTIFIED SCENARIO: {identified_scenario}')

    if identified_scenario == 'NOT_FOUND' or identified_scenario not in known_scenarios:
        print('No valid scenario was identified in the request, here are the available scenarios:\n')
        selection = _ask_scenario_choice(known_scenarios)
        if selection != -1:
            identified_scenario = known_scenarios[selection]
        else:
            identified_scenario = 'NOT_FOUND'

    if identified_scenario == 'NOT_FOUND':
        message = 'No valid scenario was found'
        valid = False
    else:
        message = f'Selected scenario for simulation: {identified_scenario}'
        valid = True

    print(message)
    return {'num_steps': num_steps,
            'scenario': identified_scenario,
            'selection_is_valid': valid,
            'final_answer': message}

## Plot identifier

In [96]:
# Prompt asking the model to pick, from the user's QUERY, one entry of PLOT_TYPES and one entry of VARIABLES.
# The requested reply is a JSON object with the keys 'plot_type' and 'variable'
# (each 'NOT_FOUND' when nothing matches). Template inputs: query, plot_types, variables.
plotter_id_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at identifying from the user's QUERY the correct plot type requested by
    the user and the desired variable the user wants to plot. \n
    
    As a context, you will receive two data arrays:
    PLOT_TYPES will provide you information about the available plot types;
    VARIABLES will provide you information about the available variables to be plotted. \n
    
    Your goal is to output a JSON OBJECT containing only two keys 'plot_type' and 'variable'.
    'plot_type' will receive the selected plot type from PLOT_TYPES, if you can't find the plot
    requested by the user in the list, fill the key with 'NOT_FOUND';
    'variable' will receive the selected variable from VARIABLES, if you can't find the
    variable requested by the user fill the key with 'NOT_FOUND'. \n
    
    NEVER MAKE UP DATA, USE ONLY DATA FROM THE GIVEN LISTS. \n

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    QUERY: {query} \n
    PLOT_TYPES: {plot_types} \n
    VARIABLES: {variables} \n
    Answer:
    <|eot_id|>
    <|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["query","plot_types","variables"],
)
# Pipeline: fill the prompt, call the JSON-bound model, parse the reply with JsonOutputParser.
plotter_id_chain = plotter_id_prompt | json_model | JsonOutputParser()

# Ad-hoc check with hand-written lists; the query names a plot type but no variable.
query = 'Show me the sankey plot for DEModel with Base scenario'
plot_types = ['Bar', 'TimeSeries', 'Sankey', 'SingleValue']
variables = ['TOTEX','OPEX','CAPEX','total_annual_co2_emission','cap_active','cap_new','cap_res','pin (power input)','pout (power output)']
plotter_id_chain.invoke({"query": query, "plot_types": plot_types, "variables": variables})

{'plot_type': 'Sankey', 'variable': 'NOT_FOUND'}

In [97]:
def _ask_plot_choice(options, prompt):
    """List the options and ask for one until the reply is a whole number in 0..len(options).

    Args:
        options (list): Entries to show, numbered from 1.
        prompt (str): Question passed to ``input``.

    Returns:
        int: zero-based index of the chosen entry, or -1 when the user answered 0 (none).
    """
    for i in range(len(options)):
        print(f'{i+1} - {options[i]}')
    while True:
        reply = input(prompt)
        try:
            choice = int(reply)
        except ValueError:
            choice = -1
        if 0 <= choice <= len(options):
            return choice - 1
        print(f'Please enter a whole number between 0 and {len(options)}.')


def plot_selector(state):
    """Identify the plot type and variable requested in the query, asking the user for missing ones.

    Args:
        state (dict): Graph state; reads 'query' and 'num_steps'.

    Returns:
        dict: State update with 'num_steps', 'plot_type', 'variable'
        ('NOT_FOUND' for whichever was not selected), 'selection_is_valid'
        and 'final_answer'.
    """
    print('---PLOT SELECTOR---')
    num_steps = state['num_steps'] + 1
    query = state['query']

    plot_types = ['Bar', 'TimeSeries', 'Sankey', 'SingleValue']
    variables = ['TOTEX','OPEX','CAPEX','total_annual_co2_emission','cap_active','cap_new','cap_res','pin','pout']

    output = plotter_id_chain.invoke({"query": query, "plot_types": plot_types, "variables": variables})
    if not isinstance(output, dict):
        # The parser can hand back any JSON value; treat a non-object as "nothing found".
        output = {}
    identified_plot = output.get('plot_type', 'NOT_FOUND')
    identified_variable = output.get('variable', 'NOT_FOUND')
    print(f'IDENTIFIED PLOT: {identified_plot}\nIDENTIFIED VARIABLE: {identified_variable}')

    if identified_plot == 'NOT_FOUND' or identified_plot not in plot_types:
        print('No valid plot type was identified in the request, here are the available plot types:\n')
        selection = _ask_plot_choice(plot_types, 'Select the desired type of plot (select 0 if none of these):\n')
        identified_plot = plot_types[selection] if selection != -1 else 'NOT_FOUND'

    if identified_variable == 'NOT_FOUND' or identified_variable not in variables:
        print('No valid variable was identified in the request, here are the available variables:\n')
        selection = _ask_plot_choice(variables, 'Select the desired variable to be plotted (select 0 if none of these):\n')
        # Index the variables list with the chosen position (this used to index
        # plot_types with the whole variables list, which raises TypeError).
        identified_variable = variables[selection] if selection != -1 else 'NOT_FOUND'

    if identified_plot == 'NOT_FOUND' or identified_variable == 'NOT_FOUND':
        message = 'No valid plot was identified'
        valid = False
    else:
        message = f'Selected plot: {identified_plot} for {identified_variable}'
        valid = True

    print(message)

    return {'num_steps': num_steps,
            'plot_type': identified_plot,
            'variable': identified_variable,
            'selection_is_valid': valid,
            'final_answer': message}

## Model modifier node

In [122]:
from openpyxl import load_workbook

def model_modifier(state):
    """Write a new parameter value for one conversion subprocess and save a modified workbook copy.

    The header row of the 'ConversionSubProcess' sheet is searched for the
    parameter name, and the following rows for the one whose first four cells,
    joined with '@', equal ``state['cs']``. The cell where they meet receives
    the new value.

    Args:
        state (dict): Graph state; reads 'model', 'parameter', 'cs',
            'new_value' and 'num_steps'.

    Returns:
        dict: State update with 'num_steps' and 'final_answer'.
    """
    print('---MODEL MODIFIER---')

    model = state['model']
    parameter = state['parameter']
    cs = state['cs']
    new_value = state['new_value']
    num_steps = state['num_steps'] + 1

    workbook = load_workbook(filename=f'Models/{model}.xlsx')
    cs_sheet = workbook['ConversionSubProcess']

    rows = iter(cs_sheet.rows)
    header = next(rows, ())

    # Position of the parameter inside a row. Working with positions and cell objects
    # (rather than slicing coordinate strings such as 'AB1') keeps multi-letter columns correct.
    param_pos = None
    for pos, cell in enumerate(header):
        if cell.value == parameter:
            param_pos = pos

    # As before, the last matching row wins if the key appears more than once.
    target_row = None
    for row in rows:
        if '@'.join(str(cell.value) for cell in row[:4]) == cs:
            target_row = row

    if param_pos is None or target_row is None:
        final_answer = 'Selected param or cs not found.'
        print('Selected param or cs not found.')
    else:
        target = target_row[param_pos]
        print(f'Cell: {target.coordinate}')
        old_value = target.value
        target.value = new_value
        # The source workbook is left untouched; the copy is named after the model in use.
        workbook.save(filename=f"Models/{model}_modified.xlsx")
        final_answer = f'Value successfully modified from {old_value} to {new_value}'
        print(final_answer)
    return {"num_steps": num_steps,
            "final_answer": final_answer}
    

## Sim runner node

In [99]:
def sim_runner(state):
    """Print the simulation command for the selected model and scenario.

    Args:
        state (dict): Graph state; reads 'num_steps', 'model' and 'scenario'.

    Returns:
        dict: State update with the incremented 'num_steps' and a fixed 'final_answer'.
    """
    print('---SIMULATION RUNNER---')

    steps_taken = state['num_steps'] + 1
    model, scenario = state['model'], state['scenario']

    # The command is only printed here, not executed.
    print(f'FINAL COMMAND: python cesm.py run {model} {scenario}\n')

    update = {"num_steps": steps_taken}
    update["final_answer"] = 'The requested simulation was successfully submited!'
    return update

## Plotter node

In [100]:
def plotter(state):
    """Print the plot command for the selected model, scenario, plot type and variable.

    Args:
        state (dict): Graph state; reads 'num_steps', 'model', 'scenario',
            'plot_type' and 'variable'.

    Returns:
        dict: State update with the incremented 'num_steps' and a fixed 'final_answer'.
    """
    print('---PLOTTER---')

    steps_taken = state['num_steps'] + 1
    model, scenario = state['model'], state['scenario']
    plot_type, variable = state['plot_type'], state['variable']

    # The command is only printed here, not executed.
    print(f'FINAL COMMAND: python cesm.py plot {model} {scenario} {plot_type} {variable} \n')

    update = {"num_steps": steps_taken}
    update["final_answer"] = 'The requested data was successfully plotted!'
    return update

## Output generator

In [101]:
## OUTPUT GENERATOR
# Prompt asking the model to answer INITIAL_QUERY using CONTEXT, without revealing
# that the information came from a context. Template inputs: initial_query, context.
output_generator_prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a specialist at answering the user based on context given. \n
    
    Given the INITIAL_QUERY and a CONTEXT, generate an answer for the query
    asked by the user. You should make use of the provided information
    to answer the user in the best possible way. If you think the answer
    does not answer the user completely, ask the user for the necessary
    information if possible. \n
    
    It's important never to cite that you got it from a context, the user should
    think that you know the information.

    <|eot_id|><|start_header_id|>user<|end_header_id|>
    INITIAL_QUERY: {initial_query} \n
    CONTEXT: {context} \n
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["initial_query","context"],
)
# Pipeline: fill the prompt, call the plain chat model, return the reply as a string.
output_generator_chain = output_generator_prompt | chat_model | StrOutputParser()

# query = 'Is my car more powerful than a GT-R R32?'
# context = 'The car owned by the user is from 2010 and has 100 hp'
# print(output_generator_chain.invoke({"initial_query": query, "context": context}))

In [102]:
def output_generator(state):
    """Turn the last tool message into the final answer shown to the user.

    Args:
        state (dict): Graph state; reads 'query', 'final_answer' (used as
            context for the chain) and 'num_steps'.

    Returns:
        dict: State update with the incremented 'num_steps' and the generated
        text as 'final_answer'.
    """
    print("---GENERATE OUTPUT---")

    initial_query, context = state['query'], state['final_answer']
    steps_taken = state['num_steps'] + 1

    chain_input = {"initial_query": initial_query, "context": context}
    answer = output_generator_chain.invoke(chain_input)
    print(f'GENERATED OUTPUT:\n{answer}\n')

    return {"num_steps": steps_taken, "final_answer": answer}

## Routers

In [103]:
def entry_node(state):
    """Placeholder node: ignores the state and returns None."""
    return

In [104]:
def route_to_es_tool(state):
    """
    Pick the next branch from the tool stored in the state.
    Args:
        state (dict): The current graph state; reads 'selected_tool'
    Returns:
        str: 'data_plotter', 'sim_runner' or 'model_modifier';
        None when the selected tool is none of these
    """
    # (tool name, banner printed when that tool is selected)
    routes = (
        ('data_plotter', "---ROUTE QUERY TO DATA PLOTTER---"),
        ('sim_runner', "---ROUTE QUERY TO SIMULATION RUNNER---"),
        ('model_modifier', "---ROUTE QUERY TO MODEL MODIFIER---"),
    )
    requested = state['selected_tool']

    for tool, banner in routes:
        if requested == tool:
            print(banner)
            return tool
    return None

In [105]:
def selection_validator(state):
    """Return the selected tool's name when the selection is valid, else 'end_not_valid'.

    Args:
        state (dict): Graph state; reads 'selection_is_valid' and 'selected_tool'.
    Returns:
        str: Branch key for the conditional edge.
    """
    # Both keys are read up front, whichever branch is taken.
    is_valid, tool = state['selection_is_valid'], state['selected_tool']
    return tool if is_valid else "end_not_valid"

## Build graph

In [140]:
from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict

### State

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        query: user request text, read by the selector nodes and the output generator
        num_steps: step counter; each node returns it incremented by one
        selected_tool: 'data_plotter', 'sim_runner' or 'model_modifier'; read by the routers
        model: model name; model_modifier opens 'Models/<model>.xlsx'
        scenario: scenario name returned by scenario_selector ('NOT_FOUND' if none)
        parameter: parameter name returned by param_selector ('NO_MATCH' if none)
        cs: conversion subprocess as 'cp@cin@cout@scen' ('NO_MATCH' if none)
        variable: variable to plot, returned by plot_selector ('NOT_FOUND' if none)
        plot_type: plot type returned by plot_selector ('NOT_FOUND' if none)
        new_value: value to write into the model, returned by param_selector
        selection_is_valid: set by the selector nodes; read by selection_validator
        final_answer: message for the user; output_generator reads it as context and replaces it
    """
    query : str
    num_steps : int
    selected_tool: str
    model: str
    scenario: str
    parameter: str
    cs: str
    variable: str
    plot_type: str
    # NOTE(review): the params prompt lets the model return a string for inputs containing [],
    # so this is not always a float — confirm the intended type.
    new_value: float
    selection_is_valid: bool
    final_answer: str

def _route_after_scenario(state):
    """Route on from the scenario selector, stopping early when no valid scenario was chosen.

    Without this check the plotting branch went on to plot_selector, which
    overwrites 'selection_is_valid', so a plot could be issued for the
    scenario 'NOT_FOUND'.

    Args:
        state (dict): Graph state; reads 'selection_is_valid' (set by
            scenario_selector) and, through route_to_es_tool, 'selected_tool'.
    Returns:
        str: 'end_not_valid', or whatever route_to_es_tool returns.
    """
    if not state['selection_is_valid']:
        return "end_not_valid"
    return route_to_es_tool(state)


workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("entry_node", entry_node)
# Second pass-through node: gives the simulation branch a place to hang its validity check.
workflow.add_node("inter_node", entry_node)
workflow.add_node("param_selector", param_selector)
workflow.add_node("scenario_selector", scenario_selector)
workflow.add_node("plot_selector", plot_selector)
workflow.add_node("model_modifier", model_modifier)
workflow.add_node("sim_runner", sim_runner)
workflow.add_node("plotter", plotter)
workflow.add_node("output_generator", output_generator)

# Entry: plotting and simulation both start by picking a scenario;
# model modification starts by picking the parameter and conversion subprocess.
workflow.set_entry_point("entry_node")
workflow.add_conditional_edges(
    "entry_node",
    route_to_es_tool,
    {
        "data_plotter": "scenario_selector",
        "sim_runner": "scenario_selector",
        "model_modifier": "param_selector"
    }
)

# After the scenario: an invalid scenario ends the run on both branches.
workflow.add_conditional_edges(
    "scenario_selector",
    _route_after_scenario,
    {
        "data_plotter": "plot_selector",
        "sim_runner": "inter_node",
        "end_not_valid": "output_generator"
    }
)

# Each selector is followed by a validity check: run the tool, or go straight to the answer.
workflow.add_conditional_edges(
    "param_selector",
    selection_validator,
    {
        "model_modifier": "model_modifier",
        "end_not_valid": "output_generator"
    }
)

workflow.add_conditional_edges(
    "plot_selector",
    selection_validator,
    {
        "data_plotter": "plotter",
        "end_not_valid": "output_generator"
    }
)

workflow.add_conditional_edges(
    "inter_node",
    selection_validator,
    {
        "sim_runner": "sim_runner",
        "end_not_valid": "output_generator"
    }
)

# Every tool hands its message to the output generator, which ends the run.
workflow.add_edge("model_modifier", "output_generator")
workflow.add_edge("plotter", "output_generator")
workflow.add_edge("sim_runner", "output_generator")
workflow.add_edge("output_generator", END)

app = workflow.compile()

In [141]:
# Example requests for the plotting and simulation tools, currently disabled:
#query = 'The sankey plot for OPEX in the Base scenario of DEModel'
#query = 'Run the Base scenario of DEModel please'
query = 'Modify the investment cost power of the Biomass CHP to [2020 660;2050 555]'


# Initial state: the tool and model are set by hand here and the step counter starts at 0.
inputs = {"query": query, "selected_tool": 'model_modifier', "model": 'DEModel', "num_steps": 0}
# Stream the graph and report each key of every streamed item; `value` is not used.
for output in app.stream(inputs, {"recursion_limit": 50}):
    for key, value in output.items():
        print(f"Finished running: {key}:")

---ROUTE QUERY TO MODEL MODIFIER---
---PARAM SELECTOR---
---CONFIRM SELECTION---
The following conversion subprocesses were found:

  Index  CP           CIN               COUT                Scen
-------  -----------  ----------------  ------------------  ------
      1  Biomass_CHP  Biomass           Help_Biomass_CHP    Base
      2  Biomass_CHP  Help_Biomass_CHP  Dummy               Base
      3  Biomass_CHP  Help_Biomass_CHP  Electricity         Base
      4  Biomass_CHP  Help_Biomass_CHP  Industrial_Heat_LT  Base
FINAL ANSWER: CS: Biomass_CHP@Help_Biomass_CHP@Dummy@Base; Param: capex_cost_power
Finished running: param_selector:
---MODEL MODIFIER---
Cell: R60
Value successfully modified from None to [2020 660;2050 555]
Finished running: model_modifier:
---GENERATE OUTPUT---
GENERATED OUTPUT:
The investment cost power of the Biomass CHP has been successfully modified to [2020: 660; 2050: 555]. This means that the cost of investing in biomass combined heat and power (CHP) technology 