In [1]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from datetime import datetime
from dateutil.relativedelta import relativedelta
from tqdm import tqdm
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

## Query generator
Query generation is needed to make requests to the **KPI engine** and the **predictor** module.<br>
Both communications require the same _url format_ to make requests to the modules.


The following is the url format with an example:<br>
http://127.0.0.1:8000/{kpi,predictor}/kpi_id/calculate?optionalParameters<br>
<br>
Example (**today** is 26/11/2024):<br>
_Prompt_: Calculate Energy Cost Working Time for Testing Machine 3 for last month<br>
_Query_: http://127.0.0.1:8000/kpi/energy_cost_working_time/calculate?machineType=TestingMachine3&startTime=2024-10-26&endTime=2024-11-25<br><br>

_Prompt_: Predict the future Energy Cost Working Time for next 5 days for Testing Machine 3 based on last month data<br>
_Query_: http://127.0.0.1:8000/predictor/energy_cost_working_time/calculate?machineType=TestingMachine3&startTime=2024-10-26&endTime=2024-11-25&startTimeP=2024-11-27&endTimeP=2024-12-01<br><br>

In [2]:
# temperature=0 keeps the answer as deterministic as possible: the output is
# consumed by a positional parser, so sampling variety is not wanted here.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)

# For the few-shot examples to be effective, their 'data' field is NOT rebuilt
# dynamically with absolute dates derived from the day of execution. Instead the
# LLM is asked to fill 'data' with relative time information (days=1, weeks=2, ...),
# which is turned into absolute dates afterwards.
#
# Layout of the 'data' field (four comma-separated values):
#   KPI name, machine name, time window of the input data, time window of the prediction
# 'NULL' marks a window that the request does not specify.
#
# Few-shot examples. Every value is separated by exactly ", " so that the model
# is shown one consistent output format.
esempi = [
    {
        "testo": "Predict for tomorrow the Energy Cost Working Time for Large Capacity Cutting Machine 2 based on last week data",
        "data": "Energy Cost Working Time, Large Capacity Cutting Machine 2, weeks=1, days=1",
    },
    {
        "testo": "Predict the future Power Consumption Efficiency for Riveting Machine 2 over the next 5 days",
        "data": "Power Consumption Efficiency, Riveting Machine 2, NULL, days=5",
    },
    {
        "testo": "Can you calculate Machine Utilization Rate for Assembly Machine 1 for yesterday?",
        "data": "Machine Utilization Rate, Assembly Machine 1, days=1, NULL",
    },
    {
        "testo": "Calculate Machine Usage Trend for Laser Welding Machine 1 for today",
        "data": "Machine Usage Trend, Laser Welding Machine 1, days=0, NULL",
    },
    {
        "testo": "Calculate for the last 2 weeks Cost Per Unit for Laser Welding Machine 2",
        "data": "Cost Per Unit, Laser Welding Machine 2, weeks=2, NULL",
    },
    {
        "testo": "Can you predict for the future 3 weeks the Energy Cost Working Time for Large Capacity Cutting Machine 2 based on 24/10/2024 data?",
        "data": "Energy Cost Working Time, Large Capacity Cutting Machine 2, 2024-10-24->2024-10-24, weeks=3",
    },
]


# Few-shot prompt creation: how a single example is rendered inside the prompt
esempio_template = PromptTemplate(
    input_variables=["testo", "data"],
    template="Text: {testo}\nData: {data}\n"
)

# The rendered examples are followed by the suffix, whose {text_input}
# placeholder receives the request to be converted.
few_shot_prompt = FewShotPromptTemplate(
    examples=esempi,
    example_prompt=esempio_template,
    suffix= "Fill the Data field for the following prompt \nText: {text_input}\nData:\nThe filled field needs to contain four values as the examples above",
    input_variables=["text_input"]
)


### Test

In [3]:
# Smoke test: render the few-shot prompt for one request and show the raw LLM answer.
text = "calculate for last 4 days Operative Time for Laser Welding Machine 1"
# Fill the {text_input} placeholder of the few-shot prompt with the request
prompt = few_shot_prompt.format(text_input= text)
# `data` holds the LLM response; the parser cell further down reads data.content
data = llm.invoke(prompt)
print(data.content)

Data: Operative Time, Laser Welding Machine 1, days=4, NULL



Several tests have been conducted with very good results, using input prompts with different temporal contexts (today/tomorrow/yesterday/days/months/weeks/years/specific month/date interval/specific date).

### Parser
A simple parser that converts the LLM output pattern into the _url_ used to forward the input prompt to the KPI Engine or the Predictor.

In [22]:
def toDate(data, future=False, today=None):
    """Convert one time field of the LLM output into a ``start->end`` date range.

    Accepted forms of ``data`` (surrounding whitespace is ignored):

    * ``"YYYY-MM-DD->YYYY-MM-DD"``: already a range; returned without
      further validation.
    * ``"NULL"``: the whole dataset, from the first available day
      (2024-03-01) up to yesterday.
    * ``"<unit>=<n>"`` with unit in ``days``/``weeks``/``months``/``years``:
      a window of ``n`` units relative to today; ``n == 0`` means "today only".

    Args:
        data (str): the time field to convert.
        future (bool): False (default) builds a backward-looking window that
            starts ``n`` units ago and ends yesterday. True builds a
            forward-looking window that starts tomorrow and ends ``n`` units
            from today.
        today (datetime | None): reference day; defaults to ``datetime.now()``.
            Passing it explicitly makes the result reproducible.

    Returns:
        str: the range formatted as ``"YYYY-MM-DD->YYYY-MM-DD"``.

    Raises:
        ValueError: if ``data`` matches none of the accepted forms, if the
            amount is not an integer, or if ``"NULL"`` is combined with
            ``future=True``.
    """
    TODAY = datetime.now() if today is None else today
    # first data available is from 2024-03-01
    FIRST_DAY = "2024-03-01"
    DATE_FORMAT = "%Y-%m-%d"
    UNITS = ("days", "weeks", "months", "years")

    # LLM output may carry stray spaces/newlines around the field
    data = data.strip()

    if "->" in data:
        return data

    # NULL -> all dataset use case
    if data == "NULL":
        if future:
            raise ValueError("a forward-looking window cannot be 'NULL'")
        return f"{FIRST_DAY}->{(TODAY - relativedelta(days=1)).strftime(DATE_FORMAT)}"

    unit, separator, raw_value = data.partition("=")
    unit = unit.strip()
    if not separator or unit not in UNITS:
        # fail with a readable message instead of an arithmetic TypeError later on
        raise ValueError(
            f"unsupported time field {data!r}: expected 'NULL', 'start->end' "
            f"or '<unit>=<n>' with unit in {UNITS}"
        )
    # int() tolerates surrounding whitespace and raises ValueError otherwise
    delta = relativedelta(**{unit: int(raw_value)})

    if delta == relativedelta():
        # 'today' use case: a zero-length offset means the current day only
        start = end = TODAY
    elif future:
        start = TODAY + relativedelta(days=1)
        end = TODAY + delta
    else:
        start = TODAY - delta
        end = TODAY - relativedelta(days=1)
    return f"{start.strftime(DATE_FORMAT)}->{end.strftime(DATE_FORMAT)}"


In [23]:
# an example label
label = "kpi_calc"

# Parse the LLM answer, expected as "Data: <kpi>, <machine>, <window>, <prediction window>".
# The parsed values get their own name (`fields`) so that `data`, the LLM
# response, is left untouched and this cell can be re-run on its own.
raw_answer = data.content.strip()
# Keep only what follows the last "Data:" tag; if the model did not echo the
# tag, rpartition leaves the whole answer in the last element.
answer_lines = raw_answer.rpartition("Data:")[2].strip().splitlines()
fields = [field.strip() for field in answer_lines[0].split(",")] if answer_lines else []
if len(fields) < 3:
    raise ValueError(f"cannot parse the LLM answer, expected comma-separated values: {raw_answer!r}")

kpi_id = fields[0].lower().replace(" ","_")
machine_id = fields[1].replace(" ","")
# first couple of dates parsing
date=toDate(fields[2]).split("->")
url=f"http://127.0.0.1:8000/{label}/{kpi_id}/calculate?machineType={machine_id}&startTime={date[0]}&endTime={date[1]}"
if label == "predictions":
    # second couple of dates parsing
    # a data labelled as 'predictor' should not be 'NULL', this (before in the pipeline) should be checked to be true
    if len(fields) < 4:
        raise ValueError(f"a prediction request needs a fourth value (prediction window): {raw_answer!r}")
    # NOTE(review): called this way, toDate builds a window that ends yesterday,
    # while the documented prediction query uses future dates for
    # startTimeP/endTimeP -- confirm which one the predictor expects.
    dateP = toDate(fields[3]).split("->")
    url+=f"&startTimeP={dateP[0]}&endTimeP={dateP[1]}"

print(url)

http://127.0.0.1:8000/kpi_calc/operative_time/calculate?machineType=LaserWeldingMachine1&startTime=2024-11-22&endTime=2024-11-25
