# Final LLM test

In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import os
import sys

# Resolve the directory one level above the notebook's working directory and
# add it to sys.path so modules located there can be imported by later cells.
# os.getcwd() is the plain-Python equivalent of the %pwd magic.
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, '../'))
sys.path.append(parent_dir)

# LLM API setup
# The .env path is assembled with os.path.join instead of a "\RAG\.env" literal:
# "\R" and "\." are invalid escape sequences in a normal string, and a
# backslash-separated path only resolves on Windows.
load_dotenv(dotenv_path=os.path.join(parent_dir, "RAG", ".env"))
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
!pip install pandas



In [2]:
import pandas as pd
import re

# Function to parse the file and extract the data to create a DataFrame
def parse_file(file_path):
    """Load a request dataset file into a DataFrame.

    The file text is cut on ``--NAME--`` markers: the text between the dashes
    is taken as the request type and the text following the marker as that
    request's body. From each body, the text between ``QUESTION`` and
    ``END_QUESTION`` and between ``CONTEXT`` and ``END_CONTEXT`` is extracted.
    A body that lacks either part is left out of the result.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A DataFrame with the columns ``request_type``, ``question`` and
        ``context``, one row per body that contained both parts.
    """
    with open(file_path, 'r') as handle:
        raw_text = handle.read()

    # With one capture group, re.split yields
    # [leading text, type, body, type, body, ...].
    pieces = re.split(r"--(.*?)--", raw_text)

    columns = {"request_type": [], "question": [], "context": []}

    # Odd positions hold the request types, the following even positions their bodies.
    for header, body in zip(pieces[1::2], pieces[2::2]):
        body = body.strip()

        found_question = re.search(r"QUESTION\n(.*?)END_QUESTION", body, re.DOTALL)
        found_context = re.search(r"CONTEXT\n(.*?)END_CONTEXT", body, re.DOTALL)

        # Keep only bodies that provide both a question and a context.
        if not (found_question and found_context):
            continue

        columns["request_type"].append(header.strip())
        columns["question"].append(found_question.group(1).strip())
        columns["context"].append(found_context.group(1).strip())

    return pd.DataFrame(columns)

In [3]:
# Build the dataset path with os.path.join so it resolves on any OS rather than
# only where "\\" is the path separator.
dataframe = parse_file(os.path.join(parent_dir, "tests", "final_response_dataset.txt"))
dataframe

Unnamed: 0,request_type,question,context
0,KPI_CALCULATION,Compute me power for Laser cutter on 12/10/2024,{\nMachine name: Laser Cutter\nKPI name: power...
1,KPI_CALCULATION,Compute me idle time both for Riveting Machine...,{\nMachine name: Riveting Machine\nKPI name: i...
2,KPI_FORECASTING,Predict idle time both for Riveting Machine an...,{\nMachine name: Riveting Machine\nKPI name: i...
3,KPI_FORECASTING,Predict next week cycles of Large Capacity Cut...,Today date: 25/11/2024\n{\nMachine name: Large...
4,REPORT_GENERATION,Return me the report about last month,"{\n Machine name: Riveting Machine,\n KPI na..."
5,REPORT_GENERATION,Return me the report of Riveting Machine idle_...,Today date: 25/11/2024\n\n{\n Machine name: R...
6,NEW_KPI_GENERATION,Generate a new KPI that shows the idle time ratio,* **KPI ID: idle_time_avg**\n * Formula: -\...
7,NEW_KPI_GENERATION,Generate a new KPI that describe the operative...,* **KPI ID: idle_time_avg**\n * Formula: -\...
8,DASHBOARD_GENERATION,Which is most suitable dashboard to visualize ...,Bar Chart \n - ID: `bar_chart` \n - Descrip...
9,DASHBOARD_GENERATION,Generate a dashboard to visualize workfloor pr...,Bar Chart \n - ID: `bar_chart` \n - Descrip...


In [5]:
from schemas import promptmanager

# Retrieve the prompt for each request type
def get_prompt(filepath):
    """Read a prompt template file located under ``parent_dir``.

    Args:
        filepath: Location of the file relative to ``parent_dir``. Components
            may be separated by ``\\`` or ``/``, with or without a leading
            separator (e.g. ``"\\prompts\\report.txt"``).

    Returns:
        The full text of the file.
    """
    # Split on either separator and re-join with os.path.join: Windows-style
    # fragments keep resolving to the same file on Windows and also resolve on
    # POSIX systems, where "\\" is not a path separator.
    parts = [part for part in filepath.replace("\\", "/").split("/") if part]
    with open(os.path.join(parent_dir, *parts), 'r') as file:
        return file.read()

# Prompt templates, one constant per request type. Calculation and forecasting
# are both loaded from the same template file.
KPI_CALCULATION_PROMPT = get_prompt("\\prompts\\calculation_and_forecasting.txt")
KPI_FORECASTING_PROMPT = get_prompt("\\prompts\\calculation_and_forecasting.txt")
NEW_KPI_GENERATION_PROMPT = get_prompt("\\prompts\\kpi_generation.txt")
REPORT_GENERATION_PROMPT = get_prompt("\\prompts\\report.txt")
DASHBOARD_GENERATION_PROMPT = get_prompt("\\prompts\\dashboard_generation.txt")

# Lookup from request type to its template. Each key points at its own
# constant, so changing KPI_FORECASTING_PROMPT takes effect for forecasting
# requests.
selected_prompt = {
    "KPI_CALCULATION": KPI_CALCULATION_PROMPT,
    "KPI_FORECASTING": KPI_FORECASTING_PROMPT,
    "NEW_KPI_GENERATION": NEW_KPI_GENERATION_PROMPT,
    "REPORT_GENERATION": REPORT_GENERATION_PROMPT,
    "DASHBOARD_GENERATION": DASHBOARD_GENERATION_PROMPT,
}


In [6]:
def generate_responses(dataframe, selected_prompt, llm):
    """Fill the prompt template for every dataset row and query the LLM.

    Args:
        dataframe: DataFrame with the columns ``request_type``, ``question``
            and ``context``.
        selected_prompt: Mapping from request type to a template string that
            may contain the placeholders ``{_USER_QUERY_}`` and ``{_CONTEXT_}``.
        llm: Object exposing ``invoke(prompt)``.

    Returns:
        A list with the value returned by ``llm.invoke`` for each row, in row
        order.
    """
    # Both placeholders are substituted in a single pass over the template, so
    # a placeholder token occurring inside the question (or context) text is
    # inserted literally instead of being expanded by a later substitution.
    placeholder_pattern = re.compile(r"\{_USER_QUERY_\}|\{_CONTEXT_\}")

    responses = []
    rows = zip(dataframe['request_type'], dataframe['question'], dataframe['context'])
    for request_type, question, context in rows:
        # Unknown request types fall back to an empty template, so the result
        # list always has one entry per row.
        prompt_template = selected_prompt.get(request_type, "")

        values = {"{_USER_QUERY_}": question, "{_CONTEXT_}": context}
        # A callable replacement inserts the value verbatim (no backslash
        # escape processing of the inserted text).
        prompt = placeholder_pattern.sub(lambda match: values[match.group(0)], prompt_template)

        # Call the LLM with the completed prompt
        responses.append(llm.invoke(prompt))

    return responses

# Generate responses
responses = generate_responses(dataframe, selected_prompt, llm)

# Write responses to a file
# The path is built with os.path.join so it resolves on any OS, and the file is
# written as UTF-8 so response text containing characters outside the platform
# default code page does not raise UnicodeEncodeError.
results_path = os.path.join(parent_dir, "tests", "final_response_results.txt")
with open(results_path, 'w', encoding="utf-8") as file:
    # responses holds one entry per dataframe row, in row order.
    for question, context, response in zip(dataframe['question'], dataframe['context'], responses):
        file.write(f"Question:\n {question}\n")
        file.write(f"\nContext:\n {context}\n")
        file.write(f"\nResponse:\n {response.content}\n\n")