In [1]:
import sys
import os

# Add the 'code' directory to the Python path
sys.path.append(os.path.abspath("./code"))
import streamlit as st
from clients.neo4j_client import Neo4jClient
from clients.openai_client import OpenAiClient
from clients.langchain_client import LangChainClient
from components.intent_matching import get_input_parameter, get_request_intent
from components.extract_node_info import match_node
from constants.prompt_templates import USER_RESPONSE_TEMPLATE, INTENT_MATCHING_TEMPLATE
from constants.chatbot_responses import CHATBOT_INTRO_MESSAGE, FAILED_INTENT_MATCH, CYPHER_QUERY_ERROR, NOT_RELEVANT_USER_REQUEST, NO_RESULTS_FOUND
from constants.db_constants import DATABASE_SCHEMA
from constants.query_templates import query_map
from components.parameter_correction import ParameterCorrection
from gui.graph_test import fetch_graph_data
import logging
import os
from streamlit_agraph import agraph, Node, Edge, Config
from streamlit_image_zoom import image_zoom
from PIL import Image

from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from constants.prompt_templates import UNCOMMON_QUESTION_WORKFLOW_TEMPLATE
from langchain.prompts.prompt import PromptTemplate
from langchain_community.graphs import Neo4jGraph
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCaseParams
# TO DO SWITCH TO EVAL STEPS VS CRITERIA
query_correctness_metric = GEval(
    name="Correctness",
    model="gpt-4o",
    #criteria="Determine whether the actual query is the same as the expected query. If the correct query was generated, return a 1.0. If a query was generated but is incorrect, return a 0.5. If no data was found, just return 0.0 as a score",
    # NOTE: you can only provide either criteria or evaluation_steps, and not both
     evaluation_steps=[
         "Determine whether the actual query is the same as the expected query.",
         "If the correct query was generated, return a value of '1'",
         "If a query was generated but is incorrect, return a value of '0.5'",
         "If no query was generated, return a value of '0'"
     ],
    evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
)

In [3]:
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCaseParams
# TO DO SWITCH TO EVAL STEPS VS CRITERIA
response_correctness_metric = GEval(
    name="Correctness",
    model="gpt-4o",
    criteria="Determine whether the actual response is the same as the expected response. Does not have to be phrased the same, but is the end result the same. Return '1' if yes and '0' if no.",
    # NOTE: you can only provide either criteria or evaluation_steps, and not both
    # evaluation_steps=[
    #     "Determine whether the actual response is the same as the expected response. Does not have to be worded the same'",
     #    "If Yes, return a value of '1.0'", 
     #    "If No, return a value of'0.0'"
     #],
     
    evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT, LLMTestCaseParams.EXPECTED_OUTPUT],
)

In [None]:
# Full data for all 30 queries
import pandas as pd
data_full = [
    (1, "How many Business Groups are there?", 
     "MATCH (n:BusinessGroup) RETURN count(*);", 
     "13"),
    (2, "What type of database is Customer Service Database?", 
     "MATCH (d:Database) WHERE d.name CONTAINS 'Customer' and d.name CONTAINS 'Service' RETURN d.type;",
     "relational"),
    (3, "What report is the regional sales breakdown report field part of?", 
     "MATCH (r:ReportSection) WHERE r.name CONTAINS 'Regional' and r.name CONTAINS 'Sales' and r.name CONTAINS 'Breakdown' "
     "MATCH (r)-[:PART_OF]-(report:Report) RETURN report.name;",
     "Sales Peformance Dashboard"),
    (4, "Which users own the Customer Satisfaction Survey Analysis report?", 
     'MATCH (u:User)-[:OWNS]-(report:Report {name: "Customer Satisfaction Survey Analysis"}) RETURN u.name;',
     "Customer Service Director"),
    (5, "What business group is the IT database associated with?", 
     "MATCH (d:Database)-[:ASSOCIATED_WITH]->(bg:BusinessGroup) WHERE d.name CONTAINS 'IT' RETURN bg.name;",
     "Information Technology"),
    (6, "What are all of the latest versions of models?", 
     "MATCH (m:Model)-[:LATEST_VERSION]-(mv:ModelVersion) RETURN m.name, mv.name;",
     "Sales Performance Prediction Model Version3, Inventory Management Prediction Model Version3, Financial Health Prediction Model Version3, Employee Productivity Prediction Model Version3, Customer Satisfaction Prediction Model Version3"),
    (7, "What is the mean absolute error of the latest Inventory Management model?", 
     "MATCH (mv:ModelVersion) WHERE mv.name CONTAINS 'Inventory' and mv.name CONTAINS 'Management' and mv.latest_version = 'True' "
     "WITH mv.name as name, split(mv.performance_metrics, ',') as mets UNWIND mets as met "
     "WITH met WHERE met CONTAINS 'mean_absolute_error' RETURN btrim(split(met,':')[1]) as MeanAbsoluteError;",
     "0.7"),
    (8, "What are the root mean squared errors of all of the latest models?", 
     "MATCH (mv:ModelVersion) WHERE mv.latest_version = 'True' "
     "WITH mv.name as name, split(mv.performance_metrics, ',') as mets UNWIND mets as met "
     "WITH name, met WHERE met CONTAINS 'root_mean_squared_error' "
     "RETURN name, rtrim(btrim(split(met,':')[1]), '}') as RMSE ORDER BY toFloat(RMSE) ASC;", 
     "0.6 for Inventory Management Prediction Model Version3, 0.6 for Employee Productivity Prediction Model Version3, 0.6 for Customer Satisfaction Prediction Model Version3, 15000 for Sales Performance Prediction Model Version3"),
    (9, "Which users are entitled on the Executive Management Database and what are their accounts?", 
     "MATCH (u:User)-[:ENTITLED_ON]-(db:Database) WHERE db.name CONTAINS 'Executive' and db.name CONTAINS 'Management' "
     "RETURN u.name, u.account;",
     "Executive Managers and Strategy Analysts. The emails are exec_manager@executive.com and strategy_analyst@executive.com. "),
    (10, "Which models use a random forest?", 
     "MATCH (mv:ModelVersion) WHERE mv.model_parameters CONTAINS 'Random' and mv.model_parameters CONTAINS 'Forest' RETURN mv.name;", 
     "Employee Productivity Prediction Model Version3 and Customer Satisfaction Prediction Model Version3"),
    (11, "What databases are the user customer service analyst entitled to?", 
     'MATCH (u:User)-[:ENTITLED_ON]-(db:Database) WHERE u.name CONTAINS "customer" and u.name CONTAINS "service" and u.name CONTAINS "analyst RETURN db.name',
     'Customer_Service_Database'),
    (12, "Who has write access to the Operations Database?", 
     'MATCH (db:Database)<-[:ENTITLED_ON]-(u:User) WHERE db.name CONTAINS "Operations" AND "write" in u.entitlement RETURN u.name',
     'operations_manager'),
    (13, "What columns does the service tickets table have?", 
     'MATCH (t:Table)-[:HAS_COLUMN]->(c:Column) WHERE t.name CONTAINS "Service" and t.name CONTAINS "Tickets" RETURN c.name',
     'TicketID CustomerID IssueDescription ServiceTicketOpenDate ServiceTicketCloseDate TicketStatus TicketResolution'),
    (14, "What tables do the Marketing database contain?", 
     'MATCH (t:Table)<-[:CONTAINS]-(db:Database) WHERE db.name CONTAINS "Marketing"RETURN t.name',
     'Campaigns Social Media Posts Market Research Public Relations Events'),
    (15, "What business group is the Employee Productivity Report associated with?", 
     "MATCH (r:Report)-[:ASSOCIATED_WITH]->(bg:BusinessGroup) WHERE r.name CONTAINS 'Employee' and r.name CONTAINS 'Productivity' RETURN bg.name;",
     'human_resources'),
    (16, "What business group is the user sales analyst involved with?", 
     "MATCH (u:User)-[:ENTITLED_ON]->(db:Database) WHERE u.name CONTAINS 'sales' and u.name CONTAINS 'analyst' "
     "MATCH (db)-[:ASSOCIATED_WITH]->(bg:BusinessGroup) RETURN bg.name;", 
     'sales'),
    (17, "List all users who maintain, own, or are entitled on Inventory Management Report", 
     'MATCH (u:User)-[r]->(rp:Report) WHERE rp.name CONTAINS "Inventory" and rp.name CONTAINS "Management" RETURN u.name, r',
     'Warehouse Supervisor [:ENTITLED_ON], Procurement Specialist [:ENTITLED_ON], Inventory Manager [:OWNS], Supply Chain Analyst [:MAINTAINS], Supply Chain Analyst [:MAINTAINS]'),
    (18, "What tables does the executive management database contain?", 
     "MATCH (db:Database)-[:CONTAINS]->(t:Table) WHERE db.name CONTAINS 'Executive' and db.name CONTAINS 'Management' RETURN t.name;",
     'Departments, Strategic Initiatives, Performance Metrics'),
    (19, "Name all of the report fields in the Financial Health Dashboard report", 
     'MATCH (r:Report)<-[:PART_OF]-(rs:ReportSection) WHERE r.name CONTAINS "Financial" and r.name CONTAINS "Health" MATCH (rs)<-[:BELONGS_TO]-(rf:ReportField) RETURN rf.name',
     'Top Expense Categories, Cost Reduction Opportunities, Net Cash Flow, Cash Flow Trends, Predicted Revenue for Next Quarter, Revenue Confidence Interval'),
    (20, "Which models are used to create the Employee Productivity Report?", 
     'MATCH (r:Report)<-[:PART_OF]-(rs:ReportSection)<-[:BELONGS_TO]-(rf:ReportField)<-[:FEEDS]-(db:DataElement)<-[:PRODUCES]-(mv:ModelVersion)<-[:LATEST_VERSION]-(m:Model) WHERE r.name CONTAINS "Employee" and r.name CONTAINS "Productivity" RETURN m.name',
     'Employee Productivity Prediction Model'),
    (21, "Who are the creators of the Sales Performance Dashboard Report?", 
     "MATCH (u:User)-[r]->(rp:Report) WHERE rp.name = 'Sales Performance Dashboard' RETURN u.name, r;"),
    (22, "Give a summary of the sections in each report.", 
     "MATCH (rs:ReportSection)-[:PART_OF]->(r:Report) RETURN r.name AS Report, collect(rs.name) AS Sections;"),
    (23, "What tables are in the IT Database?", 
     "MATCH (d:Database {name: 'IT_Database'})-[:CONTAINS]->(t:Table) RETURN t.name;"),
    (24, "Who has write access to the Operations_Database?", 
     "MATCH (db:Database)<-[:ENTITLED_ON]-(u:User) WHERE db.name CONTAINS 'Operations' AND 'write' in u.entitlement RETURN u.name;"),
    (25, "Which model versions use mean absolute error as a performance metric?", 
     "MATCH (mv:ModelVersion) WHERE mv.performance_metrics CONTAINS 'mean_absolute_error' RETURN mv.name, mv.performance_metrics;"),
    (26, "For each database, list the users who have write access.", 
     "MATCH (u:User)-[:ENTITLED_ON]->(db:Database) WHERE 'write' in u.entitlement RETURN db.name, u.name;"),
    (27, "What are the email addresses of each of the Business Groups' Contacts?", 
     "MATCH (c:Contact)-[:CONTACT_OF]->(b:BusinessGroup) RETURN b.name AS BusinessGroup, c.email AS EmailAddress;"),
    (28, "What reports are downstream of each database?", 
     "MATCH (d:Database)-[:CONTAINS]->(:Table)-[:HAS_COLUMN]->(:Column)-[:TRANSFORMS]->(:DataElement)-[:FEEDS]->(:ReportField)-[:BELONGS_TO]->(:ReportSection)-[:PART_OF]->(r:Report) "
     "RETURN d.name AS Database, collect(r.name) AS Reports;"),
    (29, "List the reports that are associated with each business group.", 
     "MATCH (b:BusinessGroup)-[:ASSOCIATED_WITH]-(r:Report) RETURN b.name AS BusinessGroup, collect(r.name) AS Reports;"),
    (30, "List all sections and fields of the Employee Productivity Report", 
     "MATCH (r:Report {name: 'Employee Productivity Report'})-[:ASSOCIATED_WITH]->(bg:BusinessGroup), (rs:ReportSection)-[:PART_OF]->(r), (rf:ReportField)-[:BELONGS_TO]->(rs) "
     "RETURN rs.name as Section, rf.name as Field;")
]

# Create DataFrame with all questions and queries
df_full = pd.DataFrame(data_full, columns=["ID", "Question", "Cypher Query", "GT Response"])


In [5]:
df_full.iloc[0][2]

  df_full.iloc[0][2]


'MATCH (n:BusinessGroup) RETURN count(*);'

In [6]:
df_full.iloc[0][1], df_full.iloc[0][2]

  df_full.iloc[0][1], df_full.iloc[0][2]


('How many Business Groups are there?',
 'MATCH (n:BusinessGroup) RETURN count(*);')

In [7]:
#Try once with 1-0, try once with soft score
from app import rag_chatbot
from deepeval.test_case import LLMTestCase

generated_queries = []
generated_responses = []
query_scores = []
query_reasons = []
response_scores = []
response_reasons = []

for i in range(20):
    question = df_full.iloc[i][1]
    ground_truth_query = df_full.iloc[i][2]
    ground_truth_response = df_full.iloc[i][3]

    #code to run model to produce query
    try:
        generated_cypher, generated_response = rag_chatbot(question)
        cypher = generated_cypher[0]['query'].split("cypher\n")[1]
        cypher = cypher.replace("\n"," ")
    except Exception as e:
        cypher = ""

    generated_responses.append(generated_response)
    generated_queries.append(cypher)
    
    test_case_responses = LLMTestCase(
        input= question, 
        actual_output= generated_response,
        expected_output= ground_truth_response
    )

    response_correctness_metric.measure(test_case_responses)
    response_scores.append(response_correctness_metric.score)
    response_reasons.append(response_correctness_metric.reason)

    test_case_query = LLMTestCase(
        input= question, 
        actual_output= cypher,
        expected_output= ground_truth_query
    )

    query_correctness_metric.measure(test_case_query)
    query_scores.append(query_correctness_metric.score)
    query_reasons.append(query_correctness_metric.reason)
    

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: How many Business Groups are there?
Intent matching result: [UNCOMMON,0]
Retrieving information from the BusinessGroup_embedding_graph.
UNCOMMON QUERY


  warn_deprecated(


Retrieved Context: 
name: Executive Management

name: Research and Development

name: Finance and Accounting

name: finance

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (b:BusinessGroup) RETURN count(b) AS numberOfBusinessGroups
[0m
Full Context:
[32;1m[1;3m[{'numberOfBusinessGroups': 13}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (b:BusinessGroup) RETURN count(b) AS numberOfBusinessGroups\n'}, {'context': [{'numberOfBusinessGroups': 13}]}]
RETRIEVAL RESPONSE: [{'query': 'cypher\nMATCH (b:BusinessGroup) RETURN count(b) AS numberOfBusinessGroups\n'}, {'context': [{'numberOfBusinessGroups': 13}]}]


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What type of database is Customer Service Database?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Database_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Customer Service Database
type: relational

name: Sales Database
type: relational

name: Marketing Database
type: relational

name: Product Database
type: relational

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Database {name: "Customer Service Database"})
RETURN d.type
[0m
Full Context:
[32;1m[1;3m[{'d.type': 'relational'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (d:Database {name: "Customer Service Database"})\nRETURN d.type\n'}, {'context': [{'d.type': 'relational'}]}]
RETRIEVAL RESPONSE: [{'query': 'cypher\nMATCH (d:Database {name: "Customer Service Database"})\nRETURN d.type\n'}, {'context': [{'d.type': 'rel

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What report is the regional sales breakdown report field part of?
Intent matching result: [COMMON,3]
['regional sales breakdown', ' ReportField']
COMMON QUERY: [3|regional sales breakdown| ReportField]
Query with captured input parameter: MATCH (rf:ReportField {name: "regional sales breakdown"})
        OPTIONAL MATCH (rf)<-[:FEEDS]-(de1:DataElement)<-[:TRANSFORMS]-(col1:Column)-[r1]-(t1:Table)
        WITH rf, de1, collect(DISTINCT col1.name) AS cols1
        OPTIONAL MATCH (rf)<-[:FEEDS]-(de2_1:DataElement)<-[:PRODUCES]-(mv:ModelVersion)<-[:INPUT_TO]-(de2_2:DataElement)<-[:TRANSFORMS]-(col2:Column)-[r2]-(t2:Table)
        WHERE mv.latest_version = "True"
        WITH rf, de1, cols1, de2_1, collect(DISTINCT col2.name) AS cols2, mv, collect(DISTINCT de2_2.name) AS de2_2s
        WITH rf, COALESCE(de1.name, de2_1.name) AS de, (cols1 + cols2) AS cols, mv, de2_2s
        RETURN { ReportField: rf.name, ModelVersion: mv.name, Column: cols } AS

  warn_deprecated(



Original User Input: [What report is the regional sales breakdown report field part of?]
Corrected User Input: [[Sales by Region|What report is the regional sales breakdown report field part of?]]
Query with captured input parameter: MATCH (rf:ReportField {name: "Sales by Region"})
        OPTIONAL MATCH (rf)<-[:FEEDS]-(de1:DataElement)<-[:TRANSFORMS]-(col1:Column)-[r1]-(t1:Table)
        WITH rf, de1, collect(DISTINCT col1.name) AS cols1
        OPTIONAL MATCH (rf)<-[:FEEDS]-(de2_1:DataElement)<-[:PRODUCES]-(mv:ModelVersion)<-[:INPUT_TO]-(de2_2:DataElement)<-[:TRANSFORMS]-(col2:Column)-[r2]-(t2:Table)
        WHERE mv.latest_version = "True"
        WITH rf, de1, cols1, de2_1, collect(DISTINCT col2.name) AS cols2, mv, collect(DISTINCT de2_2.name) AS de2_2s
        WITH rf, COALESCE(de1.name, de2_1.name) AS de, (cols1 + cols2) AS cols, mv, de2_2s
        RETURN { ReportField: rf.name, ModelVersion: mv.name, Column: cols } AS result
        


2024-12-04 10:14:54.364 
  command:

    streamlit run /Users/shreyakurdukar/Desktop/dsi-capstone-capstone-kpmg-jim-ben/chat-venv/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: Which users own the Customer Satisfaction Survey Analysis report?
Intent matching result: [UNCOMMON,0]
Retrieving information from the User_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Customer Insights Analyst
account: customer.insights.analyst@company.com

name: Customer Insights Analyst
account: customer.insights.analyst@company.com

name: customer_service_analyst
account: cus_analyst@customerservicecompany.com

name: Sales Data Analyst
account: sales.analyst@company.com

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User)-[:OWNS]->(r:Report {name: "Customer Satisfaction Survey Analysis"})
RETURN u.name
[0m
Full Context:
[32;1m[1;3m[{'u.name': 'Customer Service Director'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (u:User)-[:OWNS]->(r:Report {name: "Customer Satisfaction Survey Analysi

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What business group is the IT database associated with?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Database_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: IT Database
type: relational

name: R&D Database
type: relational

name: Executive Management Database
type: relational

name: HR Database
type: relational

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (db:Database {name: "IT"})-[:ASSOCIATED_WITH]->(bg:BusinessGroup)
RETURN bg.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (db:Database {name: "IT"})-[:ASSOCIATED_WITH]->(bg:BusinessGroup)\nRETURN bg.name\n'}, {'context': []}]
Query failed to retrieve data. Refining query...

 Attempt 2 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[

ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What are all of the latest versions of models?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Model_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Model)-[:LATEST_VERSION]->(mv:ModelVersion)
RETURN m.name, mv.version, mv.latest_version
[0m
Full Context:
[32;1m[1;3m[{'m.name': 'Sales Performance Prediction Model', 'mv.version': 3, 'mv.latest_version': 'True'}, {'m.name': 'Inventory Management Prediction Model', 'mv.version': 3, 'mv.latest_version': 'True'}, {'m.name': 'Financial Health Prediction Model', 'mv.version': 3, 'mv.latest_version': 'True'}, {'m.name': 'Employee Productivity Prediction Model', 'mv.version': 3, 'mv.latest_version': 'True'}, {'m.name': 'Customer Satisfaction Prediction Model', 'mv.version': 3, 'mv.latest_version': 'True'}][0m

[1m> Finished chain.

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What is the mean absolute error of the latest Inventory Management model?
Intent matching result: [COMMON,2]
['Inventory Management', 'Model']
COMMON QUERY: [2|Inventory Management|Model]
Query with captured input parameter: MATCH (m:Model)
        WHERE m.name CONTAINS "Inventory Management"
        MATCH (m)-[r1:LATEST_VERSION]->(mv1:ModelVersion)
        RETURN mv1.performance_metrics AS performance_metrics
Neo4j cypher query result: ['{"mean_absolute_error": 0.7, "mean_percentage_error": 0.55, "root_mean_squared_error": 0.6}']


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What are the root mean squared errors of all of the latest models?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Model_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Model)-[:LATEST_VERSION]->(mv:ModelVersion)
RETURN mv.name, mv.performance_metrics
[0m
Full Context:
[32;1m[1;3m[{'mv.name': 'Sales Performance Prediction Model Version3', 'mv.performance_metrics': '{"mean_absolute_error": 10000, "mean_percentage_error": 0.1, "root_mean_squared_error": 15000}'}, {'mv.name': 'Inventory Management Prediction Model Version3', 'mv.performance_metrics': '{"mean_absolute_error": 0.7, "mean_percentage_error": 0.55, "root_mean_squared_error": 0.6}'}, {'mv.name': 'Financial Health Prediction Model Version3', 'mv.performance_metrics': '{"mean_absolute_error": 5, "mean_squared_error": 1

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: Which users are entitled on the Executive Management Database and what are their accounts?
Intent matching result: [UNCOMMON,0]
Retrieving information from the User_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: executive_manager
account: exec_manager@executive.com

name: Department Manager
account: department.manager@company.com

name: hr_manager
account: hr_manager@hrcompany.com

name: Inventory Manager
account: inventory.manager@company.com

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User)-[:ENTITLED_ON]->(d:Database {name: "Executive Management Database"})
RETURN u.name, u.account
[0m
Full Context:
[32;1m[1;3m[{'u.name': 'executive_manager', 'u.account': 'exec_manager@executive.com'}, {'u.name': 'strategy_analyst', 'u.account': 'strategy_analyst@executive.com'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'qu

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: Which models use a random forest?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Model_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Model)-[:VERSION_OF]->(mv:ModelVersion)
WHERE mv.model_parameters CONTAINS 'random forest'
RETURN m.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': "cypher\nMATCH (m:Model)-[:VERSION_OF]->(mv:ModelVersion)\nWHERE mv.model_parameters CONTAINS 'random forest'\nRETURN m.name\n"}, {'context': []}]
Query failed to retrieve data. Refining query...

 Attempt 2 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (mv:ModelVersion)
WHERE mv.model_parameters CONTAINS 'random forest'
MATCH (m:Model)-[:VERSION_OF]->(mv)
RET


[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': "cypher\nMATCH (m:Model)\nWHERE EXISTS {\n  MATCH (m)-[:VERSION_OF]->(mv:ModelVersion)\n  WHERE mv.model_parameters CONTAINS 'random forest'\n}\nRETURN m.name\n"}, {'context': []}]
Query failed to retrieve data. Refining query...
FIRST N DOCS: 
NODE NAME: []
ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What databases are the user customer service analyst entitled to?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Database_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: HR Database
type: relational

name: R&D Database
type: relational

name: IT Database
type: relational

name: Finance Database
type: relational

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User {name: "customer service analyst"})-[:ENTITLED_ON]->(d:Database)
RETURN d.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (u:User {name: "customer service analyst"})-[:ENTITLED_ON]->(d:Database)\nRETURN d.name\n'}, {'context': []}]
Query failed to retrieve data. Refining query...

 Attempt 2 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated C

ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: Who has write access to the Operations_Database?


Intent matching result: [UNCOMMON,0]


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What columns does the service tickets table have?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Column_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: ServiceTicketOpenDate
type: date

name: TicketID
type: integer

name: ServiceTicketCloseDate
type: date

name: TicketOpenDate
type: date

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (t:Table {name: "service tickets"})-[:HAS_COLUMN]->(c:Column)
RETURN c.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (t:Table {name: "service tickets"})-[:HAS_COLUMN]->(c:Column)\nRETURN c.name\n'}, {'context': []}]
Query failed to retrieve data. Refining query...

 Attempt 2 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Column)<-[

ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What tables do the marketing database contain?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Table_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Market Research

name: Campaigns

name: Products

name: Financial Reports

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Database {name: "marketing"})-[:CONTAINS]->(t:Table)
RETURN t.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (d:Database {name: "marketing"})-[:CONTAINS]->(t:Table)\nRETURN t.name\n'}, {'context': []}]
Query failed to retrieve data. Refining query...

 Attempt 2 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (t:Table)<-[:CONTAINS]-(d:Database {name: "marketing"})
RETURN t.name[0m
Full Context:
[32;1m[1;

ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What business group is the Employee Productivity Report associated with?
Intent matching result: [UNCOMMON,0]
Retrieving information from the BusinessGroup_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: human_resources

name: Product

name: Research and Development

name: Marketing and Public Relations

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (r:Report {name: "Employee Productivity Report"})-[:ASSOCIATED_WITH]->(bg:BusinessGroup)
RETURN bg.name
[0m
Full Context:
[32;1m[1;3m[{'bg.name': 'human_resources'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (r:Report {name: "Employee Productivity Report"})-[:ASSOCIATED_WITH]->(bg:BusinessGroup)\nRETURN bg.name\n'}, {'context': [{'bg.name': 'human_resources'}]}]
RETRIEVAL RESPONSE: [{'query': 'cypher\nMATCH (r:Report {name: "Employee Productivity Re

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What business group is the user sales analyst involved with?
Intent matching result: [UNCOMMON,0]
Retrieving information from the User_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Sales Analyst
account: sales.analyst@company.com

name: Sales Data Analyst
account: sales.analyst@company.com

name: sales_analyst
account: sales_analyst@salescompany.com

name: Customer Insights Analyst
account: customer.insights.analyst@company.com

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User {name: "sales analyst"})-[:ENTITLED_ON]->(:Database)-[:ASSOCIATED_WITH]->(bg:BusinessGroup)
RETURN bg.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (u:User {name: "sales analyst"})-[:ENTITLED_ON]->(:Database)-[:ASSOCIATED_WITH]->(bg:BusinessGroup)\nRETURN bg.name\n'}, {'context': []

ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: List all users who maintain, own, or are entitled on Inventory Management Report
Intent matching result: [UNCOMMON,0]
Retrieving information from the User_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Inventory Manager
account: inventory.manager@company.com

name: Inventory Manager
account: inventory.manager@company.com

name: inventory_specialist
account: inventory_spec@operations.com

name: Warehouse Supervisor
account: warehouse.supervisor@company.com

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User)-[:MAINTAINS|OWNS|ENTITLED_ON]->(r:Report {name: "Inventory Management Report"})
RETURN u.name
[0m
Full Context:
[32;1m[1;3m[{'u.name': 'Warehouse Supervisor'}, {'u.name': 'Procurement Specialist'}, {'u.name': 'Inventory Manager'}, {'u.name': 'Supply Chain Analyst'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'qu

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: What tables does the executive management database contain?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Table_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Financial Reports

name: Employees

name: Departments

name: Employee Performance

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (d:Database {name: "executive management"})-[:CONTAINS]->(t:Table)
RETURN t.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (d:Database {name: "executive management"})-[:CONTAINS]->(t:Table)\nRETURN t.name\n'}, {'context': []}]
Query failed to retrieve data. Refining query...

 Attempt 2 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (t:Table)<-[:CONTAINS]-(d:Database {name: "execut

ERROR: list index out of range


  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: Name all of the report fields in the Financial Health Dashboard report
Intent matching result: [UNCOMMON,0]
Retrieving information from the ReportField_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Cash Flow Trends
id: cash_flow_trends

name: Net Cash Flow
id: net_cash_flow

name: Inventory Turnover Ratio
id: inventory_turnover_ratio

name: Predicted Revenue for Next Quarter
id: predicted_revenue_for_next_quarter

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (r:Report {name: "Financial Health Dashboard"})-[:PART_OF]->(rs:ReportSection)<-[:BELONGS_TO]-(rf:ReportField)
RETURN rf.name
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (r:Report {name: "Financial Health Dashboard"})-[:PART_OF]->(rs:ReportSection)<-[:BELONGS_TO]-(rf:ReportField)\nRETURN rf.name\n'}, {'conte

  question = df_full.iloc[i][1]
  ground_truth_query = df_full.iloc[i][2]
  ground_truth_response = df_full.iloc[i][3]


---------------------------------
User request: Which models are used to create the Employee Productivity Report?
Intent matching result: [UNCOMMON,0]
Retrieving information from the Model_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (r:Report {name: "Employee Productivity Report"})-[:ASSOCIATED_WITH]->(:BusinessGroup)<-[:CONTACT_OF]-(:Contact),
      (r)<-[:ENTITLED_ON|:OWNS|:MAINTAINS]-(:User),
      (r)<-[:PART_OF]-(:ReportSection)<-[:BELONGS_TO]-(:ReportField)<-[:FEEDS]-(:DataElement)<-[:PRODUCES]-(:ModelVersion)<-[:VERSION_OF]-(m:Model)
RETURN DISTINCT m.name
[0m
Full Context:
[32;1m[1;3m[{'m.name': 'Employee Productivity Prediction Model'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'query': 'cypher\nMATCH (r:Report {name: "Employee Productivity Report"})-[:ASSOCIATED_WITH]->(:BusinessGroup)<-[:CONTACT_OF]-(:Contact)

In [8]:
print(response_scores)
print(query_scores)

[1.0, 1.0, 0.0, 0.9, 0.0, 0.1, 1.0, 1.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 1.0, 0.1]
[0.05, 0.05, 0.0, 0.05, 0.0, 0.05, 0.0, 0.05, 0.05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05, 0.0, 0.05, 0.0, 0.05, 0.05]


In [9]:
print(generated_responses[16])


The users who maintain, own, or are entitled on the Inventory Management Report are:
- Warehouse Supervisor
- Procurement Specialist
- Inventory Manager
- Supply Chain Analyst


In [10]:
print(generated_queries)

['MATCH (b:BusinessGroup) RETURN count(b) AS numberOfBusinessGroups ', 'MATCH (d:Database {name: "Customer Service Database"}) RETURN d.type ', '', 'MATCH (u:User)-[:OWNS]->(r:Report {name: "Customer Satisfaction Survey Analysis"}) RETURN u.name ', '', 'MATCH (m:Model)-[:LATEST_VERSION]->(mv:ModelVersion) RETURN m.name, mv.version, mv.latest_version ', '', 'MATCH (m:Model)-[:LATEST_VERSION]->(mv:ModelVersion) RETURN mv.name, mv.performance_metrics ', 'MATCH (u:User)-[:ENTITLED_ON]->(d:Database {name: "Executive Management Database"}) RETURN u.name, u.account ', '', '', '', '', '', 'MATCH (r:Report {name: "Employee Productivity Report"})-[:ASSOCIATED_WITH]->(bg:BusinessGroup) RETURN bg.name ', '', 'MATCH (u:User)-[:MAINTAINS|OWNS|ENTITLED_ON]->(r:Report {name: "Inventory Management Report"}) RETURN u.name ', '', 'MATCH (rf:ReportField)-[:BELONGS_TO]->(rs:ReportSection)-[:PART_OF]->(r:Report {name: "Financial Health Dashboard"}) RETURN rf.name ', 'MATCH (r:Report {name: "Employee Product

In [11]:
from app import rag_chatbot

generated_cypher, generated_response = rag_chatbot("List all users who maintain, own, or are entitled on Inventory Management Report")
print(generated_response)

---------------------------------
User request: List all users who maintain, own, or are entitled on Inventory Management Report
Intent matching result: [UNCOMMON,0]
Retrieving information from the User_embedding_graph.
UNCOMMON QUERY
Retrieved Context: 
name: Inventory Manager
account: inventory.manager@company.com

name: Inventory Manager
account: inventory.manager@company.com

name: inventory_specialist
account: inventory_spec@operations.com

name: Warehouse Supervisor
account: warehouse.supervisor@company.com

 Attempt 1 generating Cypher query...


[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (u:User)-[:MAINTAINS|OWNS|ENTITLED_ON]->(r:Report {name: "Inventory Management Report"})
RETURN u.name
[0m
Full Context:
[32;1m[1;3m[{'u.name': 'Warehouse Supervisor'}, {'u.name': 'Procurement Specialist'}, {'u.name': 'Inventory Manager'}, {'u.name': 'Supply Chain Analyst'}][0m

[1m> Finished chain.[0m
LangChain Cypher query steps: [{'qu