In [66]:
import os
from langchain_openai import OpenAI

In [127]:
#from langchain.chat_models import AzureChatOpenAI
from langchain_openai import AzureChatOpenAI

# Chat model that answers the user questions inside the RAG chain.
# The API key is read from the AZURE_OPENAI_API_KEY environment variable so
# that the secret is never stored in the notebook; a missing variable raises
# KeyError here instead of failing later on the first request.
gpt = AzureChatOpenAI(
    azure_deployment="gpt-35-turbo-1106",
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_type="azure",
    openai_api_version="2023-12-01-preview",
    azure_endpoint="https://chatbotopenaikeyswe.openai.azure.com/",
    verbose=True,
)

In [128]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import CSVLoader
path = "C:/Users/Admin/Desktop/hsag chatbot/ragcsvdata"
# Decode the CSV files as UTF-8 and drop a leading byte-order mark
# ("utf-8-sig"). Without an explicit encoding the files were decoded with a
# different codec, which is what produced the 'ï»¿' prefix and the 'Ã¶'
# sequences visible in the retrieved contexts of the evaluation output.
# Autodetection stays enabled as a fallback for files that are not valid UTF-8.
# NOTE: an index that was already persisted from the mis-decoded text has to
# be rebuilt before this takes effect for retrieval.
csv_loader_kwargs = {'encoding': 'utf-8-sig', 'autodetect_encoding': True}
loader = DirectoryLoader(path, glob="**/*.csv", loader_cls=CSVLoader, loader_kwargs=csv_loader_kwargs)
# `db` is the list of loaded documents that later cells extend and split.
db = loader.load()

In [129]:
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import Html2TextTransformer

# Fetch the public customer page.
urls = ["https://www.thuega-energie-gmbh.de/privatkunden.html"]
loader = AsyncHtmlLoader(urls)
docs = loader.load()

# Run the fetched page through the HTML-to-text transformer and append the
# result to the documents loaded from the CSV files.
# NOTE: `db` is extended in place, so running this cell twice adds the page twice.
html2text = Html2TextTransformer()
db.extend(html2text.transform_documents(docs))

Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.62it/s]


In [130]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [131]:
# Chunking settings: lengths are measured with len(), chunks are capped at
# 2000 with an overlap of 200, and separators are treated as plain strings.
splitter_options = dict(
    chunk_size=2000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

In [132]:
# Split every loaded document (CSV rows plus the fetched web page) into chunks
# with the splitter configured above; these chunks are the input intended for
# the Chroma index.
texts = text_splitter.split_documents(db)

In [133]:
from langchain_openai import AzureOpenAIEmbeddings
# Embedding model used by the vector store.
# The API key comes from the AZURE_OPENAI_API_KEY environment variable rather
# than a literal in the notebook; a missing variable raises KeyError here.
embed = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-ada-002",
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    azure_endpoint="https://chatbotopenaikeyswe.openai.azure.com/"
)

In [134]:
# Build the Chroma index once and reuse it on later runs. The build step used
# to be commented out, so a machine without an existing index directory ended
# up with an empty store and a retriever that had nothing to return.
persist_directory = "C:/chroma_db"

if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # An index is already on disk: open it without re-embedding anything.
    vectorstore2 = Chroma(persist_directory=persist_directory, embedding_function=embed)
else:
    # First run: embed the chunks and write the index to disk.
    vectorstore2 = Chroma.from_documents(texts, embed, persist_directory=persist_directory)
    # Older Chroma versions only write to disk on an explicit persist() call;
    # the original (commented-out) build step made the same call.
    if hasattr(vectorstore2, "persist"):
        vectorstore2.persist()

# Similarity search returning the 3 best-matching chunks per query.
retriever = vectorstore2.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [137]:
from langchain.prompts import ChatPromptTemplate

# Prompt text for the chatbot. The two placeholders are filled by the RAG
# chain: {context} receives the retrieved documents and {question} the user's
# question. The wording of the string is part of the model's behavior.
template = """
You are an assistant named "hsag-chatbot" for question-answering tasks 
related to the Energy industry and general conversation. 

If the question is related to the energy domain try to answer the question from the knowledge 
you have in your memory.

Use the following pieces of retrieved context to answer the question 
or engage in small talk with the user in a friendly and informative way. 
If you don't know the answer to a factual question, 
just say that you don't know. 
Use three sentences maximum and keep the answer concise. 
If the user asks funny questions or jokes, try to answer them 
using your knowledge or generate a humorous response. 
If they ask general knowledge about the world, try to answer 
those questions using your knowledge and also domain-specific knowledge 
about the energy industry. 


Context:
{context}

Question:
{question}
"""

# Wrap the raw text in a chat prompt template so it can be piped into the model.
prompt = ChatPromptTemplate.from_template(template)

In [138]:
from operator import itemgetter

from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

llm_model = gpt

# Stage 1: retrieve documents for the question and carry the question along.
retrieval_stage = {
    "context": itemgetter("question") | retriever,
    "question": itemgetter("question"),
}

# Stage 2: re-assigns "context" to itself. It also sits between the two dict
# literals; placed directly next to each other they would be combined by
# Python's dict `|` operator instead of being composed into a chain.
passthrough_stage = RunnablePassthrough.assign(context=itemgetter("context"))

# Stage 3: produce the model response and return the retrieved documents
# alongside it so they can be inspected or evaluated.
answer_stage = {
    "response": prompt | llm_model,
    "context": itemgetter("context"),
}

rag_chain = retrieval_stage | passthrough_stage | answer_stage

In [165]:
# Ad-hoc check of the chain: ask one multi-field question and print the text
# of the model's reply. result["context"] holds the retrieved documents.
question = "What is the Gas contract number, IBAN, street,  start of the contract, city,of the customer named Sebastian?"
result = rag_chain.invoke({"question": question})
answer_message = result["response"]
print(answer_message.content)

The gas contract number for the customer named Sebastian is ES00000062980, the IBAN is DE89370400440532013000, the street is Schmiedeweg, the start of the contract is on 2022-03-14, and the city is Lauenburg.


RAGAS EVALUATION


In [140]:
import pandas as pd

# Read the RAGAS test set (questions, contexts, ground truths) as UTF-8 and
# display it.
testset_csv = 'C:/Users/Admin/Desktop/hsag chatbot/ragashsagtestset.csv'
test_df = pd.read_csv(filepath_or_buffer=testset_csv, encoding='utf-8')
test_df

Unnamed: 0,question,contexts,ground_truth,evolution_type
0,What is the Gas contract number of the custome...,contractNumber: ES00000062980\nsection: Gas\ni...,The gas contract number for the customer named...,simple
1,What is the meter number for meterId 1244 and ...,['registerId: 1\nobis: 1-1:1.8.1\nlabel: HT\ne...,The meter number for meterId 1244 is ZEI2332DE...,simple
2,"What is the Gas contract number, IBAN, street,...",contractNumber: ES00000062980\nsection: Gas\ni...,"The Gas contract number is ES00000062980, the ...",simple
3,What is the purpose of the charging instructio...,['id: chargingInstruction\nSkillName: EMobilit...,The purpose of the charging instruction in the...,reasoning
4,What services does Thuga Energie provide as a ...,['kWh im Jahr\n\nAktionscode\n\nLeider wird di...,Thuga Energie provides innovative products suc...,multicontext
5,What are the details of the meter with meterId...,['meterId: 1244\nmeterNumber: ZEI2332DE2\nsect...,The details of the meter with meterId 1244 in ...,multicontext
6,What options are there for making changes in o...,['conversationId: 4oBqHMWVxLO3xJrrHxQWIX-eu\nt...,The Kundenportal offers the option to make cha...,reasoning
7,How do I delete historical meter readings and ...,['answerId: 103165\ntext: Um **historische Zäh...,"To delete historical meter readings, it is bes...",multicontext
8,What are the service hours for the Kundenzentr...,['__ Facebook\n\n__ Instagram\n\n**Kundenzentr...,The service hours for the Kundenzentrum in Sud...,multicontext
9,Where can I find the meter number for my elect...,['conversationId: 43OIHyE5TDoJGpDL51TIEd-eu\nt...,You can find the meter number for your electri...,simple


In [42]:
%pip install ragas

Collecting ragasNote: you may need to restart the kernel to use updated packages.





  Downloading ragas-0.1.8-py3-none-any.whl.metadata (5.2 kB)
Collecting datasets (from ragas)
  Using cached datasets-2.19.1-py3-none-any.whl.metadata (19 kB)
Collecting pysbd>=0.3.4 (from ragas)
  Using cached pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Collecting appdirs (from ragas)
  Using cached appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting pyarrow-hotfix (from datasets->ragas)
  Using cached pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets->ragas)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets->ragas)
  Using cached xxhash-3.4.1-cp312-cp312-win_amd64.whl.metadata (12 kB)
Collecting multiprocess (from datasets->ragas)
  Using cached multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Collecting huggingface-hub>=0.21.2 (from datasets->ragas)
  Downloading huggingface_hub-0.23.1-py3-none-any.whl.metadata (12 kB)
Downloading ragas-0.1.8-py3-none-any.whl (84 kB)


In [141]:
# Azure OpenAI settings shared by the evaluation model and embeddings:
# endpoint URL plus deployment and model names for chat and embedding.
azure_configs = dict(
    base_url="https://chatbotopenaikeyswe.openai.azure.com/",
    model_deployment="gpt-35-turbo-1106",
    model_name="gpt-35-turbo",
    embedding_deployment="text-embedding-ada-002",
    embedding_name="text-embedding-ada-002",
)

In [142]:
# Chat model handed to ragas as the judge LLM for the evaluation metrics.
# The API key is taken from the AZURE_OPENAI_API_KEY environment variable
# instead of a literal in the notebook; a missing variable raises KeyError.
azure_model = AzureChatOpenAI(
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_version="2023-12-01-preview",
    azure_endpoint=azure_configs["base_url"],
    azure_deployment=azure_configs["model_deployment"],
    model=azure_configs["model_name"],
    validate_base_url=False,
)


In [143]:
# Embedding model handed to ragas for the embedding-based metrics.
# The API key is taken from the AZURE_OPENAI_API_KEY environment variable
# instead of a literal in the notebook; a missing variable raises KeyError.
azure_embeddings = AzureOpenAIEmbeddings(
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_version="2023-05-15",
    azure_endpoint=azure_configs["base_url"],
    azure_deployment=azure_configs["embedding_deployment"],
    model=azure_configs["embedding_name"],
)

In [144]:
# Pull the questions and the reference answers out of the test set as plain
# Python lists, in row order.
test_questions, test_groundtruths = (
    test_df[column].values.tolist() for column in ("question", "ground_truth")
)

In [145]:
# Display the extracted questions as a sanity check before running the chain.
test_questions

['What is the Gas contract number of the customer named Sebastian?',
 'What is the meter number for meterId 1244 and what is its meter type?',
 'What is the Gas contract number, IBAN, street,  start of the contract, city,of the customer named Sebastian?',
 'What is the purpose of the charging instruction in the EMobilitySkill and its connection to the charging payment aspect?',
 'What services does Thuga Energie provide as a regional partner in the Hegau-Bodensee, Rhein-Pfalz, and Allgau-Oberschwaben regions?',
 'What are the details of the meter with meterId 1244 in the Power section?',
 'What options are there for making changes in our Kundenportal, and how can open questions be resolved with Kundenservice?',
 'How do I delete historical meter readings and what metering systems automate reading collection and consumption data?',
 'What are the service hours for the Kundenzentrum in Sudpfalz and which regions does Thuga Energie serve?',
 'Where can I find the meter number for my elect

In [146]:
# Run every test question through the RAG chain and collect, per question,
# the answer text and the text of each retrieved document.
answers, contexts = [], []

for question in test_questions:
    response = rag_chain.invoke({"question": question})
    answers.append(response["response"].content)
    retrieved_texts = [doc.page_content for doc in response["context"]]
    contexts.append(retrieved_texts)

In [147]:
from datasets import Dataset

# Column-wise layout of the evaluation data: the four lists are aligned by
# position (one entry per test question).
evaluation_columns = {
    "question": test_questions,
    "answer": answers,
    "contexts": contexts,
    "ground_truth": test_groundtruths,
}
response_dataset = Dataset.from_dict(evaluation_columns)

In [148]:
# Inspect the first evaluation record (question, answer, contexts, ground truth).
# NOTE(review): the rendered output contains customer data (name, IBAN,
# address) — clear or redact this output before sharing the notebook.
response_dataset[0]

{'question': 'What is the Gas contract number of the customer named Sebastian?',
 'answer': 'The gas contract number for the customer named Sebastian is ES00000062980.',
 'contexts': ['ï»¿contractNumber: ES00000062980\nsection: Gas\ninvoicingInterval: 12\ncancellationDateTo: \nperiodStart: 2022-03-14T23:00:00.000Z\nperiodEnd: \nstate: GPKE_APPROVED\nnextPossibleCancellationDate: 2024-03-14T22:59:59.000Z\nfirstName: Sebastian\nlastName: LÃ¶ecke\nbankName: Commerzbank KÃ¶ln\niban: DE89370400440532013000\nbic: COBADEFFXXX\ncustomerNumber: 1005520645\nstreet: Schmiedeweg\nhousenumber: 10b\npostalCode: 21481\ncity: Lauenburg',
  'ï»¿contractNumber: ES00000062980\nsection: Gas\ninvoicingInterval: 12\ncancellationDateTo: \nperiodStart: 2022-03-14T23:00:00.000Z\nperiodEnd: \nstate: GPKE_APPROVED\nnextPossibleCancellationDate: 2024-03-14T22:59:59.000Z\nfirstName: Sebastian\nlastName: LÃ¶ecke\nbankName: Commerzbank KÃ¶ln\niban: DE89370400440532013000\nbic: COBADEFFXXX\ncustomerNumber: 1005520645

In [149]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

# The five ragas metrics that are passed to evaluate() for this run.
metrics = [
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
]

In [150]:
# Score the collected answers with the selected metrics, using the Azure chat
# model and Azure embeddings configured for the evaluation.
results = evaluate(
    response_dataset,
    metrics=metrics,
    llm=azure_model,
    embeddings=azure_embeddings,
)

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

Evaluating: 100%|██████████| 50/50 [04:08<00:00,  4.97s/it]


In [151]:
# Show the aggregate score per metric.
results

{'faithfulness': 0.9024, 'answer_relevancy': 0.9600, 'answer_correctness': 0.6893, 'context_recall': 0.9750, 'context_precision': 0.9833}

In [162]:
import pandas as pd

# Turn the aggregate scores into a two-column table, one row per metric.
score_pairs = list(results.items())
df = pd.DataFrame(data=score_pairs, columns=['Metric', 'Value'])
print(df)


               Metric     Value
0        faithfulness  0.902381
1    answer_relevancy  0.960016
2  answer_correctness  0.689309
3      context_recall  0.975000
4   context_precision  0.983333


In [159]:
# Per-question view of the evaluation: one row per test question with its
# answer, contexts, ground truth and the individual metric scores.
df = results.to_pandas()

In [160]:
# Display the full per-question score table.
df

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,answer_correctness,context_recall,context_precision
0,What is the Gas contract number of the custome...,The gas contract number for the customer named...,[ï»¿contractNumber: ES00000062980\nsection: Ga...,The gas contract number for the customer named...,1.0,0.997162,0.748754,1.0,1.0
1,What is the meter number for meterId 1244 and ...,The meter number for meterId 1244 is ZEI2332DE...,[ï»¿meterId: 1244\nmeterNumber: ZEI2332DE2\nse...,The meter number for meterId 1244 is ZEI2332DE...,1.0,1.0,1.0,1.0,1.0
2,"What is the Gas contract number, IBAN, street,...",The Gas contract number for the customer named...,[ï»¿contractNumber: ES00000062980\nsection: Ga...,"The Gas contract number is ES00000062980, the ...",1.0,0.939538,0.841884,1.0,1.0
3,What is the purpose of the charging instructio...,The purpose of the charging instruction in the...,[id: chargingPayment\nSkillName: EMobilitySkil...,The purpose of the charging instruction in the...,0.5,0.968782,0.655368,1.0,1.0
4,What services does Thuga Energie provide as a ...,Thuga Energie provides innovative products suc...,[Viele Anliegen können Sie über unsere Onlines...,Thuga Energie provides innovative products suc...,1.0,0.928944,0.672555,1.0,1.0
5,What are the details of the meter with meterId...,The meter with meterId 1244 in the Power secti...,[ï»¿meterId: 1244\nmeterNumber: ZEI2332DE2\nse...,The details of the meter with meterId 1244 in ...,1.0,1.0,0.538733,1.0,0.833333
6,What options are there for making changes in o...,"In the Kundenportal, you can make changes onli...",[answerId: 102212\ntext: Unser Kundenportal st...,The Kundenportal offers the option to make cha...,1.0,0.925665,0.4731,0.75,1.0
7,How do I delete historical meter readings and ...,"To delete historical meter readings, you would...",[conversationId: Dtf97DFIiX05X7OmOJLDx1-eu\nty...,"To delete historical meter readings, it is bes...",0.666667,0.91517,0.416161,1.0,1.0
8,What are the service hours for the Kundenzentr...,The service hours for the Kundenzentrum in Sud...,[Viele Anliegen können Sie über unsere Onlines...,The service hours for the Kundenzentrum in Sud...,0.857143,0.932504,0.546539,1.0,1.0
9,Where can I find the meter number for my elect...,You can find the meter number for your electri...,[answerId: 102127\ntext: Die **Zählernummer** ...,You can find the meter number for your electri...,1.0,0.992396,1.0,1.0,1.0


In [152]:
# Convert the evaluation results to a per-question DataFrame and preview the
# first rows. NOTE(review): `df` is also assigned from results.to_pandas() in
# another cell — one of the two conversions looks redundant.
df = results.to_pandas()
df.head()

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,answer_correctness,context_recall,context_precision
0,What is the Gas contract number of the custome...,The gas contract number for the customer named...,[ï»¿contractNumber: ES00000062980\nsection: Ga...,The gas contract number for the customer named...,1.0,0.997162,0.748754,1.0,1.0
1,What is the meter number for meterId 1244 and ...,The meter number for meterId 1244 is ZEI2332DE...,[ï»¿meterId: 1244\nmeterNumber: ZEI2332DE2\nse...,The meter number for meterId 1244 is ZEI2332DE...,1.0,1.0,1.0,1.0,1.0
2,"What is the Gas contract number, IBAN, street,...",The Gas contract number for the customer named...,[ï»¿contractNumber: ES00000062980\nsection: Ga...,"The Gas contract number is ES00000062980, the ...",1.0,0.939538,0.841884,1.0,1.0
3,What is the purpose of the charging instructio...,The purpose of the charging instruction in the...,[id: chargingPayment\nSkillName: EMobilitySkil...,The purpose of the charging instruction in the...,0.5,0.968782,0.655368,1.0,1.0
4,What services does Thuga Energie provide as a ...,Thuga Energie provides innovative products suc...,[Viele Anliegen können Sie über unsere Onlines...,Thuga Energie provides innovative products suc...,1.0,0.928944,0.672555,1.0,1.0


In [153]:
# Write the per-question scores to disk; the DataFrame index is not written.
file_path = 'C:/Users/Admin/Desktop/hsag chatbot/evaluationresults1.csv'
df.to_csv(path_or_buf=file_path, index=False)