In [15]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import TestsetGenerator

from langchain_community.document_loaders import UnstructuredExcelLoader, UnstructuredMarkdownLoader
from uuid import uuid4
from pathlib import Path
import pandas as pd

import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# The key is stored under OPEN_AI; mirror it to OPENAI_API_KEY, presumably so that
# clients constructed without an explicit api_key can pick it up (confirm against
# the langchain-openai docs).
# Assigning None into os.environ raises an opaque TypeError, so fail early with a
# message that says what is actually missing.
_openai_api_key = os.getenv("OPEN_AI")
if _openai_api_key is None:
    raise RuntimeError(
        "Environment variable OPEN_AI is not set; define it in .env or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key


In [None]:
# Folder containing the raw source documents, and the files to ingest from it.
data_folder = Path("data")
files = [
    "fees_and_scholarships.md",
    "accomadation.xlsx",
    "courses.xlsx",
    "faq.xlsx",
    "sports_club.xlsx",
    "student_club.xlsx",
]

# Maps each successfully loaded file name to the list of documents produced by its loader.
docs_dict = {}

for file_name in files:
    source_path = str(data_folder / file_name)

    # Choose the loader from the file extension; anything else is reported and skipped.
    if file_name.endswith('.md'):
        loader = UnstructuredMarkdownLoader(source_path)
    elif file_name.endswith('.xlsx'):
        loader = UnstructuredExcelLoader(source_path, mode="elements")
    else:
        print(f"Unsupported file type: {file_name}")
        continue

    # Best effort: a file that fails to load is reported and left out of docs_dict.
    try:
        docs_dict[file_name] = loader.load()
    except Exception as load_error:
        print(f"Error loading {file_name}: {load_error}")

print(f"Loaded documents for {len(docs_dict)} files.")

In [3]:
# Generated test sets keyed by source file name. The generation loop skips any
# file that already has an entry here, so it can be re-run without redoing work.
df_testests = dict()

In [None]:
for source_file, source_docs in docs_dict.items():
    # Files that already have a generated test set are not regenerated.
    if source_file in df_testests:
        continue

    # Only the first 100 documents of a file are used for generation.
    source_docs = source_docs[:100] if len(source_docs) >= 100 else source_docs
    print(f"generating test set for: {source_file}")

    # A fresh generator (with its own LLM and embedding wrappers) is built per file.
    generator = TestsetGenerator(
        llm=LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini", temperature=0.1)),
        embedding_model=LangchainEmbeddingsWrapper(OpenAIEmbeddings()),
    )
    testset_df = generator.generate_with_langchain_docs(source_docs, testset_size=30).to_pandas()

    df_testests[source_file] = testset_df
    display(testset_df)


In [9]:
# Tag every per-file test set with the file it came from and stack them into one frame.
# `assign` returns a copy, so the frames cached in `df_testests` are left unmodified
# and this cell gives the same result no matter how often it is re-run.
dataframes = [
    testset_df.assign(filename=file_name)
    for file_name, testset_df in df_testests.items()
]

# pd.concat raises ValueError on an empty list; raise the same type with a message
# that points at the actual cause.
if not dataframes:
    raise ValueError("df_testests is empty: generate at least one test set before combining.")

df = pd.concat(dataframes, ignore_index=True)

In [10]:
# Persist the combined test set so later cells can reload it from disk instead of
# regenerating it.
df.to_pickle("test_data_generated.pkl")

In [3]:
import pandas as pd
# Reload the combined test set from the pickle file written earlier in this notebook.
# NOTE(review): unpickling can execute arbitrary code — only load files you produced yourself.
df = pd.read_pickle("test_data_generated.pkl")

In [4]:
import re
def clean_reference_contexts(contexts):
    """Remove 36-character id markers from reference contexts.

    Every run of exactly 36 characters drawn from ``a-f``, ``0-9`` and ``-``
    that is immediately followed by two newlines is deleted, wherever it
    occurs in the text.

    Args:
        contexts: A string, a list of strings, or any other value.

    Returns:
        A new list of cleaned strings for list input, a cleaned string for
        string input, and the input itself, unchanged, for any other type.
    """
    def _strip_ids(text):
        # Same pattern for both the single-string and the list case.
        return re.sub(r"[a-f0-9-]{36}\n\n", "", text)

    if isinstance(contexts, str):
        return _strip_ids(contexts)
    if isinstance(contexts, list):
        return list(map(_strip_ids, contexts))
    return contexts

# Clean every row's reference contexts, overwriting the column in place.
df['reference_contexts'] = df['reference_contexts'].apply(clean_reference_contexts)

In [26]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


In [27]:
template = """
You are an advanced AI trained to evaluate answers based on reference contexts. You will assess whether the provided answer is correct or incorrect according to the given reference contexts. 

You should respond when you are at least 90% confident in your evaluation. If the reference contexts are insufficient to answer the question accurately, provide a detailed explanation of the reason why the answer cannot be determined. 

Your response will be in the form of a tuple, following the examples below:

(Correct, Explanation of why the answer is correct)
(Incorrect, Explanation of why the answer is incorrect)

---

**Question:**
{user_input}

**Reference Contexts:**
{reference_contexts}

**Answer:**
{reference}

Assistant:

"""

prompt = ChatPromptTemplate.from_template(template) 

In [28]:

# Judge model: temperature 0, no explicit token or timeout limit, up to two
# retries per request. The API key is read from the OPEN_AI environment variable.
llm = ChatOpenAI(
    api_key=os.getenv("OPEN_AI"),
    model="gpt-4o-mini",
    temperature=0,
    max_retries=2,
    timeout=None,
    max_tokens=None,
)


In [29]:
# Judge pipeline: fill the prompt, call the model, return the reply as a plain string.
output_parser = StrOutputParser()
rag_chain = prompt | llm | output_parser

In [30]:
# Keep only the three columns the judge prompt consumes.
# Note: this rebinds `df`, so later cells see the narrowed frame.
df = df[["user_input", "reference_contexts", "reference"]]

# Raw judge reply per row, keyed by the row's index label in `df`.
results = {}
for index, row in df.iterrows():
    verdict = rag_chain.invoke(row.to_dict())
    results[index] = verdict
    print(verdict)

(Correct, The answer accurately reflects the information provided in the reference context, detailing how the Office of Dormitories obtained quality certification from the Turkish Standards Institution and the successful inspections that demonstrate their commitment to high standards in dormitory services.)
(Correct, The answer accurately reflects the information provided in the reference context, which states that the Executive Board of Dormitories evaluates applications based on criteria established by regulations approved by the Rectorate.)
(Correct, The answer accurately reflects the information provided in the reference context regarding the operating hours for buses and minibuses.)
(Correct, The answer accurately reflects the information provided in the reference context, stating the operating hours of buses and minibuses between the city center and the dormitories.)
(Correct, The answer accurately reflects the information provided in the reference context, which states that stud

In [36]:
# One row per judged example: the index label from `df` and the raw judge reply.
answer_rows = list(results.items())
results_df = pd.DataFrame(answer_rows, columns=['Index_of_df', 'Answer'])

In [38]:
# Save the raw judge replies as JSON Lines (one record per line).
results_df.to_json('test_data_control_results.json', orient='records', lines=True)

In [39]:
# Display the judge replies for a quick visual check.
results_df

Unnamed: 0,Index_of_df,Answer
0,0,"(Correct, The answer accurately reflects the i..."
1,1,"(Correct, The answer accurately reflects the i..."
2,2,"(Correct, The answer accurately reflects the i..."
3,3,"(Correct, The answer accurately reflects the i..."
4,4,"(Correct, The answer accurately reflects the i..."
...,...,...
131,131,"(Correct, The answer accurately reflects the a..."
132,132,"(Correct, The answer accurately summarizes the..."
133,133,"(Correct, The answer accurately summarizes the..."
134,134,"(Correct, The answer accurately describes the ..."


In [40]:
# Split each "(Verdict, explanation)" reply into a verdict column and a details column.
# The inline (?s) flag lets `.` match newlines too: without it, a reply whose
# explanation spans several lines fails to match, yields NaN in both columns, and is
# then silently excluded when rows are filtered on Correctness == "Correct".
results_df[['Correctness', 'Details']] = results_df['Answer'].str.extract(r'(?s)\((\w+),\s*(.*)\)')

In [42]:
# Sanity check: list the distinct verdict values that were parsed from the replies.
results_df.Correctness.unique()

array(['Correct', 'Incorrect'], dtype=object)

In [44]:
# Index labels (into `df`) of the rows the judge marked as "Correct".
is_correct = results_df["Correctness"] == "Correct"
correct_idx = results_df.loc[is_correct, "Index_of_df"]

In [46]:
# Keep only the rows judged "Correct" (selected by index label) and persist them.
# `df` was narrowed earlier to user_input / reference_contexts / reference, so only
# those three columns are written.
df.loc[correct_idx].to_pickle("controlled_test_data.pkl")