In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MaxNLocator


In [3]:
# Load the LLM-validation results CSV into `eval_data`.
eval_data = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation.csv')

In [14]:
# Load the `..._gemini.csv` validation results into `eval_data_gemini`.
eval_data_gemini = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_gemini.csv')

In [17]:
# Rebind `eval_data_gemini` to the `..._gemini_2.csv` results (replaces whatever
# frame was previously held under this name).
eval_data_gemini = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_gemini_2.csv')

In [19]:
# Rebind `eval_data_gemini` to the `..._gemini_context_20.csv` results (replaces
# whatever frame was previously held under this name).
eval_data_gemini = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_gemini_context_20.csv')

In [21]:
# Rebind `eval_data_gemini` to the `..._gemini_context_30.csv` results (replaces
# whatever frame was previously held under this name).
eval_data_gemini = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_gemini_context_30.csv')

In [22]:
# Rebind `eval_data_gemini` to the `..._gemini_context_25.csv` results (replaces
# whatever frame was previously held under this name).
eval_data_gemini = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_gemini_context_25.csv')

In [42]:
def plot_llm_validation(file_path: str) -> None:
    """Plot the percentage of each LLM verdict stored in one validation CSV.

    Reads the CSV at ``file_path``, takes its ``LLM_VALIDATION`` column and draws
    a histogram with one bar per integer verdict, weighted so the bar heights sum
    to 100. Every non-empty bar is annotated with its percentage, and the ticks
    0, 1 and 2 are labelled 'Incorrect Answer', 'Partial Answer' and
    'Correct Answer'.

    Args:
        file_path: Location of the CSV; a ``str`` or any path-like object that
            ``pandas.read_csv`` accepts. The part of the file name after the last
            ``__`` is shown in the title as the experiment id.

    Returns:
        None. The figure is displayed with ``plt.show()`` and then closed. If the
        column holds no non-null values a message is printed and nothing is drawn.
    """
    from pathlib import Path  # local import so this cell does not depend on later cells

    verdict_labels = ['Incorrect Answer', 'Partial Answer', 'Correct Answer']

    eval_data_gemini = pd.read_csv(file_path)
    # NaN verdicts cannot be binned (the histogram range becomes non-finite), so
    # drop them and compute the percentages over the values actually plotted.
    data = eval_data_gemini.LLM_VALIDATION.dropna()

    # Path(...).name handles both str and Path inputs and any OS path separator.
    experiment_id = Path(file_path).name.split("__")[-1]

    if data.empty:
        print(f'No LLM_VALIDATION values to plot for {experiment_id}')
        return

    # Each row contributes 100/N so the y-axis really is a percentage.
    weights = np.full(len(data), 100.0 / len(data))

    # One unit-wide bin centred on every integer verdict, so each bar sits exactly
    # on its tick. The range always covers 0..2 and widens if the data goes beyond.
    lowest = min(0, int(np.floor(data.min())))
    highest = max(len(verdict_labels) - 1, int(np.ceil(data.max())))
    bin_edges = np.arange(lowest - 0.5, highest + 1.0, 1.0)

    fig, ax = plt.subplots()
    n, _, patches = ax.hist(data, weights=weights, bins=bin_edges, alpha=0.5)

    ax.set_xlabel('Gemini Verdict', size=14)
    ax.set_ylabel('Percentage', size=14)
    ax.set_title(f'Chatbot validation - experiment_id: {experiment_id}', size=15)

    # Write the percentage just above every non-empty bar.
    for share, patch in zip(n, patches):
        if share > 0:
            ax.text(patch.get_x() + patch.get_width() / 2., patch.get_height(),
                    f'{share:.1f}%', ha='center', va='bottom')

    # Map x-axis values to strings
    ax.set_xticks(list(range(len(verdict_labels))))
    ax.set_xticklabels(verdict_labels)

    plt.show()
    # Release the figure; this function is called once per file in a loop.
    plt.close(fig)

In [43]:
from pathlib import Path
from typing import List
from src.vectordb.gcp_vector_search.transform_and_load import get_file_names

# Directory scanned for validation CSVs; every file name returned by
# `get_file_names` is joined onto it and plotted in turn.
input_dir = "data/chatbot_validation_analysis/"
file_names: List[str] = list(get_file_names(input_dir))
document_paths: List[Path] = list(map(Path(input_dir).joinpath, file_names))

# One figure per file.
for file_path in document_paths:
    plot_llm_validation(file_path)

In [1]:
from src.chatbot.prompt_templates import default_prompt
from src.chatbot.config import ChatbotConfig
from src.chatbot.base import ChatbotBase
import os
from src.vectordb.chromadb.chatbot_db import ChromaDBChatbot
# Environment variables set for the rest of the kernel session, before the vector
# store and chatbot are built.
# NOTE(review): the credentials file name, project id, location and bucket are
# hardcoded here — consider moving them to external configuration.
os.environ.update({
    "GOOGLE_APPLICATION_CREDENTIALS": "prj-ilios-ai.json",
    "PROJECT_ID": "602280418311",
    "LOCATION": "us-west1",
    "GCS_BUCKET": "doc_ai_storage",
})

# Store obtained from a ChromaDBChatbot built with its default arguments.
vector_store = ChromaDBChatbot().vector_store

# Chatbot wired to that store; the store/index ids are left empty and
# max_documents is set to 15.
chatbot = ChatbotBase(
    vector_store=vector_store,
    config=ChatbotConfig(
        max_documents=15,
        prompt_template=default_prompt,
        index_id="",
        vector_store_id="",
    ),
)

In [6]:
# Run a sample question through the chatbot's retriever, passing limit=10, and keep
# the result in `resp` for inspection.
resp = chatbot.retriever.invoke("Who is the landlord?", limit=10)

In [8]:
# Display the length of the retriever result.
len(resp)

In [9]:
from src.settings import PROJECT_ROOT_PATH
# Rebind `vector_store` to the store opened from the `backup` DB directory under the
# project root (replaces any store previously held under this name).
vector_store = ChromaDBChatbot(db_path=PROJECT_ROOT_PATH/'src/vectordb/chromadb/db/backup').vector_store

In [88]:
# Second store handle, built with ChromaDBChatbot's default arguments; the cells
# below add documents to it.
vector_store_new = ChromaDBChatbot().vector_store

In [91]:
# Fetch records from `vector_store`, requesting documents, metadatas and ids. The
# result is indexed by 'documents' / 'metadatas' in the cells below.
docs = vector_store.get(include=['documents', 'metadatas', 'ids'])

In [47]:
# Inspect the container type of the fetched documents.
type(docs['documents'])

In [54]:
# Build cleaned copies of the fetched documents and metadata with placeholder
# entries left out. An entry is a placeholder when its text is 'N/A' (ignoring
# surrounding whitespace) or its metadata carries value == 'N/A'.
#
# Fresh lists are built instead of calling .remove() on docs['documents'] /
# docs['metadatas'] while iterating over them: removing during iteration skips the
# element that follows each removal, and .remove() drops the first *equal* item,
# which is not necessarily the one at the current position, so documents and
# metadata could end up misaligned. `docs` itself is left untouched.
documents = []
metadatas = []

for doc, meta in zip(docs['documents'], docs['metadatas']):
    # Non-string documents and missing metadata / missing 'value' keys are treated
    # as "not a placeholder" explicitly rather than via a swallowed exception.
    text_is_na = isinstance(doc, str) and doc.strip() == 'N/A'
    value_is_na = isinstance(meta, dict) and meta.get('value') == 'N/A'

    if text_is_na or value_is_na:
        print(doc)  # show what is being dropped
        continue

    documents.append(doc)
    metadatas.append(meta)

In [None]:

# NOTE(review): `docs` is the object returned by `vector_store.get(...)` (it is
# indexed with 'documents' / 'metadatas' elsewhere in this notebook), not a list of
# documents — confirm `add_documents` accepts it. This cell has no execution count,
# so it may never have been run.
vector_store_new.add_documents(docs)
# NOTE(review): `vector_store_new` is already the `.vector_store` attribute of a
# ChromaDBChatbot; confirm that object itself exposes `.vector_store.persist()`.
vector_store_new.vector_store.persist()

In [89]:

from langchain_core.documents import Document

# Wrap each (text, metadata) pair in a Document and add them all to the new store.
vector_store_new.add_documents(
    documents=[
        Document(page_content=text, metadata=metadata)
        for text, metadata in zip(documents, metadatas)
    ]
)

In [83]:
# Collect the `doc_id` metadata value of every placeholder entry: one whose text is
# 'N/A' (ignoring surrounding whitespace) or whose metadata has value == 'N/A'.
# Unexpected shapes are handled explicitly instead of with a bare `except: pass`,
# so a missing `doc_id` is reported rather than silently ignored.
ids = []
for doc, meta in zip(docs['documents'], docs['metadatas']):
    text_is_na = isinstance(doc, str) and doc.strip() == 'N/A'
    value_is_na = isinstance(meta, dict) and meta.get('value') == 'N/A'
    if not (text_is_na or value_is_na):
        continue

    print(doc)
    print(meta)

    doc_id = meta.get('doc_id') if isinstance(meta, dict) else None
    if doc_id is None:
        # Without an id the entry cannot be targeted for deletion; make that visible.
        print('Skipping placeholder entry without a doc_id in its metadata')
        continue
    ids.append(doc_id)

In [90]:
# Delete the entries whose ids were collected in `ids` from `vector_store`.
vector_store.delete(ids)

In [98]:

import pandas as pd
# Read the "Project Preview" sheet of the PPA workbook and keep only the rows
# that have no missing values.
document_path = "data/project_previews/ppa_project_preview.xlsx"
pp_data = pd.read_excel(document_path, sheet_name="Project Preview").dropna()

In [99]:
# Display the project-preview frame (rows with missing values already dropped).
pp_data

# Evaluate Gemini 1.5

In [3]:
import pandas as pd

In [20]:
# Validation results for three runs: the site-lease run, the "new_rag_ia" run, and
# the gemini_context_25 run, which is used as the baseline in the cells below.
data_site_lease = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_site_lease.csv')
data_ia = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_new_rag_ia.csv')
data_baseline = pd.read_csv('data/chatbot_validation_analysis/chatbot_validation__llm_validation_gemini_context_25.csv')

In [14]:
# Share of site-lease answers with a verdict of at least 1, out of the non-null verdicts.
data_site_lease.LLM_VALIDATION.ge(1).sum() / data_site_lease.LLM_VALIDATION.count()

In [31]:
# Share of `data_ia` answers with a verdict of at least 1, out of the non-null verdicts.
data_ia.LLM_VALIDATION.ge(1).sum() / data_ia.LLM_VALIDATION.count()

In [19]:
# Same share as above for `data_ia`, with one extra answer counted in the numerator.
# NOTE(review): the reason for the manual +1 is not recorded in this notebook — confirm.
(data_ia.LLM_VALIDATION.ge(1).sum() + 1) / data_ia.LLM_VALIDATION.count()

In [28]:
# Split the baseline results by row position: the first 17 rows vs. everything after.
# NOTE(review): presumably rows 0-16 are the site-lease questions and the remainder
# belong to the other document set (named `ai` here, `ia` in `data_ia`) — confirm the
# CSV row order, since the split relies on it.
baseline_site_lease = data_baseline.iloc[:17]
baseline_ai = data_baseline.iloc[17:]

In [29]:
# Share of baseline site-lease answers with a verdict of at least 1, out of the non-null verdicts.
baseline_site_lease.LLM_VALIDATION.ge(1).sum() / baseline_site_lease.LLM_VALIDATION.count()

In [30]:
# Share of the remaining baseline answers with a verdict of at least 1, out of the non-null verdicts.
baseline_ai.LLM_VALIDATION.ge(1).sum() / baseline_ai.LLM_VALIDATION.count()