In [1]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from bs4 import BeautifulSoup as Soup
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langsmith.evaluation._runner import evaluate
from langsmith.evaluation.integrations import LangChainStringEvaluator
from langchain.evaluation.qa.eval_chain import CotQAEvalChain


from langchain import hub
from langchain import prompts
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

In [8]:
import os

# Location of the saved FAISS index. The default is the path the notebook was
# originally written against; set ANIME_FAISS_INDEX_DIR to point elsewhere
# when running on another machine.
FAISS_INDEX_DIR = os.environ.get(
    "ANIME_FAISS_INDEX_DIR",
    "/Users/justinvhuang/Desktop/CSE-6242-Group-Project/app/faiss_anime_index_v3",
)
# Number of documents the retriever returns per query.
RETRIEVER_TOP_K = 10

# Embedding model used for queries, run on CPU with normalised vectors.
# NOTE(review): presumably the same model the index was built with - confirm.
encode_kwargs = {"normalize_embeddings": True}
embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs=encode_kwargs,
)

# NOTE(review): FAISS.load_local unpickles the stored docstore, so only load
# index directories you trust. Recent langchain_community releases appear to
# require `allow_dangerous_deserialization=True` here - confirm against the
# installed version before adding it.
db_faiss = FAISS.load_local(FAISS_INDEX_DIR, embeddings=embedding_function)
retriever = db_faiss.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

In [9]:
import os

import google.generativeai as genai
import yaml

# Path of the YAML file holding the Google API key. The default is the path
# the notebook was originally written against; override it with
# ANIME_CONFIG_PATH when running on another machine.
CONFIG_PATH = os.environ.get(
    "ANIME_CONFIG_PATH",
    "/Users/justinvhuang/Desktop/CSE-6242-Group-Project/app/config.yaml",
)

# Load API key from config.yaml
with open(CONFIG_PATH, "r") as file:
    config = yaml.safe_load(file)

# safe_load returns None for an empty file and may return a non-mapping, so
# validate before indexing and fail with a message that names the file.
api_key = config.get("api_key") if isinstance(config, dict) else None
if not api_key:
    raise KeyError(f"'api_key' is missing or empty in {CONFIG_PATH}")

# `userdata` is kept as a plain dict so later cells that look the key up by
# name keep working.
userdata = {"GOOGLE_API_KEY": api_key}
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

In [10]:
#### RETRIEVAL and GENERATION ####

# Prompt: the retrieved documents are placed in the system message and the
# user's question in the user message.
rag_prompt = prompts.ChatPromptTemplate.from_messages(
    [
        ("system", "Respond using the following documents as context:\n{documents}"),
        ("user", "{question}"),
    ]
)

# Chat model that answers from the prompt (temperature 0).
gemini_chat = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

# Generation: prompt piped into the chat model.
generator = rag_prompt | gemini_chat

# Chain: the incoming question is sent to the retriever to fill `documents`
# and passed through unchanged as `question`; the model reply is parsed to str.
rag_inputs = {
    "documents": retriever,
    "question": RunnablePassthrough(),
}
rag_chain = rag_inputs | generator | StrOutputParser()

# Question (smoke test; the last expression is shown as the cell output)
rag_chain.invoke("what are anime from studio ghbili?")

"Studio Ghibli has produced 18 feature films, including:\n- Castle in the Sky\n- Grave of the Fireflies\n- My Neighbor Totoro\n- Kiki's Delivery Service\n- Only Yesterday\n- Porco Rosso\n- Pom Poko\n- Whisper of the Heart\n- Princess Mononoke\n- My Neighbors the Yamadas\n- Spirited Away\n- The Cat Returns\n- Howl's Moving Castle\n- Tales from Earthsea\n- Ponyo\n- Arrietty\n- From Up on Poppy Hill\n- The Wind Rises\n- When Marnie Was There"

In [7]:
# Stand-alone Gemini chat model handle (temperature 0).
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)

In [21]:
# QA
# Each entry is (question, reference answer). Keeping the two side by side
# makes it impossible for the question and answer lists to drift out of step.
_QA_EXAMPLES = (
    (
        "Who is Fririen",
        "Fririen is a fictional character in a story or game.",
    ),
    (
        "What are good pirate anime?",
        '"One Piece" is a popular pirate anime.',
    ),
    (
        "What are some good kid anime?",
        '"Pokémon," "My Neighbor Totoro," and "SpongeBob SquarePants" are good kid anime.',
    ),
    (
        "What is the name of monkey d luffy's idol?",
        "Monkey D. Luffy's idol is Red-Haired Shanks.",
    ),
    (
        "What is the name of the space cowboy bounty hunter?",
        "The space cowboy bounty hunter's name is Spike Spiegel.",
    ),
    (
        "What are some good animes from studio ghibli?",
        'Some animes from Studio Ghibli include "Spirited Away," "My Neighbor Totoro," and "Princess Mononoke."',
    ),
    (
        "What are some 90s anime?",
        'Some 90s anime include "Neon Genesis Evangelion," "Dragon Ball Z," "Sailor Moon," "Cowboy Bebop," "Ghost in the Shell," and "Pokémon."',
    ),
)

# Parallel lists of questions and reference answers.
inputs = [question for question, _ in _QA_EXAMPLES]
outputs = [answer for _, answer in _QA_EXAMPLES]

# Dataset
qa_pairs = [
    {"question": question, "answer": answer}
    for question, answer in _QA_EXAMPLES
]

In [22]:
import getpass
import os

from langsmith import Client

os.environ['LANGCHAIN_TRACING_V2'] = 'true' # enables tracing

# SECURITY: the LangSmith API key must never be written into the notebook.
# An earlier revision of this cell contained a literal key; that key should be
# treated as leaked and revoked. The key is now taken from the environment,
# with an interactive (non-echoing) prompt as a fallback.
if not os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_API_KEY'] = getpass.getpass('LangSmith API key: ')

In [23]:
# Create dataset
client = Client()
dataset_name = "RAG_ANIMEv3"

# Store
# Reuse the dataset when it already exists so the cell can be re-run without
# failing on a name conflict or having to bump the dataset name.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
else:
    dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="QA pairs about ANIME.",
    )

# Upload the QA pairs only when the dataset has no examples yet; this avoids
# duplicating them on a re-run and also repairs a dataset that was created
# but never populated.
if next(iter(client.list_examples(dataset_id=dataset.id)), None) is None:
    client.create_examples(
        inputs=[{"question": q} for q in inputs],
        outputs=[{"answer": a} for a in outputs],
        dataset_id=dataset.id,
    )

In [24]:
# Evaluators
# The "cot_qa" grader is driven by `llm`, the Gemini chat model defined in an
# earlier cell with the same settings this cell previously rebuilt inline.
qa_evalulator = [LangChainStringEvaluator("cot_qa", config={"llm": llm})]


def predict(example: dict) -> dict:
    """Answer one dataset example with the RAG chain.

    Args:
        example: Inputs of a dataset example; must contain a "question" key.

    Returns:
        A dict with a single "answer" key holding the chain's string output.
    """
    return {"answer": rag_chain.invoke(example["question"])}


# `dataset_name` comes from the dataset-creation cell so the evaluated
# dataset cannot drift from the one that was uploaded.
# NOTE(review): the prefix says "oai" although the chain uses Gemini; it is
# left unchanged so new runs group with the existing experiments.
experiment_results = evaluate(
    predict,
    data=dataset_name,
    evaluators=qa_evalulator,
    experiment_prefix="rag-qa-oai",
    # Any experiment metadata can be specified here
    metadata={
      "variant": "ANIME context, gemini",
    },
)

View the evaluation results for experiment: 'rag-qa-oai-4257f722' at:
https://smith.langchain.com/o/ee2bfa72-1f76-5322-9029-9ae994e50470/datasets/9f203d1f-da4a-45ce-8f07-1f7477dd4c90/compare?selectedSessions=710139bc-57ba-474f-b69a-6251f749617e




0it [00:00, ?it/s]