---
# Setup
1. Environment (variables & dependencies)
2. Application

In [None]:
#Env Var
from google.colab import userdata
import os
os.environ["LANGSMITH_TRACING_V2"] = "true"
os.environ["LANGSMITH_API_KEY"] = userdata.get('Smith2')
GEMINI_API_KEY = userdata.get('gemini')

In [None]:
#Dependencies
!pip install -U langsmith langchain-google-genai langchain-community

Collecting langsmith
  Downloading langsmith-0.4.5-py3-none-any.whl.metadata (15 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.7-py3-none-any.whl.metadata (7.0 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.18 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.6.18-py3-none-any.whl.metadata (9.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.met

---
## Application

In [None]:
#@title Indexing & Retrieval
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_google_genai import GoogleGenerativeAIEmbeddings # Import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# List of URLs to load documents from
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load documents from the URLs
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Initialize a text splitter with specified chunk size and overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)

# Split the documents into chunks
doc_splits = text_splitter.split_documents(docs_list)

# Add the document chunks to the "vector store" using GoogleGenerativeAIEmbeddings
vectorstore = InMemoryVectorStore.from_documents(
    documents=doc_splits,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GEMINI_API_KEY), # Use GoogleGenerativeAIEmbeddings
)

# With langchain we can easily turn any vector store into a retrieval component:
retriever = vectorstore.as_retriever(k=6)



In [None]:
#@title Generation
from langchain_google_genai import ChatGoogleGenerativeAI # Import ChatGoogleGenerativeAI
from langsmith import traceable

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=1, google_api_key=GEMINI_API_KEY) # Use ChatGoogleGenerativeAI

# Add decorator so this function is traced in LangSmith
@traceable()
def rag_bot(question: str) -> dict:
    # LangChain retriever will be automatically traced
    docs = retriever.invoke(question)
    docs_string = "\n\n".join(doc.page_content for doc in docs)

    instructions = f"""You are a helpful assistant who is good at analyzing source information and answering questions. Use the following source documents to answer the user's questions. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

Documents:
{docs_string}"""

    # langchain ChatModel will be automatically traced
    ai_msg = llm.invoke([
            {"role": "system", "content": instructions},
            {"role": "user", "content": question},
        ],
    )

    return {"answer": ai_msg.content, "documents": docs}

---
# Dataset

In [None]:
from langsmith import Client

client = Client()

In [None]:
from langsmith import Client

client = Client()

# Define the examples for the dataset
examples = [
    {
        "inputs": {"question": "How does the ReAct agent use self-reflection? "},
        "outputs": {"answer": "ReAct integrates reasoning and acting, performing actions - such tools like Wikipedia search API - and then observing / reasoning about the tool outputs."},
    },
    {
        "inputs": {"question": "What are the types of biases that can arise with few-shot prompting?"},
        "outputs": {"answer": "The biases that can arise with few-shot prompting include (1) Majority label bias, (2) Recency bias, and (3) Common token bias."},
    },
    {
        "inputs": {"question": "What are five types of adversarial attacks?"},
        "outputs": {"answer": "Five types of adversarial attacks are (1) Token manipulation, (2) Gradient based attack, (3) Jailbreak prompting, (4) Human red-teaming, (5) Model red-teaming."},
    }
]

# Create the dataset and examples in LangSmith
dataset_name = "Lilian Weng Blogs Q&A2"
dataset = client.create_dataset(dataset_name=dataset_name)
client.create_examples(
    dataset_id=dataset.id,
    examples=examples
)

{'example_ids': ['c5db7470-2743-43a4-b410-47d4c975e5f2',
  'd2cd6f07-cfc2-4e83-8245-1c1ac90eaf38',
  '3aec01cc-b9b6-4621-9d27-dfba4cb33111'],
 'count': 3}

---
# Evaluators

## Corectness

In [None]:
from typing_extensions import Annotated
from pydantic import BaseModel, Field

# Grade output schema
class CorrectnessGrade(BaseModel):
    # Note that the order in the fields are defined is the order in which the model will generate them.
    # It is useful to put explanations before responses because it forces the model to think through
    # its final response before generating it:
    explanation: str = Field(..., description="Explain your reasoning for the score")
    correct: bool = Field(..., description="True if the answer is correct, False otherwise.")

# Grade prompt
correctness_instructions = """You are a teacher grading a quiz.

You will be given a QUESTION, the GROUND TRUTH (correct) ANSWER, and the STUDENT ANSWER.

Here is the grade criteria to follow:
(1) Grade the student answers based ONLY on their factual accuracy relative to the ground truth answer.
(2) Ensure that the student answer does not contain any conflicting statements.
(3) It is OK if the student answer contains more information than the ground truth answer, as long as it is factually accurate relative to the  ground truth answer.

Correctness:
A correctness value of True means that the student's answer meets all of the criteria.
A correctness value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.

Avoid simply stating the correct answer at the outset."""

# Grader LLM
grader_llm_correctness = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=0, google_api_key=GEMINI_API_KEY).with_structured_output(CorrectnessGrade, method="json_mode", strict=True)


def correctness(inputs: dict, outputs: dict, reference_outputs: dict) -> dict:
    """An evaluator for RAG answer accuracy"""
    answers = f"""\
QUESTION: {inputs['question']}
GROUND TRUTH ANSWER: {reference_outputs['answer']}
STUDENT ANSWER: {outputs['answer']}"""

    # Run evaluator
    grade: CorrectnessGrade = grader_llm_correctness.invoke([
        {"role": "system", "content": correctness_instructions},
        {"role": "user", "content": answers}
    ])
    return {"score": grade.correct}

## Relevance

In [None]:
from typing_extensions import Annotated
from pydantic import BaseModel, Field

# Grade output schema
class RelevanceGrade(BaseModel):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    relevant: Annotated[bool, ..., "Provide the score on whether the answer addresses the question"]

# Grade prompt
relevance_instructions="""You are a teacher grading a quiz.

You will be given a QUESTION and a STUDENT ANSWER.

Here is the grade criteria to follow:
(1) Ensure the STUDENT ANSWER is concise and relevant to the QUESTION
(2) Ensure the STUDENT ANSWER helps to answer the QUESTION

Relevance:
A relevance value of True means that the student's answer meets all of the criteria.
A relevance value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.

Avoid simply stating the correct answer at the outset."""

# Grader LLM
grader_llm_relevance = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=1, google_api_key=GEMINI_API_KEY).with_structured_output(RelevanceGrade, method="json_mode", strict=True)


def relevance(inputs: dict, outputs: dict) -> dict:
    """A simple evaluator for RAG answer helpfulness."""
    answers = f"""\
QUESTION: {inputs['question']}
STUDENT ANSWER: {outputs['answer']}"""

# Run evaluator
    grade: RelevanceGrade = grader_llm_relevance.invoke([
        {"role": "system", "content": relevance_instructions},
        {"role": "user", "content": answers}
    ])
    return {"score": grade.relevant}

## Groundness

In [None]:
from typing_extensions import Annotated
from pydantic import BaseModel, Field

# Grade output schema
class GroundedGrade(BaseModel):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    grounded: Annotated[bool, ..., "Provide the score on if the answer hallucinates from the documents"]

# Grade prompt
grounded_instructions = """You are a teacher grading a quiz.

You will be given FACTS and a STUDENT ANSWER.

Here is the grade criteria to follow:
(1) Ensure the STUDENT ANSWER is grounded in the FACTS.
(2) Ensure the STUDENT ANSWER does not contain "hallucinated" information outside the scope of the FACTS.

Grounded:
A grounded value of True means that the student's answer meets all of the criteria.
A grounded value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.

Avoid simply stating the correct answer at the outset."""


# Grader LLM
grader_llm_groundedness = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=1, google_api_key=GEMINI_API_KEY).with_structured_output(GroundedGrade, method="json_mode", strict=True)


def groundedness(inputs: dict, outputs: dict) -> dict:
    """A simple evaluator for RAG answer groundedness."""
    doc_string = "\n\n".join(doc.page_content for doc in outputs["documents"])
    answers = f"FACTS: {doc_string}\nSTUDENT ANSWER: {outputs['answer']}"

# Run evaluator
    grade: GroundedGrade = grader_llm_groundedness.invoke([
        {"role": "system", "content": grounded_instructions},
        {"role": "user", "content": answers}
    ])
    return {"score": grade.grounded}

## Retrieval Relevance

In [None]:
from typing_extensions import Annotated
from pydantic import BaseModel, Field

# Grade output schema
class RetrievalRelevanceGrade(BaseModel):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    relevant: Annotated[bool, ..., "True if the retrieved documents are relevant to the question, False otherwise"]

# Grade prompt
retrieval_relevance_instructions = """You are a teacher grading a quiz.

You will be given a QUESTION and a set of FACTS provided by the student.

Here is the grade criteria to follow:
(1) You goal is to identify FACTS that are completely unrelated to the QUESTION
(2) If the facts contain ANY keywords or semantic meaning related to the question, consider them relevant
(3) It is OK if the facts have SOME information that is unrelated to the question as long as (2) is met

Relevance:
A relevance value of True means that the FACTS contain ANY keywords or semantic meaning related to the QUESTION and are therefore relevant.
A relevance value of False means that the FACTS are completely unrelated to the QUESTION.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.

Avoid simply stating the correct answer at the outset."""

# Grader LLM
grader_llm_rrelevance = ChatGoogleGenerativeAI(model="gemini-1.5-flash-latest", temperature=1, google_api_key=GEMINI_API_KEY).with_structured_output(RetrievalRelevanceGrade, method="json_mode", strict=True)


def retrieval_relevance(inputs: dict, outputs: dict) -> dict:
    """An evaluator for document relevance"""
    doc_string = "\n\n".join(doc.page_content for doc in outputs["documents"])
    answer = f"FACTS: {doc_string}\nQUESTION: {inputs['question']}"

# Run evaluator
    grade: RetrievalRelevanceGrade = grader_llm_rrelevance.invoke([
        {"role": "system", "content": retrieval_relevance_instructions},
        {"role": "user", "content": answer}
    ])
    return {"score": grade.relevant}

---
# Run

In [None]:
def target(inputs: dict) -> dict:
    return rag_bot(inputs["question"])

experiment_results = client.evaluate(
    target,
    data=dataset_name,
    evaluators=[correctness, groundedness, relevance, retrieval_relevance],
    experiment_prefix="rag-doc-relevance",
    metadata={"version": "LCEL context, gpt-4-0125-preview"},
)
# Explore results locally as a dataframe if you have pandas installed
# experiment_results.to_pandas()

View the evaluation results for experiment: 'rag-doc-relevance-2fa21bfe' at:
https://smith.langchain.com/o/ef9e87ab-1348-4c3e-9139-19b869acd75b/datasets/9c11f3df-06f3-493b-920d-e69501289199/compare?selectedSessions=f623aa8b-9a70-49ac-b3a3-d688cb9a7775




0it [00:00, ?it/s]

## Export Results to Pandas

In [None]:
# Export experiment results to a pandas DataFrame
df_results = experiment_results.to_pandas()

# Display the first 5 rows of the DataFrame
display(df_results.head())

Unnamed: 0,inputs.question,outputs.answer,outputs.documents,error,reference.answer,feedback.correctness,feedback.groundedness,feedback.relevance,feedback.retrieval_relevance,execution_time,example_id,id
0,What are five types of adversarial attacks?,The five adversarial attacks described are tok...,[page_content='Black-box attacks assume that a...,,Five types of adversarial attacks are (1) Toke...,True,True,True,True,1.300699,3aec01cc-b9b6-4621-9d27-dfba4cb33111,c4e2c372-5e43-4b8a-9f9d-45437c8d1ac3
1,How does the ReAct agent use self-reflection?,The ReAct agent doesn't directly use self-refl...,[page_content='Self-reflection is a vital aspe...,,"ReAct integrates reasoning and acting, perform...",True,True,True,True,1.048171,c5db7470-2743-43a4-b410-47d4c975e5f2,24a2faf1-2a88-4238-980e-6666e4788a47
2,What are the types of biases that can arise wi...,Majority label bias and recency bias can arise...,[page_content='Two main types of CoT prompting...,,The biases that can arise with few-shot prompt...,True,True,True,True,0.911335,d2cd6f07-cfc2-4e83-8245-1c1ac90eaf38,73f84b8d-55ee-4ef9-87bb-7d743aaa90c0


---
---
# to gSheet

In [None]:
# @title
from google.colab import sheets
import uuid

# Convert UUID objects to strings in the DataFrame
for col in df_results.columns:
    if df_results[col].apply(lambda x: isinstance(x, uuid.UUID)).any():
        df_results[col] = df_results[col].astype(str)

sheet = sheets.InteractiveSheet(df=df_results)

https://docs.google.com/spreadsheets/d/1LE--llyG76SC9x2ucRSNkxRB7y-CpP670bHwcOF3FOA/edit#gid=0


# to gSheet_2_reduced

In [25]:
!pip install gspread
!pip install oauth2client

import gspread
from google.colab import auth
from google.auth import default

# Authenticate to Google
auth.authenticate_user()

# Get the authenticated credentials
creds, _ = default()

# Authorize gspread with the obtained credentials
gc = gspread.authorize(creds)

spreadsheet_name = "LangSmith Experiment Results"

try:
    # Try to open an existing spreadsheet
    sh = gc.open(spreadsheet_name)
    print(f"Opened existing spreadsheet: {spreadsheet_name}")
except gspread.SpreadsheetNotFound:
    # If the spreadsheet doesn't exist, create a new one
    sh = gc.create(spreadsheet_name)
    print(f"Created new spreadsheet: {spreadsheet_name}")


# Convert 'example_id' and 'id' columns to string representation
df_results['example_id'] = df_results['example_id'].astype(str)
df_results['id'] = df_results['id'].astype(str)

# Select the first worksheet
worksheet = sh.sheet1

# Clear existing content
worksheet.clear()

# Convert DataFrame to list of lists including header
data_to_write = [df_results.columns.values.tolist()] + df_results.values.tolist()

# Write the data to the worksheet starting from A1
worksheet.update(data_to_write)


Created new spreadsheet: LangSmith Experiment Results


{'spreadsheetId': '15Fy4tWjrW5nLAP2rGols1eb7PrxuTEK3M2U5rw-yHug',
 'updatedRange': 'Sheet1!A1:L4',
 'updatedRows': 4,
 'updatedColumns': 12,
 'updatedCells': 45}

---
# to gSheet_2_Gemini done

# Task
Export the pandas DataFrame `df_results` to a Google Sheet.

## Install necessary libraries

### Subtask:
Install the `gspread` and `oauth2client` libraries to interact with Google Sheets and handle authentication.


**Reasoning**:
Install the `gspread` library using a shell command.



In [None]:
!pip install gspread




**Reasoning**:
Install the `oauth2client` library using a shell command.



In [None]:
!pip install oauth2client



## Authenticate with google sheets

### Subtask:
Set up authentication to access Google Sheets from Colab. This typically involves creating a service account and downloading its credentials.


**Reasoning**:
Authenticate to Google and authorize gspread to access Google Sheets.



In [None]:
from google.colab import auth
import gspread
from oauth2client.client import GoogleCredentials

# Authenticate to Google
auth.authenticate_user()

# Authorize gspread
gc = gspread.authorize(GoogleCredentials.get_application_default())

TypeError: Credentials need to be from either oauth2client or from google-auth.

**Reasoning**:
The traceback indicates that the credentials provided to gspread are not in the expected format. The error message suggests that the credentials should be from either `oauth2client` or `google-auth`. The `google.colab.auth.authenticate_user()` function provides credentials in a format compatible with `google-auth`. Update the authorization step to use the credentials obtained from `google.colab.auth`.



In [None]:
import gspread
from google.colab import auth

# Authenticate to Google
auth.authenticate_user()

# Authorize gspread using the authenticated credentials
gc = gspread.oauth()

FileNotFoundError: [Errno 2] No such file or directory: '/root/.config/gspread/credentials.json'

**Reasoning**:
The traceback shows a FileNotFoundError, indicating that `gspread.oauth()` is looking for a credentials.json file that doesn't exist in the specified path. Since `google.colab.auth.authenticate_user()` already handles the authentication and provides credentials, we can directly use these credentials with gspread instead of relying on gspread to load them from a file.



In [None]:
import gspread
from google.colab import auth
from google.auth import default

# Authenticate to Google
auth.authenticate_user()

# Get the authenticated credentials
creds, _ = default()

# Authorize gspread with the obtained credentials
gc = gspread.authorize(creds)

## Create or open a google sheet

### Subtask:
Create a new Google Sheet or open an existing one where the DataFrame will be exported.


**Reasoning**:
Create or open a Google Sheet to export the DataFrame.



In [None]:
import gspread

spreadsheet_name = "LangSmith Experiment Results"

try:
    # Try to open an existing spreadsheet
    sh = gc.open(spreadsheet_name)
    print(f"Opened existing spreadsheet: {spreadsheet_name}")
except gspread.SpreadsheetNotFound:
    # If the spreadsheet doesn't exist, create a new one
    sh = gc.create(spreadsheet_name)
    print(f"Created new spreadsheet: {spreadsheet_name}")

Created new spreadsheet: LangSmith Experiment Results


## Export dataframe to google sheet

### Subtask:
Write the contents of the `df_results` DataFrame to the selected Google Sheet.


**Reasoning**:
Write the contents of the `df_results` DataFrame to the selected Google Sheet by selecting the first worksheet, clearing its content, converting the DataFrame to a list of lists including headers, and writing the data to the worksheet.



In [None]:
# Select the first worksheet
worksheet = sh.sheet1

# Clear existing content
worksheet.clear()

# Convert DataFrame to list of lists including header
data_to_write = [df_results.columns.values.tolist()] + df_results.values.tolist()

# Write the data to the worksheet starting from A1
worksheet.update(data_to_write)

TypeError: Object of type Document is not JSON serializable

**Reasoning**:
The error "TypeError: Object of type Document is not JSON serializable" indicates that the `outputs.documents` column in the DataFrame contains objects that cannot be directly converted to JSON by `gspread.update`. To fix this, I need to convert the `outputs.documents` column to a string representation before converting the DataFrame to a list of lists.



In [None]:
# Convert the 'outputs.documents' column to string representation
df_results['outputs.documents'] = df_results['outputs.documents'].astype(str)

# Select the first worksheet
worksheet = sh.sheet1

# Clear existing content
worksheet.clear()

# Convert DataFrame to list of lists including header
data_to_write = [df_results.columns.values.tolist()] + df_results.values.tolist()

# Write the data to the worksheet starting from A1
worksheet.update(data_to_write)

TypeError: Object of type UUID is not JSON serializable

**Reasoning**:
The error "TypeError: Object of type UUID is not JSON serializable" indicates that the DataFrame still contains columns with UUID objects that cannot be directly converted to JSON. I need to convert all columns with potentially non-serializable objects, specifically the 'example_id' and 'id' columns which likely contain UUIDs, to a string representation before converting the DataFrame to a list of lists.



In [None]:
# Convert 'example_id' and 'id' columns to string representation
df_results['example_id'] = df_results['example_id'].astype(str)
df_results['id'] = df_results['id'].astype(str)

# Select the first worksheet
worksheet = sh.sheet1

# Clear existing content
worksheet.clear()

# Convert DataFrame to list of lists including header
data_to_write = [df_results.columns.values.tolist()] + df_results.values.tolist()

# Write the data to the worksheet starting from A1
worksheet.update(data_to_write)

{'spreadsheetId': '1Z6hXm9sWL4ZuTWXtpM63hcBhbElO285E8t2tAAHGKGw',
 'updatedRange': 'Sheet1!A1:L4',
 'updatedRows': 4,
 'updatedColumns': 12,
 'updatedCells': 45}

## Summary:

### Data Analysis Key Findings

*   The necessary libraries for Google Sheets interaction (`gspread` and `oauth2client`) were confirmed to be already installed.
*   Authentication with Google Sheets was successfully established using Colab's built-in authentication flow and `gspread.authorize`.
*   A new Google Sheet named "LangSmith Experiment Results" was created as no existing sheet with that name was found.
*   Exporting the `df_results` DataFrame required converting columns containing non-serializable object types (specifically `outputs.documents`, `example_id`, and `id`) to strings before writing to the sheet.
*   The `df_results` DataFrame, after converting the specified columns to strings, was successfully written to the first worksheet of the "LangSmith Experiment Results" Google Sheet.

### Insights or Next Steps

*   Ensure that complex or non-standard data types in DataFrames are converted to string representations or a format compatible with Google Sheets before attempting export.
*   The newly created Google Sheet now contains the experiment results and can be shared or used for further analysis or visualization outside of the current environment.
