**Step 0: Imports, constants, and API Keys!**

In [1]:
!pip install -q langchain==0.2.16 langchain_core==0.2.40 langchain_community==0.2.16 pymupdf openai 
!pip install -q langchain_openai==0.1.23 langchain-qdrant qdrant_client asyncio ragas==0.1.14 pandas
!pip install -q langsmith

In [2]:
import os
import openai
from getpass import getpass

# Ask for the OpenAI key through a hidden prompt (it is never hard-coded), then publish
# the same value both as the OPENAI_API_KEY environment variable and on the `openai` module.
os.environ["OPENAI_API_KEY"] = openai.api_key = getpass("OpenAI API Key: ")

**Load the data!**

In [3]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from datetime import datetime, timedelta

# Number of review files to load: john_wick_1.csv ... john_wick_4.csv.
NUM_MOVIES = 4

# Take the timestamp once, so every review of the same movie gets exactly the same
# "last_accessed_at" instead of a slightly different value per document.
loaded_at = datetime.now()

documents = []

for i in range(1, NUM_MOVIES + 1):
  loader = CSVLoader(
      file_path=f"john_wick_{i}.csv",
      metadata_columns=["Review_Date", "Review_Title", "Review_Url", "Author", "Rating"]
  )

  # newer movies have a more recent "last_accessed_at":
  # the last movie gets `loaded_at` itself, each earlier movie is one day older.
  last_accessed_at = loaded_at - timedelta(days=NUM_MOVIES - i)

  movie_docs = loader.load()
  for doc in movie_docs:

    # Add the "Movie Title" (John Wick 1, 2, ...)
    doc.metadata["Movie_Title"] = f"John Wick {i}"

    # convert "Rating" to an `int`, if no rating is provided - assume 0 rating
    doc.metadata["Rating"] = int(doc.metadata["Rating"]) if doc.metadata["Rating"] else 0

    doc.metadata["last_accessed_at"] = last_accessed_at

  documents.extend(movie_docs)

**Step 1: Generate synthetic data**

In [6]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context, conditional
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Model names for the two roles ragas needs: one model generates the test
# questions, the other acts as the critic.
GENERATOR_LLM = "gpt-4o-mini"
CRITIC_LLM = "gpt-4o"

# Build the embeddings and the two chat models handed to the generator below.
embeddings = OpenAIEmbeddings()
critic_llm = ChatOpenAI(model=CRITIC_LLM)
generator_llm = ChatOpenAI(model=GENERATOR_LLM)

# Test set generator wired up from the LangChain objects above.
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Share of each question evolution type in the generated test set (0.7 + 0.3 = 1.0).
distributions = {simple: 0.7, multi_context: 0.3}

In [7]:
# Number of evaluation questions requested from the generator.
N_EVAL_QUESTIONS = 30

# Generate the synthetic test set from the loaded review documents, using the
# `distributions` mix of question types and with debug logging enabled.
# NOTE(review): this cell does no explicit re-chunking; any splitting of the
# documents presumably happens inside ragas - confirm if a chunk size matters.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=N_EVAL_QUESTIONS,
    distributions=distributions,
    with_debugging_logs=True,
)

embedding nodes:   0%|          | 0/200 [00:00<?, ?it/s]

Filename and doc_id are the same for all nodes.


Generating:   0%|          | 0/30 [00:00<?, ?it/s]

[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 2, 'structure': 2, 'relevance': 3, 'score': 2.25}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['John Wick films', 'Wild set pieces', 'Frenetic action', 'Parabellum', 'Over-the-top']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 1, 'depth': 2, 'structure': 2, 'relevance': 2, 'score': 1.75}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Body count contribution', 'Organised crime', 'Wickedness', 'Handling a weapon', 'Visually impaired']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 1, 'structure': 2, 'relevance': 3, 'score': 2.0}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node: ['Exhilarating', 'Action film', 'Fun factor', 'Highest of any movie', 'Having a blast']
[ragas.testset.filters.DEBUG] context scoring: {'clarity': 2, 'depth': 2, 'structure': 2, 'relevance': 3, 'score': 2.25}
[ragas.testset.evolutions.DEBUG] keyphrases in merged node

**Step 2: Save for later re-use**

In [14]:
import pandas as pd
# Generating the test data costs money, time, and compute, so write it to disk
# straight away and re-use the saved file later instead of regenerating it.
TEST_DATASET_FILE = "sdg_test_data.csv"

# Convert the generated test set to a DataFrame and save it without the index column.
test_df = testset.to_pandas()
test_df.to_csv(path_or_buf=TEST_DATASET_FILE, index=False)


In [16]:
# A cell only renders its LAST expression, so a bare `test_df.head()` on the first
# line would be evaluated and silently thrown away. Show the preview explicitly with
# display() (provided by the Jupyter/IPython kernel), then let describe() render as
# the cell's final expression.
display(test_df.head())
test_df.describe()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
count,29,29,29,29,29,29
unique,29,26,23,2,26,1
top,What aspects of the film contributed to the po...,[: 16\nReview: John Wick 3 is without a doubt ...,The answer to given question is not present in...,simple,"[{'source': 'john_wick_3.csv', 'row': 16, 'Rev...",True
freq,1,2,7,21,2,29
