# Experiment Notebook - Data
These Jupyter notebooks demonstrate how to use our data models to run experiments effectively.

This notebook illustrates how to set up Datasources, Documents, QA Sets, and Questions with their Correct Answers for running experiments.


In [1]:
import sqlite3

# Open the SQLite database file that backs the experiment data; the models
# and helpers in the cells below all receive this connection object.
db_connection = sqlite3.connect(database='../experiment.db')


## - Initialization of Datasources
Here we initialize a datasource with example data.

In [2]:
# Import the datasource model and its record type
from eval_data.models.datasource import DatasourceModel, DatasourceType

# Example data for datasource initialization, adapted from experiment.ipynb.
# The model is bound to the shared connection first, then handed a
# description of the "WikiQA" datasource to add or fetch.
datasource_model = DatasourceModel(db_connection)
wikiqa_datasource_type = DatasourceType(name="WikiQA", description="Wiki data")
datasource = datasource_model.add_or_get_datasource(wikiqa_datasource_type)

# Later cells (e.g. the QA set) reference `datasource.id`, so show it here.
print(f"Datasource ID: {datasource.id}")



Datasource ID: 3


## - Document Handling
Here we add a document to the datasource, identified by its Hugging Face dataset path.

In [3]:
# Import the document model and its record type
from eval_data.models.document import DocumentModel, DocumentType

# Create an instance of DocumentModel bound to the shared connection
document_model = DocumentModel(db_connection)

# Example document to be added, using a Hugging Face path.
# NOTE(review): the location looks like "<dataset repo>;<split>" - confirm
# against the code that consumes it.
#
# The document is linked through `datasource.id` rather than a literal ID:
# the recorded run printed a datasource ID other than 1, so a hard-coded 1
# attached the document to a different datasource than the one the QA set
# uses.
document = document_model.add_or_get_document(
    DocumentType(
        name="Ragas Wiki QA",
        location="explodinggradients/ragas-wikiqa;train",
        datasource_id=datasource.id,
    )
)

print(f"Document ID: {document.id}")


Document ID: 2


## - QASet Creation
Here we create a QA set, which groups questions and their correct answers and links them to a specific datasource and document.

In [4]:
# Import the QA set model and its record type
from eval_data.models.qaset import QASetModel, QASetType

# Build the QA set in explicit steps: bind the model to the connection,
# describe the set (tied to the datasource and document from earlier cells),
# then add or fetch it.
qaset_model = QASetModel(db_connection)
ragas_qaset_type = QASetType(
    datasource_id=datasource.id,
    document_id=document.id,
    name="Ragas Wiki QA",
    location="explodinggradients/ragas-wikiqa",
)
qaset = qaset_model.add_or_get_qaset(ragas_qaset_type)

print(f"QA Set ID: {qaset.id}")


QA Set ID: 2


## - Question Generation
Here we load the questions and correct answers from the QA set's source dataset and save them to the database.

In [5]:
from eval_scripts.hface import load_qa_dataset
from eval_data.tools import save_question_answers

# Fetch the question and answer collections for this QA set from the
# Hugging Face dataset.
# NOTE(review): the recorded run of this cell stopped with
# KeyError "Column answer not in the dataset", while the dataset lists a
# 'correct_answer' column. Presumably raised inside load_qa_dataset, whose
# column name would need updating - confirm in eval_scripts.hface.
qa_pairs = load_qa_dataset(qaset)
test_questions, test_answers = qa_pairs
print(f"Loaded {len(test_questions)} questions")
print(f"Loaded {len(test_answers)} answers")

# Store both collections in the database under this QA set's ID; the
# helper's result unpacks into the counts reported below.
save_counts = save_question_answers(
    db_connection, test_questions, test_answers, qaset.id
)
count_new, count_existing = save_counts
print(f"Added {count_new} questions")
print(f"Skipped {count_existing} existing questions")


  from .autonotebook import tqdm as notebook_tqdm


KeyError: "Column answer not in the dataset. Current columns in the dataset: ['question', 'correct_answer', 'incorrect_answer', 'question_id', 'generated_with_rag', 'context', 'generated_without_rag']"