# Experiment Notebook - Data
These Jupyter notebooks demonstrate how to use our data models to run experiments effectively.

This notebook illustrates how to set up Datasources, Documents, QA Sets, and Questions (with their correct answers) for running experiments.


In [None]:
import sqlite3

# SQLite file backing the models created in this notebook. The path is
# relative, so it resolves against the kernel's working directory; sqlite3
# creates the file on first connect if it does not exist yet.
DB_PATH = 'experiment.db'

# One connection, shared by every model instantiated in the cells below
db_connection = sqlite3.connect(DB_PATH)


## 1. Initialization of Datasources
Here we initialize each datasource with example data.

In [None]:
# Datasource model (database access) and the record type it stores
from eval_data.models.datasource import DatasourceModel, DatasourceType

# Bind the datasource model to the shared database connection
datasource_model = DatasourceModel(db_connection)

# Example datasource record; the values are adapted from experiment.ipynb
datasource = DatasourceType(
    name="BioASQ",
    description="Manually curated set of biomedical Documents, Questions, and Answers",
)

# Store the record; the returned value is kept as `datasource_id` so the
# following cells can link their rows to this datasource
datasource_id = datasource_model.add_datasource(datasource)
print(f"Datasource added with ID: {datasource_id}")



## 2. Document Handling
Adding documents to the datasources using Hugging Face paths.

In [None]:
# Import necessary libraries
from eval_data.models.document import DocumentModel, DocumentType

# Fallback so this cell can run on its own when the datasource cell was
# skipped. The value 1 matches the fallback used by the QASet and Question
# cells, so all cells agree on the same default datasource.
if 'datasource_id' not in globals():
    datasource_id = 1

# Create an instance of DocumentModel
document_model = DocumentModel(db_connection)

# Example documents to be added, using Hugging Face paths.
# NOTE(review): `location` looks like "<dataset path>;<config name>", the same
# convention the question-loading cell splits on - confirm for documents.
documents = [
    DocumentType(
        name="BioASQ Document 1",
        location="rag-datasets/rag-mini-bioasq;text-corpus",
        # Link to the datasource created above instead of a hard-coded ID
        datasource_id=datasource_id,
    ),
]

# Adding documents to the database.
# After the loop, `document_id` holds the ID of the last document added;
# the later cells reuse that variable.
for document in documents:
    document_id = document_model.add_document(document)
    print(f"Document added with ID: {document_id}")


## 3. QASet Creation
Establishing QA sets that contain questions and their correct answers linked to specific documents.

In [None]:
# QA-set model (database access) and the record type it stores
from eval_data.models.qaset import QASetModel, QASetType

# Standalone-run defaults: each ID is set to 1 only when an earlier cell has
# not already defined it in the notebook namespace.
globals().setdefault('datasource_id', 1)
globals().setdefault('document_id', 1)

# Bind the QA-set model to the shared database connection
qaset_model = QASetModel(db_connection)

# QA-set record tying the question/answer source to its datasource and document
qaset = QASetType(
    datasource_id=datasource_id,
    document_id=document_id,
    name="BioASQ QA Set",
    location="rag-datasets/rag-mini-bioasq;question-answer-passages",
)

# Store the record and keep the returned ID for the question-loading cell
qaset_id = qaset_model.add_qaset(qaset)
print(f"QA Set added with ID: {qaset_id}")


## 4. Question Generation
Loading questions and their correct answers from the QASet's Hugging Face dataset and saving them to the database.

In [None]:
from datasets import load_dataset
from eval_data.models.qaset import QASetModel, QASetType
from eval_data.models.question import QuestionModel, QuestionType

# Fallback IDs so this cell can run on its own when earlier cells were skipped
if 'datasource_id' not in globals():
    datasource_id = 1
if 'document_id' not in globals():
    document_id = 1
if 'qaset_id' not in globals():
    qaset_id = 1

# Look up the stored QA set to find out where its questions live
qaset_model = QASetModel(db_connection)
qaset = qaset_model.get_qaset_by_id(qaset_id)

# Load the QA dataset. `location` is split on ";" into the two positional
# arguments of load_dataset (dataset path and configuration name); only the
# 'test' split is used.
path, name = qaset.location.split(";")
dataset = load_dataset(path, name)['test']

test_questions = dataset["question"]
# Answers are read from "ground_truth" when that column exists, else "answer"
if "ground_truth" in dataset.column_names:
    test_answers = dataset["ground_truth"]
else:
    test_answers = dataset["answer"]

print(f"Loaded {len(test_questions)} questions")
print(f"Loaded {len(test_answers)} answers")


# Save the questions and answers to the database.
# The loop variables are named *_text so the raw string is not rebound to the
# QuestionType record built from it.
question_model = QuestionModel(db_connection)
question_ids = []
for question_text, answer_text in zip(test_questions, test_answers):
    question = QuestionType(
        qaset_id=qaset_id,
        document_id=document_id,
        question=question_text,
        answer=answer_text,
    )
    question_id = question_model.add_question(question)
    question_ids.append(question_id)

# One summary line instead of one line per row, so a dataset-sized loop does
# not flood the cell output
if question_ids:
    print(
        f"Added {len(question_ids)} questions "
        f"(first ID: {question_ids[0]}, last ID: {question_ids[-1]})"
    )
else:
    print("No questions were added")
