In [None]:
# Clone the SleepQA repo
# The dataset class defined later in this notebook defaults to reading its
# input files from /content/SleepQA/data/training, i.e. from inside this clone.
!git clone https://github.com/IvaBojic/SleepQA.git
# Switch the working directory into the fresh clone; the recorded output of
# this cell shows the resulting directory as /content/SleepQA.
%cd SleepQA

Cloning into 'SleepQA'...
remote: Enumerating objects: 400, done.[K
remote: Counting objects: 100% (87/87), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 400 (delta 46), reused 43 (delta 19), pack-reused 313 (from 1)[K
Receiving objects: 100% (400/400), 31.13 MiB | 7.20 MiB/s, done.
Resolving deltas: 100% (176/176), done.
Updating files: 100% (134/134), done.
Filtering content: 100% (3/3), 1.21 GiB | 50.43 MiB/s, done.
/content/SleepQA


In [None]:
# Install extra packages with pip. No versions are pinned, so a re-run may
# resolve to different releases than the ones shown in the recorded output.
!pip install mne pandarallel rdkit-pypi transformers accelerate polars
# Clone a PyHealth fork using a relative path. The previous cell left the
# working directory at /content/SleepQA, so the clone presumably lands in
# /content/SleepQA/PyHealth -- TODO confirm this nesting is intended.
!git clone https://github.com/asahoo0/PyHealth.git
# NOTE(review): moving into the checkout is presumably what makes the later
# `from pyhealth.datasets import BaseDataset` resolve (no `pip install` of
# PyHealth itself is visible here) -- verify.
%cd PyHealth

Collecting mne
  Downloading mne-1.9.0-py3-none-any.whl.metadata (20 kB)
Collecting pandarallel
  Downloading pandarallel-1.6.5.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)
Collecting dill>=0.3.1 (from pandarallel)
  Downloading dill-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from

In [11]:
import pandas as pd
import os
from pyhealth.datasets import BaseDataset
from typing import Dict, Tuple, List

class SleepQADataset(BaseDataset):
    """SleepQA passages and question/answer pairs wrapped as a ``BaseDataset``.

    Two tab-separated files without a header row are read from ``root``:

    * ``sleep-corpus.tsv`` -- loaded with columns ``id``, ``passage``, ``title``
    * ``sleep-test.csv``   -- loaded with columns ``question``, ``answer``

    After construction the instance exposes:

    * ``corpus``   -- the passage ``DataFrame`` (sampled when ``dev`` is true)
    * ``examples`` -- a list of ``{"question": ..., "answer": ...}`` dicts with
      surrounding whitespace stripped from both values. The answer text is
      kept verbatim otherwise; it is not parsed.

    Args:
        root: Directory containing the two input files.
        dev: When true, keep only a fixed-seed random sample of at most
            ``DEV_CORPUS_SIZE`` passages and ``DEV_QA_SIZE`` QA pairs.
        config_path: Path of the YAML config forwarded to ``BaseDataset``.
            Defaults to the location this notebook originally hard-coded.
    """

    # Upper bounds on the number of rows kept when ``dev=True``.
    DEV_CORPUS_SIZE = 500
    DEV_QA_SIZE = 50
    # Fixed seed so that dev-mode sampling is reproducible between runs.
    DEV_SEED = 42

    def __init__(
        self,
        root: str = "/content/SleepQA/data/training",
        dev: bool = False,
        config_path: str = "/content/sleepQA.yaml",
    ):
        # Load and prepare data
        corpus_path = os.path.join(root, "sleep-corpus.tsv")
        qa_path = os.path.join(root, "sleep-test.csv")

        corpus_df = pd.read_csv(corpus_path, sep="\t", header=None, names=["id", "passage", "title"])
        qa_df = pd.read_csv(qa_path, sep="\t", header=None, names=["question", "answer"])

        if dev:
            corpus_df = self._dev_sample(corpus_df, self.DEV_CORPUS_SIZE)
            qa_df = self._dev_sample(qa_df, self.DEV_QA_SIZE)

        # Define tables dictionary for BaseDataset
        # NOTE(review): the recorded log prints one "Scanning table: <key>"
        # line per entry and a file path under ``root``, so BaseDataset appears
        # to use only the keys as table names -- confirm whether it expects the
        # DataFrames at all or just a list of names.
        tables = {
            "corpus": corpus_df,
            "qa": qa_df,
        }

        super().__init__(dataset_name="SleepQA", root=root, tables=tables, config_path=config_path, dev=dev)

        # Store examples for task-level work later
        self.examples = self._build_examples(qa_df)
        self.corpus = corpus_df

    @staticmethod
    def _dev_sample(frame: pd.DataFrame, limit: int) -> pd.DataFrame:
        """Return a reproducible random sample of at most ``limit`` rows."""
        # DataFrame.sample raises ValueError when n exceeds the number of rows
        # (sampling without replacement), so clamp n to the frame length.
        return frame.sample(n=min(limit, len(frame)), random_state=SleepQADataset.DEV_SEED)

    @staticmethod
    def _build_examples(qa_df: pd.DataFrame) -> List[Dict[str, str]]:
        """Turn the QA frame into a list of whitespace-stripped QA dicts."""
        # A row with a missing question or answer is parsed as NaN (a float),
        # which has no .strip(); such rows cannot form a QA pair, so skip them.
        complete = qa_df.dropna(subset=["question", "answer"])
        # astype(str) guards against a column pandas inferred as numeric.
        questions = complete["question"].astype(str).str.strip()
        answers = complete["answer"].astype(str).str.strip()
        return [
            {"question": question, "answer": answer}
            for question, answer in zip(questions, answers)
        ]

    def get_raw_samples(self) -> Tuple[pd.DataFrame, List[Dict[str, str]]]:
        """Return ``(corpus, examples)`` exactly as stored by ``__init__``."""
        return self.corpus, self.examples

# Main function to instantiate and test the dataset class
def main():
    """Instantiate the dataset in dev mode and print a short preview of it."""
    # dev=True asks the dataset for its smaller, sampled variant.
    sleepqa = SleepQADataset(dev=True)
    passages, qa_pairs = sleepqa.get_raw_samples()

    corpus_preview = passages.head()
    print(f"Corpus:\n{corpus_preview}")

    # Only the first five QA pairs are shown.
    leading_examples = qa_pairs[:5]
    print(f"Examples:\n{leading_examples}")

# Run the demo when this code is executed as the main module. The recorded
# output below this cell shows that executing the cell in the notebook
# triggers main().
if __name__ == "__main__":
    main()


Initializing SleepQA dataset from /content/SleepQA/data/training (dev mode: True)


INFO:pyhealth.datasets.base_dataset:Initializing SleepQA dataset from /content/SleepQA/data/training (dev mode: True)


Scanning table: corpus from /content/SleepQA/data/training/sleep-corpus.tsv


INFO:pyhealth.datasets.base_dataset:Scanning table: corpus from /content/SleepQA/data/training/sleep-corpus.tsv


Scanning table: qa from /content/SleepQA/data/training/sleep-test.csv


INFO:pyhealth.datasets.base_dataset:Scanning table: qa from /content/SleepQA/data/training/sleep-test.csv


Corpus:
        id                                            passage  \
2966  2966  heavy drinking means more than 15 drinks per w...   
6961  6965  older beds tend to sag in places, which reduce...   
3214  3215  booster shots may be given with the same vacci...   
437    437  if you go this route, you can try to sell your...   
263    263  relaxation exercises may be a component of cbt...   

                                                  title  
2966                                  alcohol and sleep  
6961              when should you replace your mattress  
3214  covid-19 vaccine: how sleep increases immune r...  
437                              how to ship a mattress  
263                                   anxiety and sleep  
Examples:
[{'question': 'what is a benefit of stomach sleeping?', 'answer': '["can help relieve snoring"]'}, {'question': 'what does good sleep during trips do?', 'answer': '["promotes wellness that lets you get the most out of any trip"]'}, {'question'