# Transform `.txt` guided-meditation-collection to `.jsonl`

In [6]:
import os
import json
import re
import random

# Directory holding the cleaned .txt transcripts (one meditation per file)
input_dir = '../data/input/text/guided-meditation-collection/cleaned-txt'
# Destination JSONL file (one JSON object per transcript)
output_path = '../data/input/text/guided-meditation-collection/jhana-guided-meditation-collection.jsonl'

# Instructions are assigned to transcripts with random.choice; seed the
# generator so re-running this cell yields the same assignment every time.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Sample instructions for meditation guidance
instructions_examples = [
    "Guide me through a meditation to cultivate feelings of love and compassion",
    "Lead a loving kindness practice",
    "Assist me in a meditation for cultivating compassion towards others",
    "Help cultivate warm-heartedness",
    "Guide a meditation for feeling connected and open-hearted",
    "Facilitate heart-felt meditation",
    "Meditation guidance for loving-kindness towards self and others",
    "Compassion practice guidance",
    "Meditation for cultivating goodness and peace",
    "Assist in developing friendliness through meditation",
    "Guide a gratitude and positivity meditation",
    "Teach a metta meditation",
    "Loving-kindness and compassion meditation guidance",
    "Concentration and sensory experience meditation on Vedana",
    "Vedana meditation guide",
    "Assist in feeling emotions through meditation",
    "Loving kindness meditation facilitation"
]

def create_jsonl_object(system, instruction, output):
    """Build one dataset record as a plain dict.

    Args:
        system: System prompt text stored under the "system" key.
        instruction: User instruction stored under the "instruction" key.
        output: Transcript text; leading whitespace is stripped before storing.

    Returns:
        A dict with the keys "system", "instruction" and "output", in that order.
    """
    # str.lstrip() with no argument drops every leading whitespace character
    # (spaces, tabs, newlines), not only spaces.
    trimmed_output = output.lstrip()
    record = {"system": system, "instruction": instruction}
    record["output"] = trimmed_output
    return record

def transform_silence(text):
    """Rewrite pause markers such as "silence: 59.0" as "[59.0]".

    Only markers whose duration has a decimal point are rewritten; all other
    text, including line breaks, is returned untouched.
    """
    silence_marker = re.compile(r'silence: (\d+\.\d+)')
    return silence_marker.sub(r'[\1]', text)

def convert_txt_to_jsonl(input_dir, output_path):
    """Turn every ``.txt`` file in ``input_dir`` into one JSONL row in ``output_path``.

    Each file is read, passed through ``transform_silence`` and wrapped by
    ``create_jsonl_object`` together with a fixed system prompt and a randomly
    chosen entry of ``instructions_examples``.

    Args:
        input_dir: Directory that is scanned (non-recursively) for ``.txt`` files.
        output_path: File that is (over)written with one JSON object per line.

    Returns:
        "Input directory does not exist." when ``input_dir`` is not a directory,
        otherwise "Conversion to JSONL completed.".
    """
    if not os.path.isdir(input_dir):
        return "Input directory does not exist."

    # os.listdir returns entries in arbitrary order; sort them so the rows of
    # the generated file come out in the same order on every run and machine.
    txt_file_names = sorted(
        name for name in os.listdir(input_dir) if name.endswith('.txt')
    )

    jsonl_objects = []
    for file_name in txt_file_names:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale-dependent default encoding.
        with open(os.path.join(input_dir, file_name), 'r', encoding='utf-8') as file:
            content = transform_silence(file.read())

        jsonl_object = create_jsonl_object(
            system="Guidance as a meditation assistant for Jhana meditation",
            instruction=random.choice(instructions_examples),
            output=content
        )
        jsonl_objects.append(jsonl_object)

    # All inputs are read before the output is opened, so a read failure does
    # not leave a truncated output file behind.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        for obj in jsonl_objects:
            output_file.write(json.dumps(obj) + '\n')

    return "Conversion to JSONL completed."

# Run the conversion; the returned status message is shown as the cell output
convert_txt_to_jsonl(input_dir=input_dir, output_path=output_path)


'Conversion to JSONL completed.'

In [1]:
import os
import json
import re
import random

# Directory holding the cleaned .txt transcripts (one meditation per file)
input_dir = '../data/input/text/guided-meditation-collection/cleaned-txt'
# Destination JSONL file (one JSON object per transcript)
output_path = '../data/input/text/guided-meditation-collection/jhana-guided-meditation-collection.jsonl'

# Instructions are assigned to transcripts with random.choice; seed the
# generator so re-running this cell yields the same assignment every time.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Sample instructions for meditation guidance
instructions_examples = [
    "Guide me through a meditation to cultivate feelings of love and compassion",
    "Lead a loving kindness practice",
    "Assist me in a meditation for cultivating compassion towards others",
    "Help cultivate warm-heartedness",
    "Guide a meditation for feeling connected and open-hearted",
    "Facilitate heart-felt meditation",
    "Meditation guidance for loving-kindness towards self and others",
    "Compassion practice guidance",
    "Meditation for cultivating goodness and peace",
    "Assist in developing friendliness through meditation",
    "Guide a gratitude and positivity meditation",
    "Teach a metta meditation",
    "Loving-kindness and compassion meditation guidance",
    "Assist in feeling emotions through meditation",
    "Loving kindness meditation facilitation"
]

def create_jsonl_object(system, instruction, output):
    """Package one transcript as a dataset row.

    Args:
        system: Text stored under "system".
        instruction: Text stored under "instruction".
        output: Transcript text stored under "output" after its leading
            whitespace has been removed.

    Returns:
        A dict keyed "system", "instruction", "output" (in that order).
    """
    field_names = ("system", "instruction", "output")
    # lstrip() removes all leading whitespace, including tabs and newlines
    field_values = (system, instruction, output.lstrip())
    return dict(zip(field_names, field_values))

# Case-sensitive corrections for mis-transcribed terms: key = text as it
# appears in the transcripts, value = corrected text. Keys are matched as
# whole words, so plural or capitalised variants must be listed explicitly.
_REPLACEMENTS = {
    "Dhammas": "jhanas",
    "10th": "tense",
    "worms": "warmth",
    "Worms": "warmth",
    "Philip Worms": "feel the warmth",
    "Mittha": "metta",
    "no order to": "now in order to",
    "North America": "the world",
    "at Oakwood": "at this place",
    # Corrections for 'jhana'
    "jhāna": "jhana", "chana": "jhana", "jnana": "jhana",
    "jhanah": "jhana", "jhan": "jhana", "jharnas": "jhanas", "janas": "jhanas",
    "jauna": "jhana", "janna": "jhana", "janam": "jhana", "jana": "jhana",
    "jaina": "jhana", "shana": "jhana", "Chana": "jhana", "Jhanas": "jhanas",
    "Jnana": "jhana", "Jhanah": "jhana", "Jhan": "jhana", "Jharnas": "jhanas",
    "Janas": "jhanas", "Jauna": "jhana", "Janna": "jhana", "Janam": "jhana",
    "Jana": "jhana", "Jaina": "jhana", "Shana": "jhana",
    "Janus": "jhanas", "janus": "jhanas",
    "Jhanus": "jhanas", "jhanus": "jhanas",
    "jnanas": "jhanas", "Jnanas": "jhanas",
    "jāna": "jhana", "Jāna": "jhana", "jānas": "jhanas", "Jānas": "jhanas",
    "jama": "jhana", "Jama": "jhana", "jamas": "jhanas", "Jamas": "jhanas",
    "johnna": "jhana", "Johnna": "jhana",
    "jhani": "jhana", "Jhani": "jhana", "jhanis": "jhanas", "Jhanis": "jhanas",
    "vipassanjana": "vipassanajhana", "Vipassanjana": "vipassanajhana",
    "vaginas": "jhanas", "Vaginas": "jhanas",
    "vagina": "jhana",
    "vaginal": "jhana",
    # Corrections for 'jhanic'
    "jonic": "jhanic", "Jonic": "jhanic",
    "jānic": "jhanic", "Jānic": "jhanic",
    # Corrections for 'jhanically'
    "jonically": "jhanically", "Jonically": "jhanically",
    # Corrections for 'sukkha'
    "sukha": "sukkha", "suka": "sukkha", "sukho": "sukkha", "sukhas": "sukkha",
    "sukkha": "sukkha", "sukhara": "sukkha", "sukah": "sukkha",
    "sukhi": "sukkha", "sukh": "sukkha", "sukkhas": "sukkha", "sukka": "sukkha",
    "Suka": "sukkha",
    "Sukho": "sukkha", "Sukhas": "sukkha", "Sukkha": "sukkha",
    "Sukhara": "sukkha", "Sukah": "sukkha", "Sukhi": "sukkha", "Sukh": "sukkha",
    "Sukkhas": "sukkha", "Sukka": "sukkha", "Sukha": "sukkha",
    "SUCA": "sukkha", "suca": "sukkha", "Suca": "sukkha",
    "sukkah": "sukkha", "Sukkah": "sukkha",
    "suko": "sukkha", "Suko": "sukkha",
    "suga": "sukkha", "Suga": "sukkha",
    "sookah": "sukkha", "Sookah": "sukkha",
    "Sookha": "sukkha", "Sook-ka": "sukkha",
    "sook": "sukkha", "Sook": "sukkha",
    "tsuka": "sukkha", "Tsuka": "sukkha",
    # Corrections for 'dukkha'
    "duhka": "dukkha", "Duhka": "dukkha", "duhkas": "dukkhas", "Duhkas": "dukkhas",
    "dukha": "dukkha", "Dukha": "dukkha", "dukhas": "dukkhas", "Dukhas": "dukkhas",
    # Corrections for 'piti'
    "pithi": "piti", "Pithi": "piti", "pitti": "piti", "Pitti": "piti",
    "PT": "piti", "pitya": "piti", "Pitya": "piti",
    "pity": "piti", "Pity": "piti", "pete": "piti", "Pete": "piti",
    " Pt ": " piti ", " pt ": " piti ",
    "Pt": "piti", "pt": "piti",
    "pd": "piti", "Pd": "piti",
    "PD": "piti",
    "pti": "piti", "Pti": "piti",
    "pitta": "piti", "Pitta": "piti",
    "ptsuka": "piti-sukkha",
    # Corrections for 'samatha'
    "shamata": "samatha", "shamatha": "samatha", "Shamata": "samatha", "Shamatha": "samatha",
    # Corrections for 'vitakka'
    "vitaka": "vitakka", "Vitaka": "vitakka", "vittaka": "vitakka", "Vittaka": "vitakka",
    "vidakka": "vitakka", "Vidakka": "vitakka",
    "vittakka": "vitakka", "Vittakka": "vitakka",
    "vataka": "vitakka", "Vataka": "vitakka",
    "vittacca": "vitakka", "Vittacca": "vitakka",
    "vittak": "vitakka", "Vittak": "vitakka",
    # Corrections for 'vichara'
    "vichar": "vichara", "Vichar": "vichara",
    # Corrections for 'metta'
    "metha": "metta", "Metha": "metta", "meta": "metta", "Meta": "metta",
    # Corrections for 'Visuddhimagga'
    "Vasuti Maga": "Visuddhimagga", "Vasude maga": "Visuddhimagga", "Vasudhimagra": "Visuddhimagga",
    "Visuddhimagra": "Visuddhimagga", "Visuddhimaga": "Visuddhimagga",
    "vesuddhimaga": "Visuddhimagga", "Vesuddhimaga": "Visuddhimagga",
    "vishuddhimag": "Visuddhimagga", "Vishuddhimag": "Visuddhimagga",
    "Visiddhi Maga": "Visuddhimagga", "Visiddhi Magga": "Visuddhimagga",
    "Vasude Maga": "Visuddhimagga",
    # Corrections for 'Nimitta'
    "nimuta": "nimitta", "Nimuta": "nimitta",
    # Corrections for 'Kalapa'
    "kalabhas": "kalapas", "kalabha": "kalapa",
    # Corrections for 'Ekaggata'
    "ikagata": "ekaggata", "ikaggata": "ekaggata",
    "kagata": "ekaggata", "Kagata": "ekaggata",
    "ekaggata": "ekaggata", "Ekaggata": "ekaggata",
    "ekagata": "ekaggata", "Ekagata": "ekaggata",
    "ekakata": "ekaggata", "Ekakata": "ekaggata",
    "akagata": "ekaggata", "Akagata": "ekaggata",
    # Corrections for 'Vipassana'
    "viparsana": "vipassana", "vipasana": "vipassana",
    "Viparsana": "vipassana", "Vipasana": "vipassana",
    # Corrections for 'anicca'
    "anicya": "anicca", "Anicya": "anicca",
    "anicha": "anicca", "Anicha": "anicca",
    # Corrections for 'Upekkha'
    "Upekha": "upekkha", "upekha": "upekkha",
    "Upeka": "upekkha", "upeka": "upekkha",
    # Corrections for 'drenches'
    "dringes": "drenches",
    # Corrections for 'hindrances'
    "hindrins": "hindrances",
    # Corrections for 'panna'
    "panya": "panna",
    "Panya": "panna",
}


def _compile_replacement_pattern(replacements):
    """Build a single regex that matches any key of ``replacements`` as a whole word."""
    alternatives = []
    # Longest keys first, so that e.g. "Philip Worms" is tried before "Worms"
    # and "vaginal" before "vagina" at the same position.
    for key in sorted(replacements, key=len, reverse=True):
        # Require a word boundary only on sides where the key itself starts or
        # ends with a word character; keys padded with spaces (" Pt ") carry
        # their own delimiters.
        lead = r'(?<!\w)' if re.match(r'\w', key[0]) else ''
        trail = r'(?!\w)' if re.match(r'\w', key[-1]) else ''
        alternatives.append(lead + re.escape(key) + trail)
    return re.compile('|'.join(alternatives))


_REPLACEMENT_PATTERN = _compile_replacement_pattern(_REPLACEMENTS)


def apply_replacements(text):
    """Correct known mis-transcriptions in ``text`` and return the result.

    All corrections from ``_REPLACEMENTS`` are applied in one pass over the
    text, matching whole words only and case-sensitively. Compared with
    chaining ``str.replace`` calls this guarantees that:

    * already-correct words are left alone ("jhana" is not turned into
      "jhanaa" by the "jhan" rule, "accept" is not hit by the "pt" rule);
    * the output of one correction is never re-processed by another;
    * longer keys take precedence over shorter keys they contain.

    Args:
        text: Transcript text to correct.

    Returns:
        The corrected text; text without any listed term is returned unchanged.
    """
    return _REPLACEMENT_PATTERN.sub(
        lambda match: _REPLACEMENTS[match.group(0)], text
    )

def transform_silence(text):
    """Clean a transcript: fix terms, bracket pause markers, drop line breaks.

    The text is first passed through ``apply_replacements``; afterwards every
    "silence: <decimal>" marker becomes "[<decimal>]" and all newline
    characters are removed (lines are joined without inserting a separator).
    """
    corrected = apply_replacements(text)
    bracketed = re.compile(r'silence: (\d+\.\d+)').sub(r'[\1]', corrected)
    return ''.join(bracketed.split('\n'))

def convert_txt_to_jsonl(input_dir, output_path):
    """Turn every ``.txt`` file in ``input_dir`` into one JSONL row in ``output_path``.

    Each file is read, cleaned by ``transform_silence`` and wrapped by
    ``create_jsonl_object`` together with a fixed system prompt and a randomly
    chosen entry of ``instructions_examples``.

    Args:
        input_dir: Directory that is scanned (non-recursively) for ``.txt`` files.
        output_path: File that is (over)written with one JSON object per line.

    Returns:
        "Input directory does not exist." when ``input_dir`` is not a directory,
        otherwise "Conversion to JSONL completed.".
    """
    if not os.path.isdir(input_dir):
        return "Input directory does not exist."

    # os.listdir returns entries in arbitrary order; sort them so the rows of
    # the generated file come out in the same order on every run and machine.
    txt_file_names = sorted(
        name for name in os.listdir(input_dir) if name.endswith('.txt')
    )

    jsonl_objects = []
    for file_name in txt_file_names:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale-dependent default; the transcripts may contain non-ASCII
        # characters such as the "ā" in "jhāna".
        with open(os.path.join(input_dir, file_name), 'r', encoding='utf-8') as file:
            content = transform_silence(file.read())

        jsonl_object = create_jsonl_object(
            system="You are a meditation assistant who guides the user through a Jhana meditation",
            instruction=random.choice(instructions_examples),
            output=content
        )
        jsonl_objects.append(jsonl_object)

    # All inputs are read before the output is opened, so a read failure does
    # not leave a truncated output file behind.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        for obj in jsonl_objects:
            output_file.write(json.dumps(obj) + '\n')

    return "Conversion to JSONL completed."

# Run the conversion; the returned status message is shown as the cell output
convert_txt_to_jsonl(input_dir=input_dir, output_path=output_path)


'Conversion to JSONL completed.'

# Overview

This created a 75-row JSONL file from the guided-meditation-collection `.txt` files. Each row is a JSON object built from one text file, with the keys `system`, `instruction`, and `output`.

Then I gave the following prompt to GPT-4 to increase the dataset size:

```
read the following JSONL file. create an additional 10 examples in JSONL format. keep the "system" values the same.  vary the "instruction" based on these examples, which should match with the "output".  the "output" of the new examples should be very similar to the following examples. create some slightly different - but mostly similar - examples. include pauses, as in these examples: 
```
