In [1]:
# Procedures are combined into a single sentence per intention. The JSON output contains the fields Raw Text, Intention and Procedure. Re-export of the original new dataset.
import json
from collections import defaultdict

def load_and_process_data(filename):
    """Group related procedure spans under the intention span they belong to.

    Reads a JSON Lines file in which every non-blank line is an object with
    ``text``, ``entities`` (each with ``id``, ``start_offset``, ``end_offset``)
    and ``relations`` (each with ``from_id``, ``to_id``). For each relation the
    ``from_id`` entity is used as the intention and the ``to_id`` entity as the
    procedure; both are cut out of ``text`` by character offset.

    Entries are keyed by the exact intention string across the whole file, so
    the same intention text appearing in several records shares one entry.

    Args:
        filename: Path of the JSONL file to read. It is decoded as UTF-8.

    Returns:
        A list of dicts, ordered by first appearance of each intention, with
        the keys ``raw_text`` (always ``None``), ``intention`` and
        ``procedure`` (all related procedure spans joined with ``". "``).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a required field is missing, or a relation points at an
            entity id that does not exist in the same record.
    """
    paired_data = defaultdict(lambda: {"raw_text": None, "intention": None, "procedure": []})

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank or whitespace-only lines are not JSON documents; skip them
            # instead of letting json.loads raise.
            if not line.strip():
                continue

            data = json.loads(line)
            text = data['text']
            id_to_entity = {entity['id']: entity for entity in data['entities']}

            # NOTE(review): relations are assumed to run intention -> procedure;
            # entity labels are not checked here — confirm against the annotation scheme.
            for relation in data['relations']:
                intention_entity = id_to_entity[relation['from_id']]
                procedure_entity = id_to_entity[relation['to_id']]

                intention_text = text[intention_entity['start_offset']:intention_entity['end_offset']]
                procedure_text = text[procedure_entity['start_offset']:procedure_entity['end_offset']]

                # NOTE(review): "raw_text" is never filled in (the assignment was
                # disabled in the original code) — confirm whether it should be.
                entry = paired_data[intention_text]
                entry["intention"] = intention_text
                entry["procedure"].append(procedure_text)

    # Collapse each list of procedure spans into one ". "-separated string.
    for entry in paired_data.values():
        entry["procedure"] = ". ".join(entry["procedure"])

    return list(paired_data.values())

# Build the intention/procedure records from the annotated export and save
# them as one JSON array.
paired_data_list = load_and_process_data('New Dataset/New GPT Data.jsonl')

with open('Data_Intention_Procedure2.json', 'w') as out_file:
    out_file.write(json.dumps(paired_data_list))

In [None]:
#Intention
import json

# Collect one {"raw_data", "intention"} record per entity labelled 'Intention'.
data_for_json = []

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open('admin (1).jsonl', 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines are not JSON documents; skip them rather than crash.
        if not line.strip():
            continue

        data = json.loads(line)
        text = data['text']

        for entity in data['entities']:
            if entity['label'] == 'Intention':
                # start_offset/end_offset are used as character positions, so
                # slice the text directly. Converting them to word indices
                # misplaces spans that do not start/end on whitespace (a span
                # starting mid-token loses its first word; one ending before
                # punctuation gains it).
                span = text[entity['start_offset']:entity['end_offset']]

                # Collapse whitespace runs to single spaces, as the previous
                # word-based join did.
                intention_text = ' '.join(span.split())

                data_for_json.append({"raw_data": text, "intention": intention_text})

with open('processed_data.json', 'w') as f:
    json.dump(data_for_json, f)

In [4]:
import json

def extract_intentions(jsonl_file_path, output_json_file_path):
    """Write, for every input record, its text and its combined intention spans.

    Each non-blank line of the input is parsed as a JSON object with ``text``
    and ``entities``. The character span of every entity whose ``label`` is
    ``'Intention'`` is cut out of ``text``; the spans of one record are joined
    with single spaces. A record without intention entities yields an empty
    ``intention`` string.

    Args:
        jsonl_file_path: Path of the JSONL file to read (decoded as UTF-8).
        output_json_file_path: Path of the JSON file to write. It receives a
            list of ``{"raw_text", "intention"}`` objects, indented by 4.

    Returns:
        None. The result is only written to ``output_json_file_path``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record or entity lacks a required field.
    """
    output_data = []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(jsonl_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines are not JSON documents; skip them rather than crash.
            if not line.strip():
                continue

            data = json.loads(line)
            raw_text = data['text']

            intentions = [
                raw_text[entity['start_offset']:entity['end_offset']]
                for entity in data['entities']
                if entity['label'] == 'Intention'
            ]

            output_data.append({
                'raw_text': raw_text,
                'intention': ' '.join(intentions),  # Combine intentions into a single string
            })

    with open(output_json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(output_data, json_file, indent=4)

# Usage: read the admin export and write the combined intentions to 'output.json'.
extract_intentions(
    jsonl_file_path='New Dataset/admin.jsonl',
    output_json_file_path='output.json',
)

In [3]:
#Including Procedure
import json

def extract_intentions_and_procedures(jsonl_file_path, output_json_file_path):
    """Write, for every input record, its text plus combined intentions and procedures.

    Each non-blank line of the input is parsed as a JSON object with ``text``
    and ``entities``. The character span of every entity is cut out of
    ``text`` and sorted by its ``label``: ``'Intention'`` spans and
    ``'Procedure'`` spans are collected separately, other labels are ignored.
    The spans of each kind are joined with single spaces; a kind with no
    entities yields an empty string.

    Args:
        jsonl_file_path: Path of the JSONL file to read (decoded as UTF-8).
        output_json_file_path: Path of the JSON file to write. It receives a
            list of ``{"raw_text", "intention", "procedure"}`` objects,
            indented by 4.

    Returns:
        None. The result is only written to ``output_json_file_path``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record or entity lacks a required field.
    """
    output_data = []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(jsonl_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines are not JSON documents; skip them rather than crash.
            if not line.strip():
                continue

            data = json.loads(line)
            raw_text = data['text']

            # Spans are bucketed by label; only these two labels are kept.
            spans_by_label = {'Intention': [], 'Procedure': []}
            for entity in data['entities']:
                entity_text = raw_text[entity['start_offset']:entity['end_offset']]
                bucket = spans_by_label.get(entity['label'])
                if bucket is not None:
                    bucket.append(entity_text)

            output_data.append({
                'raw_text': raw_text,
                'intention': ' '.join(spans_by_label['Intention']),  # Combine intentions into a single string
                'procedure': ' '.join(spans_by_label['Procedure']),  # Combine procedures into a single string
            })

    with open(output_json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(output_data, json_file, indent=4)

# Usage: read the admin export and write intentions plus procedures to 'Data.json'.
extract_intentions_and_procedures(
    jsonl_file_path='New Dataset/admin.jsonl',
    output_json_file_path='Data.json',
)

In [3]:
# Raw text paired with each of the different intentions it contains.
import json

def extract_raw_text_and_intentions(jsonl_file_path, output_json_file_path):
    """Write one record per intention entity, pairing it with its source text.

    Each non-blank line of the input is parsed as a JSON object with ``text``
    and ``entities``. Every entity whose ``label`` is ``'Intention'`` produces
    its own output record, so a text with several intentions appears once per
    intention and a text with none produces no record.

    Args:
        jsonl_file_path: Path of the JSONL file to read (decoded as UTF-8).
        output_json_file_path: Path of the JSON file to write. It receives a
            list of ``{"raw_text", "intention"}`` objects, indented by 4.

    Returns:
        None. The result is only written to ``output_json_file_path``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record or entity lacks a required field.
    """
    output_data = []

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(jsonl_file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines are not JSON documents; skip them rather than crash.
            if not line.strip():
                continue

            data = json.loads(line)
            raw_text = data['text']

            output_data.extend(
                {
                    'raw_text': raw_text,
                    'intention': raw_text[entity['start_offset']:entity['end_offset']],
                }
                for entity in data['entities']
                if entity['label'] == 'Intention'
            )

    with open(output_json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(output_data, json_file, indent=4)

# Usage: read the admin(2) export and write one record per intention.
# NOTE: 'output.json' is also the output path of the extract_intentions call in
# this notebook, so whichever cell runs last overwrites the other's result.
extract_raw_text_and_intentions(
    jsonl_file_path='New Dataset/admin(2).jsonl',
    output_json_file_path='output.json',
)