In [1]:
import json
import os

def _extract_qa_pairs(messages):
    """
    Pair each user message with the assistant reply that follows it.

    Args:
        messages (list): Message dicts, each carrying a 'role' key and, for
            'user' / 'assistant' roles, a 'content' key.

    Returns:
        list: (question, answer) tuples in conversation order. Messages with
            any other role are ignored, as are pairs with empty content.

    Raises:
        KeyError, TypeError: If a message is malformed; the caller reports it.
    """
    pairs = []
    pending_question = None
    for message in messages:
        role = message['role']
        if role == 'user':
            # A newer user message replaces an unanswered earlier one.
            pending_question = message['content']
        elif role == 'assistant':
            answer = message['content']
            if pending_question and answer:
                pairs.append((pending_question, answer))
                # The question is consumed, so a later assistant message
                # cannot be attached to it a second time.
                pending_question = None
    return pairs


def process_jsonl_to_text(input_file, output_file):
    """
    Process JSONL file containing Q&A pairs and write them to a text file.

    Each non-blank input line is parsed as one JSON object. When the object
    has a 'messages' list, every user message followed by an assistant reply
    is written as a "Question: ... / Answer: ..." block, followed by an
    80-dash separator. Multi-turn conversations therefore yield one block per
    turn, and a trailing user message with no reply is not written.

    Blank lines are skipped silently. Lines that are not valid JSON, are not
    JSON objects, or contain malformed messages are reported on stdout and
    skipped; they never abort the run.

    Args:
        input_file (str): Path to the input JSONL file
        output_file (str): Path to the output text file

    Returns:
        None

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            stripped = line.strip()

            # Blank lines (commonly a trailing newline at EOF) are not
            # records; skip them instead of reporting them as invalid JSON.
            if not stripped:
                continue

            try:
                data = json.loads(stripped)

                if not isinstance(data, dict):
                    raise TypeError(
                        f"expected a JSON object, got {type(data).__name__}"
                    )

                if 'messages' not in data:
                    continue

                # The helper returns a complete list before anything is
                # written, so a malformed record produces no partial output.
                for question, answer in _extract_qa_pairs(data['messages']):
                    outfile.write(f"Question: {question}\n\n")
                    outfile.write(f"Answer: {answer}\n\n")
                    outfile.write("-" * 80 + "\n\n")  # Separator between Q&A pairs
            except json.JSONDecodeError:
                # Echo the stripped text so the warning stays on one line.
                print(f"Warning: Skipping invalid JSON line: {stripped[:50]}...")
            except Exception as e:
                # Deliberate best-effort: one bad record must not stop the run.
                print(f"Error processing line: {str(e)}")

    print(f"Processing complete. Results written to {output_file}")



In [4]:

# Source JSONL and destination text file; edit both paths to suit your data.
input_file, output_file = "superia_qa_100-1.jsonl", "qa_output.txt"

# Convert the Q&A records; the function prints a status line when it finishes.
process_jsonl_to_text(input_file=input_file, output_file=output_file)

Processing complete. Results written to qa_output.txt
