In [6]:
import json

def concatenate_jsonl_files(output_file, *input_files):
    """
    Concatenates multiple JSONL files into a single output file.

    Each input line is parsed with ``json.loads`` purely as a validity
    check; the original text of valid lines is copied to the output.
    Lines that fail to parse are reported on stdout and skipped, and
    blank (whitespace-only) lines are skipped silently. Every record
    written is guaranteed to end with a newline, so an input file whose
    last record lacks one cannot merge with the first record of the
    next file. All files are read and written as UTF-8.

    Args:
        output_file (str): Path to the output JSONL file (overwritten
            if it already exists)
        *input_files: Variable number of input JSONL file paths,
            concatenated in the order given
    """
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for file_path in input_files:
            with open(file_path, 'r', encoding='utf-8') as infile:
                for line in infile:
                    # Blank lines carry no record; they are not errors.
                    if not line.strip():
                        continue

                    # Validate the line is proper JSON before writing
                    try:
                        json.loads(line)
                    except json.JSONDecodeError:
                        print(f"Skipping invalid JSON line in {file_path}: {line.strip()}")
                        continue

                    # The final line of a file may lack its terminator;
                    # add it so the next file starts on a fresh line.
                    if not line.endswith('\n'):
                        line += '\n'
                    outfile.write(line)

    print(f"Successfully concatenated {len(input_files)} files into {output_file}")

# Example usage: the first argument is the output path, the remaining
# arguments are the input files, concatenated in the order listed.
concatenate_jsonl_files(
    "concat_jsonl_train/concat_val.jsonl",
    "jsonl/k6b_val.jsonl",
    "jsonl/l1b_val.jsonl",
    "jsonl/k3b_val.jsonl",
)

Successfully concatenated 3 files into concat_jsonl_train/concat_val.jsonl
