In [3]:
import json
import csv
import os

In [None]:
def load_json_lines(filepath):
    """Load JSONL or newline-separated JSON objects from a file.

    Each non-blank line is parsed as one JSON document; blank and
    whitespace-only lines are skipped.

    Args:
        filepath: Path of the file to read.

    Returns:
        A list with one parsed value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also drops a
    # leading byte-order mark, which json.loads would otherwise reject on
    # the first line of the file.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return [json.loads(line) for line in stripped_lines if line]

def extract_id_and_text(data):
    """Reduce every entry to a new dict holding just its 'id' and 'text'.

    Args:
        data: Iterable of mappings, each providing 'id' and 'text' keys.

    Returns:
        A list of ``{'id': ..., 'text': ...}`` dicts in input order.

    Raises:
        KeyError: If an entry lacks 'id' or 'text'.
    """
    slimmed = []
    for entry in data:
        # Any other keys carried by the entry are intentionally dropped.
        slimmed.append({'id': entry['id'], 'text': entry['text']})
    return slimmed

def write_to_csv(data, output_path):
    """Dump rows to a UTF-8 CSV file with an 'id,text' header line.

    Args:
        data: Iterable of dicts keyed by 'id' and 'text'.
        output_path: Destination file; it is created or overwritten.
    """
    columns = ['id', 'text']
    # newline='' leaves line endings to the csv module; UTF-8 keeps
    # non-ASCII text (e.g. German umlauts) intact.
    with open(output_path, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns)
        csv_writer.writeheader()
        for row in data:
            csv_writer.writerow(row)

def json_to_csv(input_path, output_path):
    """Convert a JSON-lines file into a two-column ('id', 'text') CSV.

    Reads the records from ``input_path``, keeps only each record's 'id'
    and 'text', writes them to ``output_path`` and prints a confirmation.

    Args:
        input_path: Path of the JSON-lines file to read.
        output_path: Path of the CSV file to write.
    """
    rows = extract_id_and_text(load_json_lines(input_path))
    write_to_csv(rows, output_path)
    print(f"✅ Saved CSV to: {output_path}")

# Example usage:
# Converts the train and test splits of the gerestaurant dataset; each CSV is
# written into the same folder as its source JSON file.
conversions = [
    ('../../04_data/datasets/gerestaurant/train.json', '../../04_data/datasets/gerestaurant/train_text_only.csv'),
    ('../../04_data/datasets/gerestaurant/test.json', '../../04_data/datasets/gerestaurant/test_text_only.csv'),
]
os.makedirs('../../04_data/datasets/gerestaurant/', exist_ok=True)
for source_json, target_csv in conversions:
    json_to_csv(source_json, target_csv)

✅ Saved CSV to: ../04_data/datasets/gerestaurant/train_text_only.csv
✅ Saved CSV to: ../04_data/datasets/gerestaurant/test_text_only.csv
