<h3 style="text-align: center; color: #edc9af;">CONVERT AND MERGE JSONL FILES INTO ONE CSV FILE</h3>

In [1]:
import os
import json
import pandas as pd
from datetime import datetime

In [2]:
# Event name; only input files whose names start with "tmp_<event_file>" are merged.
event_file = "equipenhance"

In [3]:
# Folder holding the per-interval JSONL exports, and the folder that receives
# the merged CSV.
input_folder = "data/data_json"
output_folder = "data"

# Sorted by name so that the first and last entries can be used as the
# endpoints of the overall range.
# NOTE(review): assumes lexicographic order matches chronological order for
# these file names — confirm if the timestamp format ever changes.
jsonl_files = sorted(
    name for name in os.listdir(input_folder)
    if name.startswith(f"tmp_{event_file}") and name.endswith(".jsonl")
)

if not jsonl_files:
    # Stop here with an explicit error: the following cells index
    # jsonl_files[0] / jsonl_files[-1] and would otherwise fail with an
    # opaque IndexError.
    raise FileNotFoundError(
        f"Cannot find tmp_{event_file}*.jsonl in {input_folder}/"
    )

In [4]:
def extract_datetime_from_filename(filename: str, part: str) -> str:
    """Return the start or end segment of a ``tmp_<start>_to_<end>.jsonl`` name.

    The name is stripped of a trailing ``.jsonl``, everything up to and
    including the first ``tmp_`` is discarded, and the remainder is split on
    the LAST ``_to_``. The start segment is therefore everything between
    ``tmp_`` and that separator (it keeps any event-name prefix present in
    the file name); the end segment is everything after it.

    Args:
        filename: File name such as
            ``tmp_equipenhance_2025-05-15_00-00-00_to_2025-05-16_00-00-00.jsonl``.
        part: ``'start'`` selects the start segment; any other value selects
            the end segment.

    Returns:
        The requested segment as a string.

    Raises:
        IndexError: if ``filename`` does not contain ``tmp_``.
        ValueError: if the remainder does not contain ``_to_``.
    """
    suffix = ".jsonl"
    # Strip the extension only at the end of the name, not wherever it occurs.
    stem = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    # maxsplit=1 keeps the whole remainder even if "tmp_" appears again later.
    base = stem.split("tmp_", 1)[1]
    # Split on the last separator so an event name containing "_to_" does not
    # break the two-way unpacking.
    start_str, end_str = base.rsplit("_to_", 1)
    return start_str if part == 'start' else end_str

# Overall range covered by the inputs: start of the first listed file and
# end of the last listed file.
first_time_raw = extract_datetime_from_filename(filename=jsonl_files[0], part='start')
last_time_raw = extract_datetime_from_filename(filename=jsonl_files[-1], part='end')

In [5]:
def format_for_filename(ts: str) -> str:
    """Return ``ts`` with every ``:`` replaced by ``-`` and every ``T`` by ``_``.

    All other characters are left untouched.
    """
    substitutions = str.maketrans({":": "-", "T": "_"})
    return ts.translate(substitutions)

# Run both range endpoints through format_for_filename before they are
# embedded in the output file name.
first_time, last_time = (
    format_for_filename(raw) for raw in (first_time_raw, last_time_raw)
)

# Name and full path of the merged CSV inside output_folder.
output_csv_name = f"m952_{first_time}_to_{last_time}.csv"
output_csv_path = os.path.join(output_folder, output_csv_name)

In [6]:
# Merge every JSONL input into one CSV, one input file at a time so that only
# a single file's records are held in memory at once.
if os.path.exists(output_csv_path):
    # An existing export is never overwritten; remove it to regenerate.
    print(f"File exist: {output_csv_path} , stopped.")

else:
    # Write to a temporary sibling file and rename it only after every input
    # has been processed. An interrupted run then never leaves a partial CSV
    # at output_csv_path, which the existence check above would otherwise
    # treat as a finished export on the next run. A stale ".part" file from a
    # previous failed run is overwritten by the first write (mode 'w').
    partial_csv_path = output_csv_path + ".part"

    all_files = sorted(jsonl_files)
    is_first = True
    header_columns = None  # column order fixed by the first chunk written
    total_records = 0

    for file in all_files:
        full_path = os.path.join(input_folder, file)
        lines = []
        with open(full_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) are not records
                    # and must not be reported as decode errors.
                    continue
                try:
                    lines.append(json.loads(line))
                except json.JSONDecodeError:
                    # Best effort: report the bad line and keep going.
                    print(f"Error decoding line in file: {file}")

        if not lines:
            continue

        df = pd.json_normalize(lines)

        if header_columns is None:
            header_columns = list(df.columns)
        elif list(df.columns) != header_columns:
            # Later chunks are appended without a header, so their columns
            # must match the header written by the first chunk exactly;
            # otherwise values land under the wrong column names.
            extra_columns = [c for c in df.columns if c not in header_columns]
            if extra_columns:
                print(f"Warning: {file} has columns not in the CSV header, dropped: {extra_columns}")
            # Missing columns are filled with NaN (written as empty fields).
            df = df.reindex(columns=header_columns)

        df.to_csv(partial_csv_path, index=False, mode='w' if is_first else 'a', header=is_first)
        total_records += len(df)
        is_first = False
        print(f"Done: {file} — {len(df)} records")

    if not is_first:
        # At least one chunk was written: publish the finished file.
        os.replace(partial_csv_path, output_csv_path)

    print(f"\nCompleted convert {total_records:,} records to: {output_csv_path}")

Done: tmp_equipenhance_2025-05-15_00-00-00_to_2025-05-16_00-00-00.jsonl — 166238 records
Done: tmp_equipenhance_2025-05-16_00-00-00_to_2025-05-17_00-00-00.jsonl — 173005 records
Done: tmp_equipenhance_2025-05-17_00-00-00_to_2025-05-18_00-00-00.jsonl — 167147 records
Done: tmp_equipenhance_2025-05-18_00-00-00_to_2025-05-19_00-00-00.jsonl — 148782 records
Done: tmp_equipenhance_2025-05-19_00-00-00_to_2025-05-20_00-00-00.jsonl — 195425 records
Done: tmp_equipenhance_2025-05-20_00-00-00_to_2025-05-21_00-00-00.jsonl — 180148 records
Done: tmp_equipenhance_2025-05-21_00-00-00_to_2025-05-22_00-00-00.jsonl — 172785 records
Done: tmp_equipenhance_2025-05-22_00-00-00_to_2025-05-23_00-00-00.jsonl — 164246 records
Done: tmp_equipenhance_2025-05-23_00-00-00_to_2025-05-24_00-00-00.jsonl — 165160 records
Done: tmp_equipenhance_2025-05-24_00-00-00_to_2025-05-25_00-00-00.jsonl — 153422 records
Done: tmp_equipenhance_2025-05-25_00-00-00_to_2025-05-26_00-00-00.jsonl — 157622 records
Done: tmp_equipenhanc