In [4]:
import sys, re, csv
from pathlib import Path

In [7]:
# Header row of the generated CSV, one entry per output column.
COLUMNS = [
    "ID",
    "class ID",
    "Recording ID",
    "Ship Name",
    "Date & Time",
    "Duration(sec)",
    "Distances(m)",
]
           
def clean_line(s):
    """Normalise whitespace and dash-like characters in one line of text.

    A non-breaking space becomes a regular space; en-dash, em-dash and the
    Unicode minus sign each become an ASCII hyphen. Leading and trailing
    whitespace is then removed.
    """
    substitutions = str.maketrans({
        "\u00A0": " ",  # non-breaking space
        "\u2013": "-",  # en-dash
        "\u2014": "-",  # em-dash
        "\u2212": "-",  # minus sign
    })
    normalised = s.translate(substitutions)
    return normalised.strip()

def parse_file(path, encoding="utf-8"):
    """Parse a comma-separated metadata file into rows ordered like COLUMNS.

    Every line is normalised with ``clean_line`` and split on commas. The
    first seven fields are read positionally as: ID, class ID, ship name,
    date, time, duration and distances; any further fields are ignored.
    The date and time fields are joined with a colon into one value, and the
    recording ID is set to the same value as the ID.

    Blank lines are skipped. Lines with fewer than seven fields are skipped
    as well and reported once on stderr instead of raising ``IndexError``.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding used to open the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list of rows, each a list of seven strings:
        ``[id, class_id, recording_id, ship_name, date_time, duration,
        distances]``.
    """
    expected_fields = 7
    rows, bad_lines = [], []
    with open(path, "r", encoding=encoding) as f:
        for line_num, raw in enumerate(f, 1):
            line = clean_line(raw)
            if not line:
                # Blank (or whitespace-only) line, e.g. a trailing newline.
                continue

            # Split the record on commas and trim each field once.
            parts = [part.strip() for part in line.split(",")]
            if len(parts) < expected_fields:
                bad_lines.append(line_num)
                continue

            (record_id, class_id, ship_name,
             date_part, time_part, duration, distances) = parts[:expected_fields]

            # Join date and time with a colon; the fields are used verbatim,
            # so the result is YYYYMMDD:HHMMSS only if the input already uses
            # those formats -- TODO confirm against the metafile.
            date_time = f"{date_part}:{time_part}"

            # Assumption kept from the original: the recording ID is the same
            # as the ID. Adjust here if the dataset defines it differently.
            recording_id = record_id

            rows.append([record_id, class_id, recording_id, ship_name,
                         date_time, duration, distances])

    if bad_lines:
        print(
            f"parse_file: skipped {len(bad_lines)} malformed line(s) in {path} "
            f"(line numbers: {bad_lines})",
            file=sys.stderr,
        )

    return rows

In [None]:
# Convert the DeepShip cargo metafile to CSV next to the input file.
# NOTE(review): the absolute path is machine-specific; consider deriving it
# from a configurable data directory.
in_path = Path(r"D:\Git_Clone\Multi-feature-fusion-model-for-ship-audio\DeepShip\cargo-metafile")
out_path = in_path.with_suffix(".csv")
rows = parse_file(in_path)
# newline="" lets the csv module control line endings itself; the explicit
# encoding keeps the output independent of the platform locale.
with open(out_path, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(COLUMNS)
    writer.writerows(rows)
# Report the destination and the number of data rows written (header excluded).
print(f"Saved: {out_path}  (rows: {len(rows)})")

Saved: D:\Git_Clone\Multi-feature-fusion-model-for-ship-audio\DeepShip\cargo-metafile.csv  (rows: 110)
