In [1]:
import csv
import io
import json
import sys
import xml.etree.ElementTree as ET
from dataclasses import dataclass, asdict
from typing import List, Dict

import pandas as pd
from bs4 import BeautifulSoup

In [2]:
@dataclass
class DataRecord:
    """Tabular data held as a list of row dictionaries, with text serializers.

    Attributes:
        data: Rows of the table; each row maps a column name to its value.
    """

    data: List[Dict[str, str]]

    def _columns(self):
        """Return every column name, in first-seen order across all rows."""
        # A dict is used as an ordered set so keys that first appear in a
        # later row are still emitted instead of being silently dropped.
        seen = {}
        for row in self.data:
            for key in row:
                seen.setdefault(key, None)
        return list(seen)

    def _to_delimited(self, delimiter):
        """Serialize the rows as delimiter-separated text with proper quoting.

        Args:
            delimiter: Single-character field separator (e.g. ',' or a tab).

        Returns:
            Header line followed by one line per row, joined by newlines with
            no trailing newline. An empty string when there are no rows.
        """
        if not self.data:
            return ''
        buffer = io.StringIO()
        # csv.DictWriter quotes fields containing the delimiter, quotes or
        # line breaks, stringifies non-string values and writes None as ''.
        writer = csv.DictWriter(
            buffer,
            fieldnames=self._columns(),
            delimiter=delimiter,
            lineterminator='\n',
            restval='',
        )
        writer.writeheader()
        writer.writerows(self.data)
        text = buffer.getvalue()
        # The writer terminates every row; drop only the final terminator so
        # the result has no trailing newline.
        return text[:-1] if text.endswith('\n') else text

    def to_json(self):
        """Return the rows as a pretty-printed JSON array (non-ASCII kept as-is)."""
        return json.dumps(self.data, indent=4, ensure_ascii=False)

    def to_csv(self):
        """Return the rows as comma-separated text with a header line."""
        return self._to_delimited(',')

    def to_tsv(self):
        """Return the rows as tab-separated text with a header line."""
        # Serialized independently of to_csv() so commas inside values are
        # preserved rather than being rewritten into tabs.
        return self._to_delimited('\t')

    def to_xml(self):
        """Return the rows as XML: a <root> with one <item> per row.

        Each key of a row becomes a child element of its <item>, with the
        value as the element text.
        """
        root = ET.Element("root")
        for row in self.data:
            item = ET.SubElement(root, "item")
            for key, value in row.items():
                field = ET.SubElement(item, key)
                # ElementTree can only serialize string text; None is kept so
                # the element is written empty.
                field.text = None if value is None else str(value)
        return ET.tostring(root, encoding='unicode')

    def to_html(self):
        """Return the rows as an HTML table rendered by pandas, without the index."""
        df = pd.DataFrame(self.data)
        return df.to_html(index=False)

In [5]:
def from_json(content):
    """Build a DataRecord from JSON text.

    Args:
        content: JSON document as a string.

    Returns:
        A DataRecord wrapping whatever value the JSON decodes to.
    """
    return DataRecord(json.loads(content))

In [6]:
def from_csv(content):
    """Build a DataRecord from comma-separated text.

    The first line is used as the header; each following record becomes a
    dictionary keyed by those header names.

    Args:
        content: CSV document as a string.

    Returns:
        A DataRecord holding one dictionary per data record.
    """
    # newline='' passes line endings to the csv module untouched, so quoted
    # fields containing line breaks keep them (splitlines() discarded them).
    reader = csv.DictReader(io.StringIO(content, newline=''))
    return DataRecord(list(reader))

In [7]:
def from_tsv(content):
    """Build a DataRecord from tab-separated text.

    The first line is used as the header; each following record becomes a
    dictionary keyed by those header names.

    Args:
        content: TSV document as a string.

    Returns:
        A DataRecord holding one dictionary per data record.
    """
    # newline='' passes line endings to the csv module untouched, so quoted
    # fields containing line breaks keep them (splitlines() discarded them).
    reader = csv.DictReader(io.StringIO(content, newline=''), delimiter='\t')
    return DataRecord(list(reader))

In [8]:
def from_xml(content):
    """Build a DataRecord from XML text.

    Every direct <item> child of the document root becomes one row, mapping
    each child element's tag to that element's text.

    Args:
        content: XML document as a string.

    Returns:
        A DataRecord holding one dictionary per <item> element.
    """
    document = ET.fromstring(content)
    records = [
        {field.tag: field.text for field in entry}
        for entry in document.findall("item")
    ]
    return DataRecord(records)

In [9]:
def from_html(content):
    """Build a DataRecord from the first <table> in an HTML document.

    The <th> cells of the first <tr> supply the column names; the <td> cells
    of each following <tr> are paired with those names to form a row.

    Args:
        content: HTML document as a string.

    Returns:
        A DataRecord holding one dictionary per table row after the first.
        Empty when the table contains no <tr> elements.

    Raises:
        ValueError: If the document contains no <table> element.
    """
    soup = BeautifulSoup(content, "html.parser")
    table = soup.find("table")
    if table is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the next line.
        raise ValueError("No <table> element found in HTML content")

    rows = table.find_all("tr")
    if not rows:
        # No header row to read, so there is nothing to convert.
        return DataRecord([])

    headers = [th.text for th in rows[0].find_all("th")]
    data = [
        dict(zip(headers, (td.text for td in row.find_all("td"))))
        for row in rows[1:]
    ]
    return DataRecord(data)

In [15]:
def convert(input_content, input_format, output_format):
    """Convert text from one supported format to another.

    Supported format names are "json", "csv", "tsv", "xml" and "html".

    Args:
        input_content: Source document as a string.
        input_format: Format name of ``input_content``.
        output_format: Format name to serialize to.

    Returns:
        The serialized output produced by the chosen DataRecord method.

    Raises:
        ValueError: If either format name is not supported.
    """
    readers = {
        "json": from_json,
        "csv": from_csv,
        "tsv": from_tsv,
        "xml": from_xml,
        "html": from_html,
    }
    writers = {
        "json": DataRecord.to_json,
        "csv": DataRecord.to_csv,
        "tsv": DataRecord.to_tsv,
        "xml": DataRecord.to_xml,
        "html": DataRecord.to_html,
    }

    supported = input_format in readers and output_format in writers
    if not supported:
        raise ValueError("Unsupported format")

    parse = readers[input_format]
    serialize = writers[output_format]
    # The writers are unbound methods, so the parsed record is passed as self.
    return serialize(parse(input_content))

In [12]:
def save_output(content, output_file):
    """Write text to a file as UTF-8, replacing any existing contents.

    Args:
        content: Text to write.
        output_file: Path of the file to create or overwrite.
    """
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.write(content)

In [14]:
# Source document and its format; every other supported format is generated.
input_format = "json"
input_file = "input.json"

with open(input_file, "r", encoding="utf-8") as f:
    input_content = f.read()

formats = ["json", "csv", "tsv", "xml", "html"]
for output_format in formats:
    # Converting a format to itself is pointless, so skip the input format.
    if output_format == input_format:
        continue
    output_content = convert(input_content, input_format, output_format)
    save_output(output_content, f"converted.{output_format}")