In [156]:
import re
import pandas as pd
import json

In [157]:
data = ['title =', 'TOML Example', '[owner]', 'name =', 'Tom Preston-Werner', 'dob =', '1979-05-27T07:32:00-08:00', '[[products]]', 'name =', 'Joybrau', 'flavour =', 'Lemon', '[database]', 'enabled =', True, 'ports =', [8000, 8001, 8002], 'temp_targets =', ['{', 'cpu =', 79.5, 'case =', 72.0, '}'], '[database.connection]', 'host =', 1, 'port =', 2, '[servers]', 'db =', 'mongodb', '[servers.alpha]', 'ip =', '10.0.0.1', 'role =', 'frontend', '[servers.beta]', 'ip =', '10.0.0.2', 'role =', 'backend', '[bananas]', 'name =', 'Chiquita', 'price =', 1.99, 'quantity =', 10, '[bananas.color]', 'red =', 0, 'green =', 0.5, 'blue =', 0, 'hosts =', ['alpha', 'omega']]

In [158]:
def handleInlineTables(data):
    result = []
    i = 0
    while i < len(data):
        value = data[i]
        if isinstance(value, list) and value[0] == "{" and value[-1] == "}":
            result.pop(i - 1)
            result.append(f"[{data[i - 1].split('=')[0].strip()}]")
            result.extend(value[1:-1])
            i += 1
        else:
            result.append(value)
        i += 1
    return result


data = handleInlineTables(data)

In [159]:
class BreakPoints:
    def __init__(self):
        self.tables = []
        self.inline_tables = []

    def setBreakPoints(self, data):
        table_pattern = re.compile(r"\[+\w+(?:\.\w+)*\]+")
        self.tables = re.findall(table_pattern, str(data))

        for element in data:
            if isinstance(element, list) and "{" in element:
                index = data.index(element)
                self.inline_tables.append(data[index - 1].split("=")[0].strip())

    def getTables(self):
        return self.tables

    def getInlineTables(self):
        return self.inline_tables

    def getTablesIndexes(self, data: list):
        indexes = []
        for table in self.tables:
            indexes.append(data.index(table))
        return indexes

    def getInlineTablesIndexes(self, data: list):
        indexes = []
        for element in data:
            if isinstance(element, list) and "{" in element:
                indexes.append(data.index(element) - 1)

        return indexes

In [160]:
breakpoints = BreakPoints()
breakpoints.setBreakPoints(data)

In [161]:
breakpoints.getTables()

['[owner]',
 '[[products]]',
 '[database]',
 '[temp_targets]',
 '[servers]',
 '[servers.alpha]',
 '[servers.beta]',
 '[bananas]',
 '[bananas.color]']

In [162]:
breakpoints.getInlineTables()

[]

In [163]:
breakpoints.getTablesIndexes(data)

[2, 7, 12, 17, 26, 29, 34, 39, 46]

In [164]:
breakpoints.getInlineTablesIndexes(data)

[]

In [165]:
def injectTablesContent(data: list) -> dict:
    breakpoints = BreakPoints()
    breakpoints.setBreakPoints(data)

    table_content = {}
    table_indexes = breakpoints.getTablesIndexes(data)
    table_count = len(table_indexes)

    for i in range(0, table_count - 1):
        table_content[breakpoints.getTables()[i]] = data[table_indexes[i] + 1:table_indexes[i + 1]]

    # Handle the last table
    table_content[breakpoints.getTables()[table_count - 1]] = data[table_indexes[table_count - 1] + 1:]

    return table_content

tables_dict = injectTablesContent(data)

In [166]:
new_data = {}
for key, values in tables_dict.items():
    for i in range(0, len(values), 2):
        column_key = key
        if column_key not in new_data:
            new_data[column_key] = []
        variable = values[i].split('=')[0].strip()
        new_data[column_key].append(f"{variable}={values[i+1]}")

# Convert the new dictionary to a DataFrame
df = pd.DataFrame.from_dict(new_data, orient='index')
df


{'[owner]': ['name=Tom Preston-Werner', 'dob=1979-05-27T07:32:00-08:00'], '[[products]]': ['name=Joybrau', 'flavour=Lemon'], '[database]': ['enabled=True', 'ports=[8000, 8001, 8002]'], '[temp_targets]': ['cpu=79.5', 'case=72.0', 'host=1', 'port=2'], '[servers]': ['db=mongodb'], '[servers.alpha]': ['ip=10.0.0.1', 'role=frontend'], '[servers.beta]': ['ip=10.0.0.2', 'role=backend'], '[bananas]': ['name=Chiquita', 'price=1.99', 'quantity=10'], '[bananas.color]': ['red=0', 'green=0.5', 'blue=0', "hosts=['alpha', 'omega']"]}


Unnamed: 0,0,1,2,3
[owner],name=Tom Preston-Werner,dob=1979-05-27T07:32:00-08:00,,
[[products]],name=Joybrau,flavour=Lemon,,
[database],enabled=True,"ports=[8000, 8001, 8002]",,
[temp_targets],cpu=79.5,case=72.0,host=1,port=2
[servers],db=mongodb,,,
[servers.alpha],ip=10.0.0.1,role=frontend,,
[servers.beta],ip=10.0.0.2,role=backend,,
[bananas],name=Chiquita,price=1.99,quantity=10,
[bananas.color],red=0,green=0.5,blue=0,"hosts=['alpha', 'omega']"


In [167]:
def extractInfoFromRow(row: pd.Series) -> dict:
    info = {}
    for element in row:
        if isinstance(element, str):
            variable, value = element.split('=')
            info[variable.strip()] = value.strip()
    return info

# Extract the information from each row
results = {}
for i in range(len(df)-1):
    result = extractInfoFromRow(df.iloc[i])
    if '[[' in df.index[i]:
        results[df.index[i]] = result    
    else:
        results[df.index[i].strip('[').strip(']')] = result

# Handle the last row
result = extractInfoFromRow(df.iloc[-1])
results[df.index[-1].strip('[]')] = result

results

{'owner': {'name': 'Tom Preston-Werner', 'dob': '1979-05-27T07:32:00-08:00'},
 '[[products]]': {'name': 'Joybrau', 'flavour': 'Lemon'},
 'database': {'enabled': 'True', 'ports': '[8000, 8001, 8002]'},
 'temp_targets': {'cpu': '79.5', 'case': '72.0', 'host': '1', 'port': '2'},
 'servers': {'db': 'mongodb'},
 'servers.alpha': {'ip': '10.0.0.1', 'role': 'frontend'},
 'servers.beta': {'ip': '10.0.0.2', 'role': 'backend'},
 'bananas': {'name': 'Chiquita', 'price': '1.99', 'quantity': '10'},
 'bananas.color': {'red': '0',
  'green': '0.5',
  'blue': '0',
  'hosts': "['alpha', 'omega']"}}

In [168]:
def handleNestedTables(input_dict):
    keys_to_remove = []
    keys_copy = list(input_dict.keys())
    
    for key in keys_copy:
        if '.' in key:
            parent_key, nested_key = key.split('.', 1)
            parent_key = parent_key.strip('[').strip(']')  # Remove brackets from parent_key
            nested_key = nested_key.strip('[').strip(']')  # Remove brackets from nested_key
            if parent_key in input_dict:
                input_dict[parent_key][nested_key] = input_dict[key]
            else:
                input_dict[parent_key] = {nested_key: input_dict[key]}
            keys_to_remove.append(key)

    for key in keys_to_remove:
        del input_dict[key]

    return input_dict

results = handleNestedTables(results)

In [169]:
def handleArrayTables(input_dict: dict):
    return_dict = {}
    
    for key, value in input_dict.items():
        if '[[' in key:
            new_key = key.strip('[').strip(']')
            new_value = [{k: v} for k, v in value.items()]
            return_dict[new_key] = new_value
        else:
            return_dict[key] = value
        
    return return_dict

results = handleArrayTables(results)


In [170]:
# Convert results to json
def writeJsonFile(results, file_name):
    with open(file_name, 'w') as f:
        f.write(json.dumps(results, indent=4))