In [26]:
import dotenv
import os
# Load variables from a .env file into the process environment so the API key
# lookup in the next cell can find them (the recorded output shows it returned True).
dotenv.load_dotenv()

True

In [27]:
# Read the Groq API key from the environment rather than hard-coding it.
# Indexing os.environ raises KeyError if GROQ_API_KEY is not set.
key = os.environ["GROQ_API_KEY"]

In [28]:
# Model identifier handed to the Groq client when it is constructed.
model_name = "gemma2-9b-it"

In [41]:
from llama_index.llms.groq import Groq

# Groq-backed LLM client; temperature is pinned to 0.0 for this experiment.
llm = Groq(
    model=model_name,
    api_key=key,
    temperature=0.0,
)

In [30]:
from pydantic import BaseModel
from typing import Any

# One cell of a normalized table.
# NOTE: "Tabel" looks like a typo for "Table"; the name is kept because the
# prompt text and the Table schema refer to it by this spelling.
# Documented with comments rather than a docstring: Table.model_json_schema()
# is sent to the LLM, and a docstring here would change that schema.
class TabelCell(BaseModel):
    # Cell payload; annotated as Any, so no particular type is enforced.
    content: Any
    # Span counts, both defaulting to 1.
    rowspan: int = 1
    colspan: int = 1

class Table(BaseModel):
    """Table in normalized format"""
    # The docstring above is kept verbatim: Table.model_json_schema() is
    # interpolated into the LLM prompt and carries the docstring with it.

    # Grid of cells as a list of rows, each row being a list of TabelCell.
    cells: list[list[TabelCell]]

    # Optional strings, each defaulting to None.
    caption: str | None = None
    content: str | None = None  # NOTE(review): purpose not evident from this notebook - confirm
    label: str | None = None

In [123]:
# Sample LaTeX table used as the input document for the conversion prompt.
# A raw string is used so the LaTeX backslashes are kept exactly as written.
tex_doc = r"""
\begin{table}
\centering
\begin{tabular}{l|r}
Item & Quantity \\\hline
Widgets & 42 \\
Gadgets & 13
\end{tabular}
\caption{\label{tab:widgets}An example table.}
\end{table}
"""

In [124]:
import json

In [133]:
# Render the schema as real JSON (double quotes, null/true/false) instead of
# interpolating the Python dict repr, so the model is shown a valid JSON Schema.
table_schema = json.dumps(Table.model_json_schema(), indent=2)

# Raw f-string: {placeholders} are still interpolated, but backslashes are
# passed to the model exactly as typed.
# NOTE(review): because nothing is unescaped in a raw string, the instruction
# below literally reads "use \\\\ instead of \\" (four vs. two backslashes) -
# confirm that this is the intended wording.
prompt = fr"""
Convert the following LaTeX tabular to a JSON object with this structure:

Table:
- cells: list[list[TabelCell]]
- TabelCell: content, colspan, rowspan

Please generate JSON with LaTeX formulas, making sure to double escape all backslashes for LaTeX commands (use \\\\ instead of \\) to ensure valid JSON parsing.

This json is going to be used for a pydantic model.

Pydantic model structure:
{table_schema}

LATEX:
{tex_doc}
"""

# Send the prompt to the LLM; the reply is post-processed in the following cells.
response = llm.complete(prompt)

In [134]:
# Take the plain-text form of the completion response for string processing.
r = str(response)

In [135]:
r

'```json\n{\n  "cells": [\n    [\n      {\n        "content": "Item",\n        "colspan": 1,\n        "rowspan": 1\n      },\n      {\n        "content": "Quantity",\n        "colspan": 1,\n        "rowspan": 1\n      }\n    ],\n    [\n      {\n        "content": "Widgets",\n        "colspan": 1,\n        "rowspan": 1\n      },\n      {\n        "content": "42",\n        "colspan": 1,\n        "rowspan": 1\n      }\n    ],\n    [\n      {\n        "content": "Gadgets",\n        "colspan": 1,\n        "rowspan": 1\n      },\n      {\n        "content": "13",\n        "colspan": 1,\n        "rowspan": 1\n      }\n    ]\n  ],\n  "caption": "An example table.",\n  "content": null,\n  "label": "tab:widgets"\n}\n``` \n\n\n**Explanation:**\n\n* **cells:** This array holds the table cells. Each cell is represented as another array containing `TabelCell` objects.\n* **TabelCell:** Each `TabelCell` object has three properties:\n    * **content:** The text content of the cell. LaTeX formulas are 

In [136]:
# Locate the JSON object inside the LLM reply. The reply wraps the JSON in a
# Markdown fence and appends free-form explanation, so searching for the last
# "}" in the whole string overshoots as soon as that trailing text contains a
# brace. Let the JSON decoder report where the first object actually ends.
left = r.index("{")  # raises ValueError if the reply contains no "{"
# raw_decode returns (value, end) with `end` one past the closing brace;
# `right` stays the index of the closing brace itself, as before.
right = json.JSONDecoder().raw_decode(r, left)[1] - 1

In [137]:
# Slice out the text from `left` through `right` inclusive (hence the +1).
j = r[left:right+1]

In [138]:
# Parse the extracted substring as JSON; raises json.JSONDecodeError if it is malformed.
data = json.loads(j)

In [139]:
# Validate the parsed JSON against the Table model. model_validate takes the
# parsed value as-is, so a reply that is valid JSON but not an object is
# reported as a pydantic ValidationError rather than a TypeError from ** unpacking.
table = Table.model_validate(data)

In [140]:
table

Table(cells=[[TabelCell(content='Item', rowspan=1, colspan=1), TabelCell(content='Quantity', rowspan=1, colspan=1)], [TabelCell(content='Widgets', rowspan=1, colspan=1), TabelCell(content='42', rowspan=1, colspan=1)], [TabelCell(content='Gadgets', rowspan=1, colspan=1), TabelCell(content='13', rowspan=1, colspan=1)]], caption='An example table.', content=None, label='tab:widgets')

In [121]:
# Print each row of the table (a list of TabelCell objects) on its own line.
for row in table.cells:
    print(row)

[TabelCell(content='Element 1\\\\', rowspan=1, colspan=3), TabelCell(content='Element\\\\', rowspan=1, colspan=3)]
[TabelCell(content='Projectile', rowspan=1, colspan=1), TabelCell(content='Energy\\\\', rowspan=1, colspan=1), TabelCell(content='\\(\\sigma_{calc}\\)', rowspan=1, colspan=1), TabelCell(content='\\(\\sigma_{expt}\\)', rowspan=1, colspan=1), TabelCell(content='Energy\\\\', rowspan=1, colspan=1), TabelCell(content='\\(\\sigma_{calc}\\)', rowspan=1, colspan=1), TabelCell(content='\\(\\sigma_{expt}\\)', rowspan=1, colspan=1)]
[TabelCell(content='Element 3', rowspan=1, colspan=1), TabelCell(content='990 A', rowspan=1, colspan=1), TabelCell(content='1168', rowspan=1, colspan=1), TabelCell(content='$1547\\pm12$', rowspan=1, colspan=1), TabelCell(content='780 A', rowspan=1, colspan=1), TabelCell(content='1166', rowspan=1, colspan=1), TabelCell(content='$1239\\pm100$', rowspan=1, colspan=1)]
[TabelCell(content='Element 4', rowspan=1, colspan=1), TabelCell(content='500 A', rowspan=1

In [3]:
def miau(cox: int | str):
    print("ham")

In [5]:
miau(2)

ham


In [16]:
from pydantic import BaseModel

class NormalisedNode(BaseModel):
    """Base class for normalized elements"""
    # Required string field; the only field declared on the base class.
    original_content: str

    def __repr__(self):
        """Delegate to ``str()`` so both renderings produce the same text."""
        return str(self)

    def __str__(self):
        """Return ``"<ClassName>(miau)"`` using the runtime class name."""
        # NOTE(review): the text in parentheses is a fixed literal, not a field
        # value - confirm whether original_content was meant to appear here.
        class_name = type(self).__name__
        return f"{class_name}(miau)"

# Node with a required `text` string in addition to the inherited
# `original_content`; it also inherits the base class's __str__/__repr__.
class Text(NormalisedNode):
    text: str

In [17]:
# Build a Text node and print it to exercise the custom __str__.
txt = Text(text="hello", original_content="miau")
print(txt)

Text(miau)


In [18]:
import json

# JSON text carrying a LaTeX command. The Python literal spells four
# backslashes, which is two in the actual string, which JSON decodes to one.
json_string = '{"formula": "E = mc^2 \\\\alpha"}'

# Decode the document, then pick the formula out of the resulting dict.
data = json.loads(json_string)
latex_formula = data["formula"]

# Prints: E = mc^2 \alpha
print(latex_formula)

E = mc^2 \alpha
