In [6]:
from datasets import load_from_disk
import xml.etree.ElementTree as ET

In [7]:
# Load the dataset previously saved under the relative path "posts/combined".
# NOTE(review): the path is resolved against the kernel's working directory,
# so this cell only works when the notebook is started from the right place.
ds = load_from_disk("posts/combined")

In [12]:
# Pick one post body (row 55 of the 'Body' column) as the HTML sample used by
# the conversion cells below; the bare name echoes it as the cell output.
example = ds['Body'][55]
example

'<p>Could someone please point me in the right direction with respect to OHLC data timeframe conversion with <a href="http://pandas.pydata.org/" rel="noreferrer">Pandas</a>? What I\'m trying to do is build a Dataframe with data for higher timeframes, given data with lower timeframe. </p>\n\n<p>For example, given I have the following one-minute (M1) data:</p>\n\n<pre><code>                       Open    High     Low   Close  Volume\nDate                                                       \n1999-01-04 10:22:00  1.1801  1.1819  1.1801  1.1817       4\n1999-01-04 10:23:00  1.1817  1.1818  1.1804  1.1814      18\n1999-01-04 10:24:00  1.1817  1.1817  1.1802  1.1806      12\n1999-01-04 10:25:00  1.1807  1.1815  1.1795  1.1808      26\n1999-01-04 10:26:00  1.1803  1.1806  1.1790  1.1806       4\n1999-01-04 10:27:00  1.1801  1.1801  1.1779  1.1786      23\n1999-01-04 10:28:00  1.1795  1.1801  1.1776  1.1788      28\n1999-01-04 10:29:00  1.1793  1.1795  1.1782  1.1789      10\n1999-01-04 10:3

In [13]:
# The post body is a fragment with several top-level elements (<p>, <pre>, ...),
# so wrap it in a synthetic <body> to give the XML parser a single root.
# NOTE(review): ET.fromstring is a strict XML parser -- HTML that is not
# well-formed XML (unclosed tags, HTML-only entities) will raise ParseError.
root = ET.fromstring("<body>" + example + "</body>")

In [14]:
def traverse(node):
    traverse_children = False
    text = ""
    match node.tag:
        case "a":
            return f"[{node.text}]({node.attrib['href']})"
        case "h1":
            return f"\n# {node.text}\n\n"
        case "h2":
            return f"\n## {node.text}\n\n"
        case "h3":
            return f"\n### {node.text}\n\n"
        case "h4":
            return f"\n#### {node.text}\n\n"
        case "h5":
            return f"\n##### {node.text}\n\n"
        case "h6":
            return f"\n###### {node.text}\n\n"
        case "code":
            return f"`{node.text}`"
        case "blockquote":
            text = f"> "
            traverse_children = True
        case "hr":
            return f"\n---\n\n"
        case "img":
            return f"![{node.attrib['alt']}]({node.attrib['src']})"
        case "li":
            text = f"- {node.text}"
            traverse_children = True
        case "pre":
            has_code = False
            for child in node:
                if child.tag == "code":
                    has_code = True
                    text = f"```\n{child.text}\n```"
            if not has_code:
                traverse_children = True
                if node.text:
                    text = node.text + text
        case other:
            traverse_children = True
            if node.text:
                text = node.text + text
    if traverse_children:
        for child in node:
            text += traverse(child)
    if node.tail:
        text += node.tail
    return text

# Convert the sample post to Markdown; print() is used so the newlines in the
# result are shown literally instead of as an escaped string repr.
print(traverse(root))

Could someone please point me in the right direction with respect to OHLC data timeframe conversion with [Pandas](http://pandas.pydata.org/)

For example, given I have the following one-minute (M1) data:

```
                       Open    High     Low   Close  Volume
Date                                                       
1999-01-04 10:22:00  1.1801  1.1819  1.1801  1.1817       4
1999-01-04 10:23:00  1.1817  1.1818  1.1804  1.1814      18
1999-01-04 10:24:00  1.1817  1.1817  1.1802  1.1806      12
1999-01-04 10:25:00  1.1807  1.1815  1.1795  1.1808      26
1999-01-04 10:26:00  1.1803  1.1806  1.1790  1.1806       4
1999-01-04 10:27:00  1.1801  1.1801  1.1779  1.1786      23
1999-01-04 10:28:00  1.1795  1.1801  1.1776  1.1788      28
1999-01-04 10:29:00  1.1793  1.1795  1.1782  1.1789      10
1999-01-04 10:31:00  1.1780  1.1792  1.1776  1.1792      12
1999-01-04 10:32:00  1.1788  1.1792  1.1788  1.1791       4

```

which has Open, High, Low, Close (OHLC) and volume values for eve