In [1]:
# Sample wikitext table used as input for the converter below.
# Columns: Season, League, Position. The three column titles are written as
# plain "|" cells before the first "|-" row separator, and two of them carry
# an inline style attribute.
# NOTE(review): the last three season labels use an en dash ("2020–21")
# while the earlier ones use an ASCII hyphen ("2019-20") — confirm whether
# that inconsistency is intentional before relying on the labels as keys.
x = """
{| class="wikitable" style="text-align:left;"
|Season
| style="text-align:center;" |League
| style="text-align:center;" |Position
|-
|2000-01
|English Premier League
|1st
|-
|2001-02
|English Premier League
|3rd
|-
|2002-03
|English Premier League
|1st
|-
|2003-04
|English Premier League
|3rd
|-
|2004-05
|English Premier League
|3rd
|-
|2005-06
|English Premier League
|2nd
|-
|2006-07
|English Premier League
|1st
|-
|2007-08
|English Premier League
|1st
|-
|2008-09
|English Premier League
|1st
|-
|2009-10
|English Premier League
|2nd
|-
|2010-11
|English Premier League
|1st
|-
|2011-12
|English Premier League
|2nd
|-
|2012-13
|English Premier League
|1st
|-
|2013-14
|English Premier League
|7th
|-
|2014-15
|English Premier League
|4th
|-
|2015-16
|English Premier League
|5th
|-
|2016-17
|English Premier League
|6th
|-
|2017-18
|English Premier League
|2nd
|-
|2018-19
|English Premier League
|6th
|-
|2019-20
|English Premier League
|3rd
|-
|2020–21
|English Premier League
|2nd
|-
|2021–22
|English Premier League
|6th
|-
|2022–23
|English Premier League
|3rd
|}
"""

In [4]:
import html

import mwparserfromhell
from wikitextparser import Table
from bs4 import BeautifulSoup


# Wiki-markup tokens that can introduce a cell on the implicit first row,
# i.e. the cells written before any "|-" row separator. The doubled forms
# are the inline separators used when several cells share one source line.
_HEADER_TH_MARKUP = ("!", "!!", "||")
_HEADER_TD_MARKUP = ("|", "||")


def _escaped_text(wikicode) -> str:
    """Return the HTML-escaped text of the direct ``Text`` children of *wikicode*.

    Children that are not ``Text`` nodes (links, templates, nested tags, ...)
    are skipped, so only plain text reaches the output.
    """
    return "".join(
        html.escape(str(node), quote=False)
        for node in wikicode.nodes
        if isinstance(node, mwparserfromhell.nodes.Text)
    )


def _span_attributes(cell) -> str:
    """Return the ``rowspan``/``colspan`` attributes of *cell* as one string.

    Attributes are matched on their name (not on a substring of the whole
    attribute text), only the first occurrence of each is kept, and
    ``rowspan`` is always emitted before ``colspan``. An empty string is
    returned when the cell has neither.
    """
    found = {}
    for attribute in cell.attributes:
        name = str(attribute.name).strip().lower()
        if name in ("rowspan", "colspan") and name not in found:
            found[name] = str(attribute).strip()
    return " ".join(found[name] for name in ("rowspan", "colspan") if name in found)


def wiki_table_to_html(wiki_table_str: str) -> str:
    """Convert the first wikitext table in *wiki_table_str* to an HTML string.

    Handled constructs:

    * an optional caption (``|+ text``) before the first ``|-`` separator;
    * an implicit first row written before any ``|-`` separator, using
      either ``!`` or ``|`` cells (including the inline ``!!`` / ``||``
      forms) -- every such cell is emitted as ``<th>``;
    * regular rows introduced by ``|-``, whose ``td``/``th`` cells keep their
      ``rowspan`` and ``colspan`` attributes (all other attributes are
      dropped).

    Only plain text inside cells is kept, and it is HTML-escaped.

    Args:
        wiki_table_str: Wikitext that contains a table.

    Returns:
        The table as a single-line HTML string, or ``""`` when no table is
        found in the input.
    """
    tag_type = mwparserfromhell.nodes.Tag

    parsed = mwparserfromhell.parse(wiki_table_str)
    # A string passed as ``matches`` is a regex searched in each tag's full
    # text, not a tag name, so select on the tag name explicitly.
    tables = parsed.filter_tags(matches=lambda node: node.tag == "table")

    # No table in the input
    if not tables:
        return ""

    result = ["<table>"]
    row_separator_seen = False  # True once a "|-" row has been encountered
    open_header = None  # "th" / "td" while an implicit header row is open

    for node in tables[0].contents.nodes:
        # Only Tag nodes carry ``tag`` / ``wiki_markup``; stray text,
        # comments or templates directly inside the table are ignored.
        if not isinstance(node, tag_type):
            continue

        tag_name = str(node.tag)
        markup = node.wiki_markup
        if markup == "|-":
            row_separator_seen = True

        raw_contents = str(node.contents)
        is_caption = (
            not row_separator_seen
            and tag_name == "td"
            and markup == "|"
            and raw_contents.startswith("+")
        )

        # Close the implicit header row as soon as a different kind of
        # node (or a caption) shows up.
        if open_header is not None and (is_caption or tag_name != open_header):
            result.append("</tr>")
            open_header = None

        if is_caption:
            # Drop the leading "+" and surrounding whitespace
            caption_text = html.escape(raw_contents[1:].strip(), quote=False)
            result.append(f"<caption>{caption_text}</caption>")

        elif not row_separator_seen and (
            (tag_name == "th" and markup in _HEADER_TH_MARKUP)
            or (tag_name == "td" and markup in _HEADER_TD_MARKUP)
        ):
            # First cell of the implicit header row opens the <tr>
            if open_header is None:
                result.append("<tr>")
                open_header = tag_name
            result.append(f"<th>{_escaped_text(node.contents)}</th>")

        elif tag_name == "tr":
            result.append("<tr>")
            for cell in node.contents.nodes:
                if not isinstance(cell, tag_type):
                    continue
                cell_tag = str(cell.tag)
                if cell_tag not in ("td", "th"):
                    continue
                spans = _span_attributes(cell)
                opening = f"<{cell_tag} {spans}>" if spans else f"<{cell_tag}>"
                result.append(
                    f"{opening}{_escaped_text(cell.contents)}</{cell_tag}>"
                )
            result.append("</tr>")

    # A table made only of header cells never meets a closing node,
    # so the header row has to be closed here.
    if open_header is not None:
        result.append("</tr>")

    result.append("</table>")
    return "".join(result)


# Convert the sample wikitext table held in `x` and print the resulting
# single-string HTML to the cell output.
print(wiki_table_to_html(x))


<table><tr><td>Season
</td><td>League
</td><td>Position
</td></tr><tr><td>2000-01
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>2001-02
</td><td>English Premier League
</td><td>3rd
</td></tr><tr><td>2002-03
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>2003-04
</td><td>English Premier League
</td><td>3rd
</td></tr><tr><td>2004-05
</td><td>English Premier League
</td><td>3rd
</td></tr><tr><td>2005-06
</td><td>English Premier League
</td><td>2nd
</td></tr><tr><td>2006-07
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>2007-08
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>2008-09
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>2009-10
</td><td>English Premier League
</td><td>2nd
</td></tr><tr><td>2010-11
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>2011-12
</td><td>English Premier League
</td><td>2nd
</td></tr><tr><td>2012-13
</td><td>English Premier League
</td><td>1st
</td></tr><tr><td>201