In [1]:
import re

import pandas as pd


def extract_table(table_as_string, header=False):
    """
    Extract w3resource table from string.

    Each non-blank line is stripped and split into cells on runs of two or
    more whitespace characters, so cells may contain single spaces
    (e.g. ``"Alberto Franco"``). The first cell of every data row is used
    as the row's index label. All values are kept as strings.

    Blank (empty or whitespace-only) lines are ignored, so a table pasted
    in a triple-quoted string with a leading or trailing newline does not
    produce spurious empty rows.

    Parameters
    ----------
    table_as_string : str
        String containing table to extract.
    header : bool, optional
        Whether table has header or not, by default False
        Header is assumed to be the first non-blank row. It must not
        contain a label for the index column.

    Returns
    -------
    pd.DataFrame
        Extracted table as a dataframe. An empty dataframe is returned
        when the input holds no non-blank lines.
    """
    # Split each non-blank line into cells; a separator is 2+ whitespace chars.
    rows = [
        re.split(r"\s{2,}", line.strip())
        for line in table_as_string.split("\n")
        if line.strip()
    ]

    # Nothing to parse (empty or whitespace-only input).
    if not rows:
        return pd.DataFrame()

    # Extract header if needed; None lets pandas number the columns.
    col_names = rows.pop(0) if header else None

    # First cell of each row is the index label, the rest is the data.
    idx = [row[0] for row in rows]
    data = [row[1:] for row in rows]

    return pd.DataFrame(data, columns=col_names, index=idx)
if __name__ == "__main__":
    # Demo: parse one table without a header row and one with a header row,
    # printing each resulting dataframe.
    test_1 = """0        s001     V  Alberto Franco     15/05/2002      35  street1   t1
    1        s002     V    Gino Mcneill     17/05/2002      32  street2   t2
    2        s003    VI     Ryan Parkes     16/02/1999      33  street3   t3
    3        s001    VI    Eesha Hinton     25/09/1998      30  street1   t4
    4        s002     V    Gino Mcneill     11/05/2002      31  street2   t5
    5        s004    VI    David Parkes     15/09/1997      32  street4   t6"""

    test_2 = """  student_id              name  marks
    0         S1  Danniella Fenton    200
    1         S2      Ryder Storey    210
    2         S3      Bryce Jensen    190
    3         S4         Ed Bernal    222
    4         S5       Kwame Morin    199"""

    # Pair each sample with whether its first row is a header.
    for sample, has_header in ((test_1, False), (test_2, True)):
        print(extract_table(sample, header=has_header))

c:\Users\jackh\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\jackh\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


      0   1               2           3   4        5   6
0  s001   V  Alberto Franco  15/05/2002  35  street1  t1
1  s002   V    Gino Mcneill  17/05/2002  32  street2  t2
2  s003  VI     Ryan Parkes  16/02/1999  33  street3  t3
3  s001  VI    Eesha Hinton  25/09/1998  30  street1  t4
4  s002   V    Gino Mcneill  11/05/2002  31  street2  t5
5  s004  VI    David Parkes  15/09/1997  32  street4  t6
  student_id              name marks
0         S1  Danniella Fenton   200
1         S2      Ryder Storey   210
2         S3      Bryce Jensen   190
3         S4         Ed Bernal   222
4         S5       Kwame Morin   199
