In [1]:
import polars as pl

In [3]:
def process(f, col_starts: list[int], col_names: list[str], drop_cols: list[str], skip: int = 0) -> "pl.DataFrame":
    '''
    Process data from a text file with fixed width columns.

    Every line is read as one string and cut at the given start positions.
    Column i begins at col_starts[i] (1-based) and ends right before
    col_starts[i+1]; a column that has no following start position runs to
    the end of the line. Each extracted value is a string with surrounding
    whitespace stripped.

    @param f: path to document which is processed
    @param col_starts: list of 1-based start positions for columns (not modified)
    @param col_names: list of column names, matched to col_starts by position
    @param drop_cols: list of column names that are dropped
    @param skip: number of rows to skip
    @return: returns a DataFrame containing the processed columns
    @raise IndexError: if col_names has more entries than col_starts
    '''

    # NOTE(review): read_csv runs with its default separator and quoting, so a
    # line containing that separator would presumably be split before it is
    # sliced here -- confirm the input files never contain it.
    raw = pl.read_csv(
        f,
        has_header=False,
        skip_lines=skip,
        new_columns=['full_str'],
    )

    # Work on a copy so the caller's list is never altered; appending to the
    # argument would corrupt the boundaries on a second call with the same list.
    starts = list(col_starts)

    slices = []
    for i, name in enumerate(col_names):
        begin = starts[i]
        # Width up to the next start position. The last column has no next
        # start, so length=None lets str.slice run to the end of each line
        # (this keeps the final character and also works for empty input).
        width = starts[i + 1] - begin if i + 1 < len(starts) else None
        slices.append(
            pl.col('full_str')
            .str.slice(begin - 1, width)  # positions are 1-based, offsets 0-based
            .str.strip_chars()
            .alias(name)
        )

    return raw.with_columns(slices).drop('full_str').drop(drop_cols)