In [1]:
%load_ext pycodestyle_magic
%pycodestyle_on

In [133]:
import re
from typing import List, Optional, Tuple


def normalize_rows(
    table: List[List[Optional[str]]]
) -> List[List[Optional[str]]]:
    """Make the table rectangular by right-padding short rows with ``None``.

    Args:
        table: Rows of cells; rows may differ in length.

    Returns:
        A new list of new row lists, each as long as the longest input
        row. An empty table yields an empty list.
    """
    # Width of the widest row (0 when there are no rows at all).
    width = 0
    for row in table:
        width = max(width, len(row))

    padded = []
    for row in table:
        missing = width - len(row)
        # ``row + [...]`` always builds a fresh list, so the caller's
        # rows are never modified.
        padded.append(row + [None] * missing)
    return padded


def remove_duplicate_columns(
    table: List[List[Optional[str]]]
) -> List[List[Optional[str]]]:
    """Drop every column whose values repeat an earlier column exactly.

    The number of columns is taken from the first row, and each row is
    indexed up to that width, so the table is expected to be rectangular.

    Args:
        table: Rows of cells, all of the same length.

    Returns:
        A new table holding only the first occurrence of each distinct
        column, in the original column order. An empty table yields ``[]``.
    """
    if not table:
        return []

    # Maps a column's values to the index where it was first seen; dict
    # insertion order keeps the surviving indexes ascending.
    first_index: dict = {}
    for idx, _ in enumerate(table[0]):
        column = tuple(row[idx] for row in table)
        first_index.setdefault(column, idx)

    kept = list(first_index.values())
    result = []
    for row in table:
        result.append([row[idx] for idx in kept])
    return result


def remove_empty_rows(
    table: List[List[Optional[str]]]
) -> List[List[Optional[str]]]:
    """Keep only the rows that contain at least one non-``None`` cell.

    A zero-length row counts as empty. Empty strings are *not* ``None``,
    so a row of ``''`` cells is kept.

    Args:
        table: Rows of cells.

    Returns:
        A new list referencing the surviving row objects in order.
    """
    kept = []
    for row in table:
        if all(cell is None for cell in row):
            continue
        kept.append(row)
    return kept


def remove_duplicate_rows(
    table: List[List[Optional[str]]]
) -> List[List[Optional[str]]]:
    """Drop rows that repeat an earlier row, keeping first occurrences.

    Args:
        table: Rows of hashable cells.

    Returns:
        A new list with the first row object of each distinct row, in
        the order those rows first appeared.
    """
    # Keyed by the row's contents; ``setdefault`` stores only the first
    # row seen for each key and dicts preserve insertion order.
    first_rows: dict = {}
    for row in table:
        first_rows.setdefault(tuple(row), row)
    return list(first_rows.values())


def transform_cell(cell: Optional[str]) -> str:
    """Convert one raw cell to its output text.

    Rules are tried in this order and the first match wins:

    1. ``None`` becomes ``''``.
    2. A ``NN/NN/NN`` value (two digits each) has its three fields
       reversed and joined with dots, e.g. ``'02/10/18' -> '18.10.02'``.
    3. ``'да'`` / ``'нет'`` in any letter case is returned lower-cased.
    4. A value containing a comma is cut at the first comma.
    5. An integer string is normalised through ``int`` (``'007' -> '7'``).
    6. A decimal string is rounded to the nearest integer with the
       built-in ``round`` (ties go to the even neighbour).
    7. Anything else is returned unchanged.

    Args:
        cell: Raw cell text, or ``None`` for a missing cell.

    Returns:
        The transformed text.
    """
    if cell is None:
        return ''
    if re.fullmatch(r'\d{2}/\d{2}/\d{2}', cell):
        d, m, y = cell.split('/')
        return f'{y}.{m}.{d}'
    low = cell.lower()
    if low in ('да', 'нет'):
        return low
    if ',' in cell:
        return cell.split(',', 1)[0]
    if re.fullmatch(r'-?\d+', cell):
        # Integers are converted exactly. Going through ``float`` would
        # silently change digits above 2**53 and overflow on very long
        # values.
        return str(int(cell))
    if re.fullmatch(r'-?\d+\.\d+', cell):
        return str(int(round(float(cell))))
    return cell


def transform_cells(
    table: List[List[Optional[str]]]
) -> List[List[str]]:
    """Run ``transform_cell`` over every cell, keeping the table's shape.

    Args:
        table: Rows of raw cells.

    Returns:
        A new table with one transformed value per input cell.
    """
    converted: List[List[str]] = []
    for row in table:
        converted.append([transform_cell(value) for value in row])
    return converted


def transpose(table: List[List[str]]) -> List[List[str]]:
    """Swap rows and columns.

    Columns are produced with ``zip``, so the result is only as wide as
    the shortest input row; an empty table yields ``[]``.

    Args:
        table: Rows of cells.

    Returns:
        A list with one new list per input column.
    """
    columns = zip(*table)
    return list(map(list, columns))


def main(table: List[List[Optional[str]]]) -> List[List[str]]:
    """Clean a raw table and return it transposed.

    Stages, in order: pad rows to equal length, drop duplicate columns,
    drop rows that are entirely ``None``, drop duplicate rows, transform
    every cell, then transpose.

    Note: a later cell in this notebook defines another ``main``; once
    that cell has run, it replaces this function.

    Args:
        table: Rows of raw cells; ``None`` marks a missing cell.

    Returns:
        The cleaned table as a list of columns, or ``[]`` for empty input.
    """
    if not table:
        return []

    # Structural clean-up happens on the raw values, before any cell
    # text is rewritten.
    cleaned = remove_duplicate_rows(
        remove_empty_rows(
            remove_duplicate_columns(normalize_rows(table))
        )
    )
    return transpose(transform_cells(cleaned))


86:1: W391 blank line at end of file


In [136]:
import json
import re
from typing import List, Optional


def _convert_cell(cell: Optional[str]) -> str:
    """Apply the per-cell text rules used by ``main`` to one value."""
    if cell is None:
        return ''
    if not isinstance(cell, str):
        # Non-text values are outside the declared contract; pass them
        # through untouched rather than failing in ``re``.
        return cell
    date = re.fullmatch(r'(\d{2})/(\d{2})/(\d{2})', cell)
    if date:
        return f'{date.group(3)}.{date.group(2)}.{date.group(1)}'
    if cell in ('Да', 'Нет'):
        return cell.lower()
    # "<word>,<anything>" keeps only the leading word.
    prefixed = re.fullmatch(r'(\w+),.+', cell, flags=re.DOTALL)
    if prefixed:
        return prefixed.group(1)
    if re.fullmatch(r'-?\d+\.\d+', cell):
        return str(int(round(float(cell))))
    return cell


def main(table: List[List[Optional[str]]]) -> List[List[str]]:
    """Clean a raw table and return it as a list of unique columns.

    Each cell is converted on its own: ``None`` becomes ``''``, a
    ``NN/NN/NN`` value has its three fields reversed and dot-joined,
    exactly ``'Да'`` / ``'Нет'`` are lower-cased, ``'<word>,<rest>'`` is
    cut down to ``<word>``, and a decimal number is rounded to an
    integer. Rows with no truthy cell and repeated rows are then
    dropped, and finally repeated columns are dropped.

    The rules are applied to the cell values themselves, not to a
    serialized copy of the table, so cell text such as ``'null'`` or text
    containing quotes cannot corrupt neighbouring data. Short rows are
    padded with ``''`` so that no column is lost when rows differ in
    length.

    Args:
        table: Rows of raw cells; ``None`` marks a missing cell.

    Returns:
        One list per distinct column, or ``[]`` for empty input.
    """
    if not table:
        return []

    width = max(len(row) for row in table)

    # dict keys give order-preserving de-duplication of rows.
    unique_rows: dict = {}
    for row in table:
        cells = [_convert_cell(cell) for cell in row]
        cells.extend([''] * (width - len(cells)))
        if any(cells):
            unique_rows.setdefault(tuple(cells))

    unique_columns = dict.fromkeys(zip(*unique_rows))
    return [list(column) for column in unique_columns]


28:1: W391 blank line at end of file


27:1: W391 blank line at end of file


30:1: W391 blank line at end of file


In [137]:
if __name__ == '__main__':
    # Two sample tables with the same four-column layout. Both contain
    # repeated rows and two identical middle columns.
    data1 = [
        ['Зузошяк, Р.А.', 'Да', 'Да', '02/10/18'],
        ['Дуфян, Р.Е.', 'Нет', 'Нет', '00/06/21'],
        ['Гарабич, Р.З.', 'Да', 'Да', '00/01/01'],
        ['Нафирий, М.Е.', 'Да', 'Да', '04/11/26'],
        ['Нафирий, М.Е.', 'Да', 'Да', '04/11/26'],
        ['Нафирий, М.Е.', 'Да', 'Да', '04/11/26'],
    ]
    data2 = [
        ['Гемарян, Р.И.', 'Да', 'Да', '00/01/05'],
        ['Гемарян, Р.И.', 'Да', 'Да', '00/01/05'],
        ['Мисафберг, Р.Ц.', 'Да', 'Да', '99/12/24'],
        ['Гегов, С.З.', 'Да', 'Да', '04/01/25'],
        ['Гемарян, Р.И.', 'Да', 'Да', '00/01/05'],
    ]

    # Print the processed result of each sample, first table first.
    print(main(data1))
    print(main(data2))


21:1: W391 blank line at end of file


[['Зузошяк', 'Дуфян', 'Гарабич', 'Нафирий'], ['да', 'нет', 'да', 'да'], ['18.10.02', '21.06.00', '01.01.00', '26.11.04']]
[['Гемарян', 'Мисафберг', 'Гегов'], ['да', 'да', 'да'], ['05.01.00', '24.12.99', '25.01.04']]
