In [None]:
import os
import re
import datetime
from collections import defaultdict

import attr

In [None]:
# User-specific locations. "~" is expanded here because neither open() nor
# os.path.join() expands it on their own.
LOG_FILENAME = os.path.expanduser("~/Downloads/mypy_20240130.log")
REPO_BASE_PATH = os.path.expanduser("~/dev/datalens-backend")

# NOTE(review): not referenced by any of the visible cells.
PKG_RUN_PREFIX = "Cmd: ['mypy', '--cache-dir=/tmp/mypy_cache']; cwd=data/"
# Removed from the working directory captured by pkg_run_cmd_re to obtain the package path.
CWD_PREFIX = "/src/"  # "/data/"

# Timestamp that both patterns below expect at the very start of a log line.
_TIMESTAMP_RE = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z"

# Line announcing a mypy run; group 1 captures the working directory.
# Raw strings keep the regex escapes from being read as (invalid) string escapes.
pkg_run_cmd_re = re.compile(
    _TIMESTAMP_RE + r" Cmd: \['mypy', '--cache-dir=/tmp/mypy_cache'\]; cwd=(.+)"
)

# Diagnostic line of the form "<path>:<line>: <status>: <msg>".
err_line_re = re.compile(
    _TIMESTAMP_RE + r" (?P<path>[^:]+):(?P<line>\d+): (?P<status>\w+): (?P<msg>.*)"
)

In [None]:
@attr.s
class ErrInfo:
    """A single source line reported by mypy, with every message attached to it.

    Attributes are declared through ``attr.ib()``:
    ``pkg`` and ``path`` are joined by ``get_full_path``; ``line`` is the
    zero-based line index; ``msg_list`` holds the error messages for that line.
    """

    pkg: str = attr.ib()
    path: str = attr.ib()
    line: int = attr.ib()  # zero-based index into the file's lines
    msg_list: list[str] = attr.ib()

    def get_comment(self) -> str:
        """Return the trailing comment text to append to the offending line.

        Only the first message is quoted, even when several were collected.
        Raises ``IndexError`` if ``msg_list`` is empty.
        """
        today = datetime.date.today().isoformat()
        first_msg = self.msg_list[0]
        return f"  # type: ignore  # {today} # TODO: {first_msg}"

    def get_full_path(self) -> str:
        """Return ``path`` joined onto ``pkg``."""
        parts = (self.pkg, self.path)
        return os.path.join(*parts)

In [None]:
# Maps "<package>/<path>" (see ErrInfo.get_full_path) to the errors reported for that file.
file_errs: defaultdict[str, list[ErrInfo]] = defaultdict(list)

# expanduser() is a no-op on an already expanded path; open() itself never expands "~".
with open(os.path.expanduser(LOG_FILENAME), "r") as log_file:
    # Package of the most recent "Cmd: ['mypy', ...]" line; None until the first one is seen.
    pkg_name = None
    for raw_line in log_file:
        log_line = raw_line.strip()

        run_cmd_match = pkg_run_cmd_re.match(log_line)
        if run_cmd_match:
            # NOTE(review): replace() drops every occurrence of CWD_PREFIX, not only a leading one.
            pkg_name = run_cmd_match.groups()[0].replace(CWD_PREFIX, "")

        err_match = err_line_re.match(log_line)
        if not err_match:
            continue
        if not pkg_name:
            # A diagnostic seen before any run command cannot be attributed to a package.
            continue

        fields = err_match.groupdict()
        if fields["status"] != "error":
            # Only "error" diagnostics are collected; any other status is ignored.
            continue

        err_info = ErrInfo(
            pkg=pkg_name,
            path=fields["path"],
            line=int(fields["line"]) - 1,  # log line numbers are shifted to zero-based indexes
            msg_list=[fields["msg"]],
        )
        assert err_info.line >= 0

        file_errs[err_info.get_full_path()].append(err_info)

print(len(file_errs))

# Merge errors that point at the same line of the same file into one ErrInfo,
# so that each source line ends up with exactly one entry (and one comment).
# Entries are matched by line number rather than by adjacency: a line that is
# reported again later in the log would otherwise be annotated twice.
# The items are snapshotted so the dict is not modified through a live view.
for file_path, ei_list in list(file_errs.items()):
    merged_by_line: dict[int, ErrInfo] = {}
    for ei in ei_list:
        first_ei = merged_by_line.get(ei.line)
        if first_ei is None:
            merged_by_line[ei.line] = ei
        else:
            # Keep the first entry for the line and collect the extra messages on it.
            first_ei.msg_list.extend(ei.msg_list)
    # dict preserves insertion order, so entries stay in first-seen order.
    file_errs[file_path] = list(merged_by_line.values())

In [None]:
# Optional path filter for inspecting which files have errors.
# An empty INCLUDE_PREFIXES keeps every file; an empty EXCLUDE_PREFIXES drops none.
# Previously the predicate body was an empty tuple (always falsy), so the
# result was always an empty list.
# NOTE(review): the cell that rewrites files iterates file_errs directly, not paths.
INCLUDE_PREFIXES: tuple[str, ...] = ()  # e.g. ("lib/", "app/")
EXCLUDE_PREFIXES: tuple[str, ...] = ()  # e.g. ("lib/dl_formula/dl_formula/parser/antlr/",)

paths = [
    file_path
    for file_path in file_errs.keys()
    if (not INCLUDE_PREFIXES or file_path.startswith(INCLUDE_PREFIXES))
    # str.startswith(()) is False, so an empty exclude tuple filters nothing out.
    and not file_path.startswith(EXCLUDE_PREFIXES)
]
len(paths)

In [None]:
# Append the generated "# type: ignore" comment to every reported line and
# write each file back in place.
# expanduser() is needed because os.path.join() and open() keep "~" literally.
repo_root = os.path.expanduser(REPO_BASE_PATH)

for filename, err_infos in file_errs.items():
    path = os.path.join(repo_root, filename)
    with open(path, "r") as f:
        lines = f.readlines()

    for ei in err_infos:
        # Trailing whitespace and the newline are stripped; the newline is re-added below.
        line = lines[ei.line].rstrip()
        comment = ei.get_comment()
        if line.endswith(comment):
            # The identical comment is already there (the cell was re-run); do not stack it.
            continue
        if "#" in line:
            # The line already carries a "#"; the comment is still appended, but flag it for manual review.
            print(f"!!! Multiple comments in one line: {path}:{ei.line + 1}")
        lines[ei.line] = f"{line}{comment}\n"

    with open(path, "w") as f:
        f.writelines(lines)