In [None]:
import re
from collections import Counter
from typing import List, Tuple

from pymorphy2 import MorphAnalyzer


class Concordances:
    """Left/right context counter for phrases in a lemmatized Russian text.

    The text is stored as a list of lines, each line being a list of word
    normal forms (lemmas).  ``find`` looks a phrase up in that representation
    and counts which word sequences occur directly before and after it.
    """

    # One analyzer shared by all instances.
    morph = MorphAnalyzer()

    # A token is a run of Cyrillic letters, optionally hyphenated and
    # optionally wrapped in apostrophes (the apostrophes stay in the token).
    _WORD_RE = re.compile(r"('?[а-яА-ЯёЁ][а-яА-ЯёЁ]*(?:-[а-яА-ЯёЁ]+)*'?)")

    def __init__(self, normal_text: List[List[str]]):
        """Store an already normalized text.

        Args:
            normal_text: One list of normal forms per line of the text.
        """
        self.normal_text = normal_text

    @classmethod
    def from_text(cls, text: str) -> 'Concordances':
        """Build an instance from a string, one text line per ``'\\n'``.

        Lines that contain no Cyrillic words are dropped.
        """
        normalized = (cls.normalize(line) for line in text.split('\n'))
        # ``cls`` (not the hard-coded class) so subclasses get their own type.
        return cls([words for words in normalized if words])

    @classmethod
    def from_file(cls, fp) -> 'Concordances':
        """Build an instance from an open text file.

        Args:
            fp: Object whose ``readline()`` returns the next line as ``str``
                and an empty string at end of input.

        Lines that contain no Cyrillic words are dropped.
        """
        normal_text = []
        line = fp.readline()
        while line:
            words = cls.normalize(line)
            if words:
                normal_text.append(words)
            line = fp.readline()
        return cls(normal_text)

    @classmethod
    def normalize(cls, line: str) -> List[str]:
        """Return the normal forms of the Cyrillic words found in ``line``.

        Each token is lower-cased and replaced by the normal form of the
        first parse the analyzer returns for it.  Anything that is not a
        Cyrillic word (digits, Latin text, punctuation) is skipped.
        """
        return [cls.morph.parse(token.lower())[0].normal_form
                for token in cls._WORD_RE.findall(line)]

    @staticmethod
    def check(line_words: List[str], index: int, phrase_words: List[str]) -> bool:
        """Tell whether ``phrase_words`` occurs in ``line_words`` at ``index``.

        The phrase must fit completely inside the line: a phrase that would
        run past the last word does not match.  An empty phrase and a
        negative index never match.
        """
        phrase = list(phrase_words)
        if not phrase or index < 0:
            return False
        # A slice that is cut short by the end of the line is shorter than
        # the phrase and therefore compares unequal.
        return list(line_words[index:index + len(phrase)]) == phrase

    @staticmethod
    def _ranked(side: str, contexts: Counter, min_frequency: int) -> List[Tuple[str, str, int]]:
        """Turn a context counter into ``(side, context, count)`` rows, most frequent first."""
        return [(side, context, count)
                for context, count in contexts.most_common()
                if count >= min_frequency]

    def find(self, phrase: str, *, window: int = 2, min_frequency: int = 1) -> List[Tuple[str, str, int]]:
        """Count the contexts in which ``phrase`` occurs.

        Args:
            phrase: Words to look for; normalized the same way as the text.
            window: Maximum number of words taken on each side of a match.
                Contexts never cross a line boundary, so they may be shorter
                (or the empty string) near the start or end of a line.
            min_frequency: Contexts seen fewer times than this are omitted.

        Returns:
            Rows ``('l', context, count)`` for left contexts followed by rows
            ``('r', context, count)`` for right contexts, each group ordered
            by descending count.  ``context`` is the space-joined normal
            forms.  The result is empty when ``phrase`` contains no
            Cyrillic words.
        """
        phrase_words = self.normalize(phrase)
        if not phrase_words:
            # Nothing to search for; do not report every position as a match.
            return []

        size = len(phrase_words)
        left = Counter()
        right = Counter()
        for line_words in self.normal_text:
            # Only start positions where the whole phrase still fits.
            for start in range(len(line_words) - size + 1):
                if self.check(line_words, start, phrase_words):
                    end = start + size
                    left[' '.join(line_words[max(0, start - window):start])] += 1
                    right[' '.join(line_words[end:end + window])] += 1

        return (self._ranked('l', left, min_frequency)
                + self._ranked('r', right, min_frequency))

# Usage example

In [None]:
# Build the concordance index from the example corpus file.
# The original note gives a run time of roughly 40 s for this cell.
with open("./data/example_text.txt", 'r', encoding='utf8') as fp:
    concordances = Concordances.from_file(fp)

In [None]:
%%time
# Contexts of up to two normalized words on each side of 'в'; min_frequency=1 keeps every context that was seen.
concordances.find('в', window=2, min_frequency=1)