In [None]:
from functools import total_ordering

In [2]:
@total_ordering
class Posting:
    """A single postings-list entry, identified by a document id.

    Postings compare (and hash) by ``doc_id``. For convenience a posting can
    also be compared directly with a raw document id, e.g. ``Posting(3) == 3``.
    """

    def __init__(self, doc_id):
        self.doc_id = doc_id

    @staticmethod
    def _doc_id_of(other):
        """Return the document id carried by *other* (a Posting or a raw id)."""
        return other.doc_id if isinstance(other, Posting) else other

    def __eq__(self, other):
        # Unwrap explicitly instead of relying on reflected dispatch between
        # the posting and the raw id.
        return self.doc_id == self._doc_id_of(other)

    def __gt__(self, other):
        return self.doc_id > self._doc_id_of(other)

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None; hashing by doc_id keeps
        # postings usable in sets/dicts and consistent with equality (a posting
        # hashes like the raw id it compares equal to).
        return hash(self.doc_id)

    def __repr__(self) -> str:
        return str(self.doc_id)

    def from_corpus(self, corpus):
        """Return ``corpus[doc_id]``, i.e. the document this posting refers to."""
        return corpus[self.doc_id]

In [None]:
class PostingsList:
    """An ascending list of postings supporting merge and set-style queries.

    The elements only need to be mutually comparable (``==`` and ``<``); in
    this file they are ``Posting`` objects.
    """

    def __init__(self) -> None:
        self._postings_list = []

    @classmethod
    def from_postings_list(cls, postings_list: list):
        """Build a postings list from *postings_list*, sorted ascending.

        The input is copied, so the caller's list is neither reordered nor
        shared with the new object.
        """
        plist = cls()
        plist._postings_list = sorted(postings_list)
        return plist

    @classmethod
    def from_doc_id(cls, doc_id):
        """Build a postings list holding a single ``Posting(doc_id)``."""
        plist = cls()
        plist._postings_list = [Posting(doc_id)]
        return plist

    def merge(self, other: "PostingsList"):
        """Merge *other* into this list in place, dropping duplicates.

        *other* is left untouched.
        """
        combined = sorted(self._postings_list + other._postings_list)
        unique = []
        for posting in combined:
            # After sorting, equal postings are adjacent, so comparing with
            # the last kept element is enough to drop duplicates.
            if not unique or unique[-1] != posting:
                unique.append(posting)
        self._postings_list = unique

    def __repr__(self) -> str:
        return ", ".join(map(str, self._postings_list))

    def get_from_corpus(self, corpus):
        """Return the corpus entry of every posting, in list order."""
        return [posting.from_corpus(corpus) for posting in self._postings_list]

    def intersection(self, other: "PostingsList"):
        """Return a plain list of the postings present in both lists.

        Walks both (sorted) lists in lockstep.
        """
        mine, theirs = self._postings_list, other._postings_list
        common = []
        i = j = 0
        while i < len(mine) and j < len(theirs):
            if mine[i] == theirs[j]:
                common.append(mine[i])
                i += 1
                j += 1
            elif mine[i] < theirs[j]:
                i += 1
            else:
                j += 1
        return common

    def union(self, other: "PostingsList"):
        """Return a plain, sorted list of the postings present in either list.

        A posting found in both lists appears once in the result.
        """
        mine, theirs = self._postings_list, other._postings_list
        merged = []
        i = j = 0
        while i < len(mine) and j < len(theirs):
            if mine[i] == theirs[j]:
                merged.append(mine[i])
                i += 1
                j += 1
            elif mine[i] < theirs[j]:
                merged.append(mine[i])
                i += 1
            else:
                merged.append(theirs[j])
                j += 1
        # One list is exhausted; whatever is left of the other belongs to the
        # union as well.
        merged.extend(mine[i:])
        merged.extend(theirs[j:])
        return merged

In [None]:
class ImpossibleMergeException(Exception):
    """Signals that two terms cannot be merged into one another."""


@total_ordering
class Term:
    """A dictionary term together with the postings list of its documents.

    Terms compare and hash by their ``term`` string only; the postings list
    does not take part in comparisons.
    """

    def __init__(self, term: str, doc_id) -> None:
        self.term = term
        self.postings_list = PostingsList.from_doc_id(doc_id)

    def __eq__(self, other) -> bool:
        # Returning NotImplemented (instead of touching other.term) lets
        # comparisons with unrelated objects evaluate to False rather than
        # raising AttributeError.
        if not isinstance(other, Term):
            return NotImplemented
        return self.term == other.term

    def __gt__(self, other) -> bool:
        if not isinstance(other, Term):
            return NotImplemented
        return self.term > other.term

    def __hash__(self) -> int:
        # Defining __eq__ alone would make terms unhashable; hash by the same
        # key equality uses.
        return hash(self.term)

    def merge(self, other: "Term"):
        """Merge the postings of *other* into this term, in place.

        Raises:
            ImpossibleMergeException: if *other* is not equal to this term.
        """
        if self != other:
            raise ImpossibleMergeException(
                f"cannot merge {other!r} into {self!r}"
            )
        self.postings_list.merge(other.postings_list)

    def __repr__(self) -> str:
        return f"{self.term}: {self.postings_list!s}"

In [97]:
class TrieNode:
    """One node of the trie: 26 child slots ('a'-'z') plus optional postings.

    ``is_eow`` marks the node as the end of a stored word; only then is
    ``postings_list`` meaningful (it defaults to an empty list).
    """

    def __init__(self):
        self.children = [None] * 26
        self.is_eow = False
        self.postings_list = []

    def set_postings_list(self, postings_list: "PostingsList") -> None:
        """Store *postings_list* on this node and mark it as end of a word."""
        self.postings_list = postings_list
        self.is_eow = True

    def _entries(self, prefix: str = ""):
        """Yield one ``"<word>: <postings>\\n"`` chunk per word at or below this node.

        *prefix* is the text spelled by the path leading to this node, so each
        yielded word is complete even when several words share a prefix.
        """
        if self.is_eow:
            yield f"{prefix}: {self.postings_list!s}\n"
        # Keep descending even below an end-of-word node: a stored word may be
        # a prefix of longer stored words.
        for offset, child in enumerate(self.children):
            if child is not None:
                yield from child._entries(prefix + chr(offset + ord('a')))

    def __repr__(self) -> str:
        """Return every word stored at or below this node, one per line."""
        return "".join(self._entries())

In [94]:
class MissingKeyException(Exception):
    """Signals that a requested key is not present."""


class Trie:
    """Prefix tree mapping lowercase 'a'-'z' words to their postings lists."""

    _ALPHABET_SIZE = 26

    def __init__(self) -> None:
        self.root = TrieNode()

    @classmethod
    def _child_index(cls, char: str) -> int:
        """Return the child slot for *char*.

        Raises:
            ValueError: if *char* is not in 'a'-'z'. Without this check a
                character just below 'a' would yield a negative index and
                silently land in another letter's slot.
        """
        idx = ord(char) - ord('a')
        if not 0 <= idx < cls._ALPHABET_SIZE:
            raise ValueError(
                f"unsupported character {char!r}: only 'a'-'z' can be indexed"
            )
        return idx

    def _path_to(self, key: str):
        """Return the nodes from the root down to the node that stores *key*.

        Raises:
            MissingKeyException: if *key* is not stored as a complete word
                (unknown path, unsupported character, or a mere prefix).
        """
        path = [self.root]
        for char in key:
            try:
                child = path[-1].children[self._child_index(char)]
            except ValueError:
                raise MissingKeyException(key) from None
            if child is None:
                raise MissingKeyException(key)
            path.append(child)
        if not path[-1].is_eow:
            # The path exists only as a prefix of longer words.
            raise MissingKeyException(key)
        return path

    def insert(self, node: "Term"):
        """Store ``node.postings_list`` under ``node.term`` and return self.

        If the term is already present its postings list is replaced.

        Raises:
            ValueError: if the term contains a character outside 'a'-'z';
                nothing is inserted in that case.
        """
        # Validate the whole term first so a bad character cannot leave a
        # half-built branch behind.
        indices = [self._child_index(char) for char in node.term]
        current = self.root
        for idx in indices:
            child = current.children[idx]
            if child is None:
                child = TrieNode()
                current.children[idx] = child
            current = child
        current.set_postings_list(node.postings_list)
        return self

    def search(self, key: str):
        """Return the postings list stored under *key*.

        Raises:
            MissingKeyException: if *key* is not stored as a complete word.
        """
        return self._path_to(key)[-1].postings_list

    def remove(self, key: str) -> None:
        """Delete *key* from the trie.

        The word's node is unmarked and any nodes that no longer lead to a
        stored word are unlinked.

        Raises:
            MissingKeyException: if *key* is not stored as a complete word.
        """
        path = self._path_to(key)
        target = path[-1]
        target.is_eow = False
        target.postings_list = []
        # Walk back towards the root, unlinking nodes that became useless.
        for depth in range(len(key) - 1, -1, -1):
            node = path[depth + 1]
            if node.is_eow or any(child is not None for child in node.children):
                break
            path[depth].children[self._child_index(key[depth])] = None

    def merge(self, other: "Trie"):
        """Merge the words of *other* into this trie, in place.

        Words present in both tries get their postings lists merged (through
        the postings list's own ``merge``). Subtrees and postings lists that
        exist only in *other* are linked rather than copied, so the two tries
        share those objects afterwards.
        """
        stack = [(self.root, other.root)]
        while stack:
            node_self, node_other = stack.pop()
            if node_other.is_eow:
                if node_self.is_eow:
                    node_self.postings_list.merge(node_other.postings_list)
                else:
                    node_self.set_postings_list(node_other.postings_list)
            for idx in range(self._ALPHABET_SIZE):
                child_other = node_other.children[idx]
                if child_other is None:
                    continue
                child_self = node_self.children[idx]
                if child_self is None:
                    node_self.children[idx] = child_other
                else:
                    stack.append((child_self, child_other))

    def __repr__(self) -> str:
        return repr(self.root)

In [None]:
# NOTE(review): as written this loop never terminates: `lst` starts non-empty
# and every pass only appends to it, so the `while lst` condition stays true
# and the list keeps growing. Presumably a scratch experiment — confirm the
# intent before running this cell.
lst = [1, 2, 3]
lst2 = [4, 5, 6]  # not used by the statements in this cell
while lst:
    lst += [1, 2]

ciao
ciao
ciao
ciao
ciao


In [99]:
# Build two single-word tries, then fold the second one into the first.
cane = Trie()
cane.insert(Term("cane", 1))

cave = Trie()
cave.insert(Term("gatto", 2))

cane.merge(cave)
# Bare expression: shows the merged trie as the cell output.
cane

cane: 1
gatto: 2

In [None]:
class InvertedIndex:
    """Inverted index that stores its terms in a ``Trie``."""

    def __init__(self) -> None:
        self.trie = Trie()

    def get(self, key: str):
        """Return whatever the underlying trie's ``search`` yields for *key*.

        Exceptions raised by the trie lookup propagate to the caller.
        """
        return self.trie.search(key)

    def insert(self, term: "Term") -> None:
        """Insert *term* into the underlying trie."""
        self.trie.insert(term)

    def __repr__(self) -> str:
        # Delegate to the trie so the index prints its contents instead of an
        # empty string.
        return repr(self.trie)