In [2]:
# Toy corpus for the index demo, given as (doc_id, raw text) pairs.
# Each text is a run of whitespace-separated terms.
docs = dict(
    [
        (1, "short video recommendation system"),
        (2, "video search ranking"),
        (3, "recommendation ranking model"),
    ]
)

In [3]:
from collections import defaultdict
from typing import Dict, List, Set


class SearchIndex:
    """
    In-memory search index that keeps two views of the same corpus in sync:

    1. Forward index:  doc_id -> list of terms, in document order.
    2. Inverted index: term   -> set of doc_ids whose text contains the term.

    Terms are produced by plain whitespace splitting (``str.split()``); no
    case folding or punctuation handling is applied, so matching is exact.
    """

    def __init__(self):
        self.forward_index: Dict[int, List[str]] = {}
        self.inverted_index: Dict[str, Set[int]] = defaultdict(set)

    def build(self, docs: Dict[int, str]):
        """
        Index every document in ``docs`` into both the forward and the
        inverted index.

        The method may be called repeatedly to add more documents. If a
        doc_id was indexed before, its previous postings are removed first so
        the inverted index never refers to terms the document no longer
        contains.

        Args:
            docs: Mapping of doc_id to raw text.
        """
        for doc_id, text in docs.items():
            terms = text.split()

            # Re-indexing an existing document: purge its old postings,
            # otherwise the inverted index would keep matching stale terms.
            if doc_id in self.forward_index:
                self._remove_postings(doc_id)

            # Forward index
            self.forward_index[doc_id] = terms

            # Inverted index
            for term in terms:
                self.inverted_index[term].add(doc_id)

    def _remove_postings(self, doc_id: int) -> None:
        """Remove ``doc_id`` from the posting set of every term it was indexed under."""
        for term in set(self.forward_index[doc_id]):
            # .get() avoids creating an empty entry through the defaultdict factory.
            postings = self.inverted_index.get(term)
            if postings is None:
                continue
            postings.discard(doc_id)
            if not postings:
                # Drop terms that no longer occur in any document.
                del self.inverted_index[term]

    # =========================
    # Query methods
    # =========================

    def search_and(self, query: str) -> Set[int]:
        """
        AND query: return the doc_ids that contain every term of ``query``.

        An empty or whitespace-only query returns an empty set. The returned
        set is a fresh object; mutating it does not affect the index.
        """
        terms = query.split()
        if not terms:
            return set()

        # Copy the first posting set so the in-place intersection below
        # never modifies the index itself.
        result = self.inverted_index.get(terms[0], set()).copy()
        for term in terms[1:]:
            if not result:
                # Intersection is already empty; further terms cannot change it.
                break
            result &= self.inverted_index.get(term, set())

        return result

    def search_or(self, query: str) -> Set[int]:
        """
        OR query: return the doc_ids that contain at least one term of
        ``query``.

        An empty query returns an empty set. The returned set is a fresh
        object; mutating it does not affect the index.
        """
        result = set()
        for term in query.split():
            result |= self.inverted_index.get(term, set())
        return result

    # =========================
    # Debug / display
    # =========================

    def print_forward(self):
        """Print the forward index, one ``doc_id -> terms`` line per document."""
        print("Forward Index:")
        for doc_id, terms in self.forward_index.items():
            print(f"{doc_id} -> {terms}")

    def print_inverted(self):
        """Print the inverted index, one ``term -> doc_ids`` line per term."""
        print("Inverted Index:")
        for term, doc_ids in self.inverted_index.items():
            print(f"{term} -> {doc_ids}")

In [4]:
# Demo: build an index over a three-document corpus, dump both index views,
# then run one AND query and one OR query.
if __name__ == "__main__":
    # Same three documents as the `docs` mapping defined in the earlier cell.
    docs = {
        1: "short video recommendation system",
        2: "video search ranking",
        3: "recommendation ranking model",
    }

    index = SearchIndex()
    index.build(docs)

    # Show doc_id -> terms, a blank separator line, then term -> doc_ids.
    index.print_forward()
    print()
    index.print_inverted()

    # AND: only doc 2 contains both "video" and "ranking".
    print("\nAND Query:", index.search_and("video ranking"))
    # OR: docs 1 and 2 contain "video", docs 1 and 3 contain "recommendation".
    print("OR Query:", index.search_or("video recommendation"))

Forward Index:
1 -> ['short', 'video', 'recommendation', 'system']
2 -> ['video', 'search', 'ranking']
3 -> ['recommendation', 'ranking', 'model']

Inverted Index:
short -> {1}
video -> {1, 2}
recommendation -> {1, 3}
system -> {1}
search -> {2}
ranking -> {2, 3}
model -> {3}

AND Query: {2}
OR Query: {1, 2, 3}
