In [13]:
def build_inverted_index(documents):

    inverted_index = {}
    for doc_id, text in documents.items():
        tokens = set(text.lower().split())
        for token in tokens:
            if token not in inverted_index:
                inverted_index[token] = []
            inverted_index[token].append(doc_id)
    return inverted_index


In [15]:
def search_inverted_index(index, query):

    query_tokens = set(query.lower().split())
    # Start with an empty set of results
    results = set()
    first_token = True

    for token in query_tokens:
        if token in index:
            if first_token:
                results = set(index[token])
                first_token = False
            else:
                results = results.intersection(set(index[token]))
    return list(results)

In [17]:
documents = {
    'doc1': "The quick brown fox jumped over the lazy dog.",
    'doc2': "The lazy dog slept in the sun.",
    'doc3': "An inverted index is a data structure storing a mapping from content, such as words or numbers."
}

In [28]:
# Printing Inverted Index
inverted_index = build_inverted_index(documents)
print("Inverted Index:")
for word, doc_list in inverted_index.items():
    print(f"  {word}: {doc_list}")


Inverted Index:
  lazy: ['doc1', 'doc2']
  dog.: ['doc1']
  over: ['doc1']
  jumped: ['doc1']
  the: ['doc1', 'doc2']
  brown: ['doc1']
  fox: ['doc1']
  quick: ['doc1']
  in: ['doc2']
  slept: ['doc2']
  sun.: ['doc2']
  dog: ['doc2']
  mapping: ['doc3']
  content,: ['doc3']
  as: ['doc3']
  words: ['doc3']
  storing: ['doc3']
  data: ['doc3']
  a: ['doc3']
  from: ['doc3']
  structure: ['doc3']
  numbers.: ['doc3']
  index: ['doc3']
  such: ['doc3']
  inverted: ['doc3']
  is: ['doc3']
  an: ['doc3']
  or: ['doc3']


In [26]:
# Search query
query = "lazy dog"
search_results = search_inverted_index(inverted_index, query)

print(f"\nSearching for '{query}':")
print(f"Found in documents: {search_results}")


Searching for 'lazy dog':
Found in documents: ['doc2']
