In [1]:
# In[1]
from collections import defaultdict

In [2]:
# Toy corpus of fruit names. Document ids are assigned in listing order,
# starting at 1, so the first string is document 1 and the last is document 5.
documents = dict(
    enumerate(
        [
            "Apple Banana Mango",
            "Banana Orange",
            "Apple Grapes",
            "Mango Orange Banana",
            "Grapes Banana",
        ],
        start=1,
    )
)

In [3]:
# In[2]
# Function to create an inverted index
def create_inverted_index(documents):
    """Build a term -> document-id-set index from a document mapping.

    Each document's text is lower-cased and split on whitespace; every
    resulting token is recorded against the id of the document it came from.

    Args:
        documents: Mapping of document id -> text string.

    Returns:
        A ``defaultdict(set)`` keyed by lower-cased token, whose values are
        the sets of document ids containing that token.
    """
    postings = defaultdict(set)

    for doc_id, text in documents.items():
        # A set absorbs repeated tokens within the same document.
        for token in text.lower().split():
            postings[token].add(doc_id)

    return postings

In [4]:
# Build the term -> set-of-document-ids index from the sample documents
inverted_index = create_inverted_index(documents)

In [5]:
# Dictionary-file representation: each term maps to its document ids as an
# ascending list. sorted() accepts any iterable and always returns a new list,
# so no intermediate list() copy is needed.
inverted_index_dict = {term: sorted(doc_ids) for term, doc_ids in inverted_index.items()}

In [6]:
# Print every term of the set-based index next to its raw posting set
print("Inverted Index:")
for term in inverted_index:
    doc_ids = inverted_index[term]
    print(f"{term}: {doc_ids}")

Inverted Index:
apple: {1, 3}
banana: {1, 2, 4, 5}
mango: {1, 4}
orange: {2, 4}
grapes: {3, 5}


In [7]:
# Print every term of the list-based (dictionary file) form with its sorted ids
print("\nDictionary File:")
for term in inverted_index_dict:
    doc_ids = inverted_index_dict[term]
    print(f"{term}: {doc_ids}")


Dictionary File:
apple: [1, 3]
banana: [1, 2, 4, 5]
mango: [1, 4]
orange: [2, 4]
grapes: [3, 5]


In [8]:
# In[3]
# Function to query the inverted index
def query_inverted_index(query, inverted_index):
    """Return the ids of documents that contain every word of ``query``.

    The query is lower-cased and split on whitespace, then the postings of
    all query words are intersected (boolean AND).

    Args:
        query: Whitespace-separated search terms; matching is
            case-insensitive because the query is lower-cased.
        inverted_index: Mapping of term -> iterable of document ids. The
            postings may be sets or lists (for example sorted lists).

    Returns:
        A new ``set`` of matching document ids. It is empty when the query
        has no words or when any word is missing from the index. The index
        itself is never modified.
    """
    query_words = query.lower().split()

    if not query_words:
        return set()

    result_docs = None
    for word in query_words:
        # .get() rather than [] so a defaultdict index is not grown by
        # unknown terms; the empty tuple stands in for "no postings".
        postings = inverted_index.get(word, ())
        if result_docs is None:
            # set(...) copies the first posting, so the caller's index is
            # untouched, and it also accepts list-valued postings.
            result_docs = set(postings)
        else:
            result_docs.intersection_update(postings)
        if not result_docs:
            # An empty intersection can never become non-empty again.
            break

    return result_docs

In [9]:
# In[4]
# Sample multi-word queries; none of the words in the last one occur in the
# sample documents.
queries = [
    "apple banana",
    "grapes banana",
    "banana orange",
    "nonexistent fruit",
]

# Run each query against the set-based index and report the matching ids
for query in queries:
    result_docs = query_inverted_index(query=query, inverted_index=inverted_index)
    print(f"Query: '{query}' -> Documents: {result_docs}")

Query: 'apple banana' -> Documents: {1}
Query: 'grapes banana' -> Documents: {5}
Query: 'banana orange' -> Documents: {2, 4}
Query: 'nonexistent fruit' -> Documents: set()
