In [2]:
import pickle

# Load the two pickled artifacts the rest of this notebook works on. Both
# paths are relative, so they are resolved against the kernel's current
# working directory.
#
# NOTE(review): pickle.load can execute arbitrary code while unpickling.
# Only load files from a trusted source.

# Load pre-processed data
# The indexing loop further down reads element [0] of every entry and splits
# it on whitespace, so this is presumably a sequence of tuples whose first
# item is the document text -- TODO confirm against the preprocessing step.
with open('preprocessed_data.pkl', 'rb') as f:
    pre_processed_data = pickle.load(f)

# Load vocabulary
# Used below through len() and .index(), i.e. as an ordered sequence of terms
# whose positions double as term ids for the list-based index.
with open('final_vocabulary.pkl', 'rb') as f:
    vocabulary = pickle.load(f)

In [3]:
# Dictionary-based inverted index: every vocabulary term is a key that maps
# to its own, initially empty, posting list.
inverted_index_dict = dict((term, []) for term in vocabulary)

# List-based inverted index: one independent, initially empty posting list
# per vocabulary entry; the slot at position i belongs to vocabulary[i].
inverted_index_list = [list() for _ in vocabulary]

In [10]:
# Build both inverted indexes in a single pass over the corpus.
#
# Start from empty posting lists so that re-running this cell rebuilds the
# indexes instead of appending every document id a second time.
for postings in inverted_index_dict.values():
    postings.clear()
for postings in inverted_index_list:
    postings.clear()

# Resolve term -> term id once. Looking a term up here replaces a linear
# `term in vocabulary` scan plus a linear `vocabulary.index(term)` scan for
# every token. setdefault keeps the first position of a term, which is the
# position vocabulary.index() would return.
term_to_id = {}
for position, vocab_term in enumerate(vocabulary):
    term_to_id.setdefault(vocab_term, position)

for doc_id, document_tuple in enumerate(pre_processed_data):
    # The document text is the first element of each pre-processed entry.
    document = document_tuple[0]

    # Iterate over the distinct terms of the document: a posting list records
    # each document at most once, however often the term occurs in it. The
    # .remove()-based update/delete helpers rely on that.
    for term in set(document.split()):
        term_id = term_to_id.get(term)
        if term_id is None:
            # Token is not part of the vocabulary; it is not indexed.
            continue

        # Update the inverted index using the dictionary approach
        inverted_index_dict[term].append(doc_id)

        # Update the inverted index using the list-based approach
        inverted_index_list[term_id].append(doc_id)

### Analysis of the two Data Structures

In [None]:
# Dictionary-based index: retrieval
def retrieve_from_dict(term):
    """Return the posting list stored for ``term`` in ``inverted_index_dict``.

    For a term that is a key of the index, the stored list object itself is
    returned (not a copy). For any other term a new empty list is returned
    and the index is left unchanged.
    """
    try:
        return inverted_index_dict[term]
    except KeyError:
        return []

# Retrieval function for list-based index
def retrieve_from_list(term):
    """Return the posting list stored for ``term`` in ``inverted_index_list``.

    The term id is the position of ``term`` in ``vocabulary``; the posting
    list at that position is returned as the stored list object (not a copy).

    A term that is not in the vocabulary yields a new empty list, matching
    ``retrieve_from_dict``, instead of propagating the ``ValueError`` raised
    by ``vocabulary.index``.
    """
    try:
        term_id = vocabulary.index(term)
    except ValueError:
        # Unknown term: nothing is indexed under it.
        return []
    return inverted_index_list[term_id]

# Dictionary-based index: insertion
def insert_to_dict(term, doc_id):
    """Append ``doc_id`` to the posting list of ``term`` in ``inverted_index_dict``.

    A term that is not yet a key of the index gets a new posting list that
    contains only ``doc_id``. No duplicate check is made: inserting the same
    ``doc_id`` twice stores it twice.
    """
    inverted_index_dict.setdefault(term, []).append(doc_id)

# Insertion function for list-based index
def insert_to_list(term, doc_id):
    """Append ``doc_id`` to the posting list of ``term`` in ``inverted_index_list``.

    The term id is the position of ``term`` in ``vocabulary``. A term that is
    not in the vocabulary yet is registered first: it is appended to
    ``vocabulary`` and receives a new, empty posting list at the matching
    position. This mirrors ``insert_to_dict``, which creates the entry for an
    unseen term, instead of failing with the ``ValueError`` raised by
    ``vocabulary.index``.

    No duplicate check is made: inserting the same ``doc_id`` twice stores it
    twice.
    """
    try:
        term_id = vocabulary.index(term)
    except ValueError:
        # New term: grow the vocabulary and the index together so that
        # position i of one keeps describing position i of the other.
        # Assumes `vocabulary` is a mutable list -- TODO confirm.
        term_id = len(vocabulary)
        vocabulary.append(term)
        inverted_index_list.append([])
    inverted_index_list[term_id].append(doc_id)

# Updation function for dictionary-based index
def update_in_dict(term, old_doc_id, new_doc_id):
    """Replace ``old_doc_id`` with ``new_doc_id`` in the posting list of ``term``.

    Every occurrence of ``old_doc_id`` is removed and ``new_doc_id`` is
    appended once at the end of the list.

    Nothing happens when ``term`` is not a key of ``inverted_index_dict`` or
    when ``old_doc_id`` is not in its posting list. In particular,
    ``new_doc_id`` is not inserted when there was nothing to replace, which
    matches ``update_in_list``.
    """
    postings = inverted_index_dict.get(term)
    if postings is None or old_doc_id not in postings:
        return
    # Rewrite the list in place (slice assignment) rather than rebinding the
    # key, so a list handed out earlier by retrieve_from_dict stays current.
    postings[:] = [doc_id for doc_id in postings if doc_id != old_doc_id]
    postings.append(new_doc_id)

# Updation function for list-based index
def update_in_list(term, old_doc_id, new_doc_id):
    """Replace ``old_doc_id`` with ``new_doc_id`` in the posting list of ``term``.

    The term id is the position of ``term`` in ``vocabulary``. Every
    occurrence of ``old_doc_id`` is removed from that posting list in place
    and ``new_doc_id`` is appended once at the end.

    Nothing happens when ``term`` is not in the vocabulary or when
    ``old_doc_id`` is not in its posting list, which matches
    ``update_in_dict``.
    """
    try:
        term_id = vocabulary.index(term)
    except ValueError:
        # Unknown term: there is no posting list to update.
        return
    postings = inverted_index_list[term_id]
    if old_doc_id not in postings:
        return
    # list.remove() would drop only the first occurrence; filter instead so a
    # document id stored more than once does not leave stale entries behind.
    postings[:] = [doc_id for doc_id in postings if doc_id != old_doc_id]
    postings.append(new_doc_id)

# Deletion function for dictionary-based index
def delete_from_dict(term, doc_id):
    """Remove ``doc_id`` from the posting list of ``term`` in ``inverted_index_dict``.

    Every occurrence of ``doc_id`` is removed, in place. The key for ``term``
    is kept even when its posting list becomes empty.

    Nothing happens when ``term`` is not a key of the index or when
    ``doc_id`` is not in its posting list.
    """
    postings = inverted_index_dict.get(term)
    if not postings:
        # Unknown term or empty posting list: nothing to delete.
        return
    # list.remove() would drop only the first occurrence, leaving the
    # document indexed under the term if its id was stored more than once.
    postings[:] = [posting for posting in postings if posting != doc_id]

# Deletion function for list-based index
def delete_from_list(term, doc_id):
    """Remove ``doc_id`` from the posting list of ``term`` in ``inverted_index_list``.

    The term id is the position of ``term`` in ``vocabulary``. Every
    occurrence of ``doc_id`` is removed from that posting list, in place.

    Nothing happens when ``term`` is not in the vocabulary or when ``doc_id``
    is not in its posting list, which matches ``delete_from_dict``.
    """
    try:
        term_id = vocabulary.index(term)
    except ValueError:
        # Unknown term: there is no posting list to delete from.
        return
    postings = inverted_index_list[term_id]
    # list.remove() would drop only the first occurrence, leaving the
    # document indexed under the term if its id was stored more than once.
    postings[:] = [posting for posting in postings if posting != doc_id]