In [None]:
## Dependencies for semantic/neural search.
import numpy as np
# Shell escape: installs the (unpinned) sentence-transformers package so the import below resolves.
# NOTE(review): `%pip install` targets the running kernel's environment; consider using it with a pinned version.
!pip install sentence-transformers
# We use a pretrained model from https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
from sentence_transformers import SentenceTransformer
# Module-level encoder: the search code in this notebook calls model.encode() on documents and queries.
model = SentenceTransformer('all-MiniLM-L6-v2')  # We can change it to a better model if we find one


In [21]:
# Load documents for neural search
def load_documents(file_path, encoding='utf-8'):
    """Read a text file and return its lines as a list of documents.

    Each line of the file is treated as one document. Leading and trailing
    whitespace (including the newline) is stripped; blank lines are kept as
    empty strings so that list positions still correspond to file lines.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        list[str]: One stripped string per line of the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid for `encoding`.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        # Iterating the handle streams line by line instead of materialising
        # the raw lines first and then building a second, stripped list.
        return [line.strip() for line in file]


# Neural search function
def _l2_normalize(vectors):
    """Scale vectors along the last axis to unit L2 norm (all-zero vectors stay zero)."""
    vectors = np.asarray(vectors)
    norms = np.linalg.norm(vectors, axis=-1, keepdims=True)
    # Avoid division by zero for degenerate all-zero embeddings.
    return vectors / np.where(norms == 0, 1.0, norms)


def neural_search(documents, user_input, top_k=3, doc_embeddings=None):
    """Rank documents against a query by cosine similarity and print the best hits.

    Documents and the query are encoded with the module-level `model`, both
    sides are L2-normalised, and their dot product is used as the score, so
    the printed value is a true cosine similarity even if `model` is swapped
    for one that does not emit unit-length embeddings.

    Args:
        documents: Sequence of document strings to search.
        user_input: The query string.
        top_k: Maximum number of results to print (default 3).
        doc_embeddings: Optional precomputed embeddings for `documents`, one
            row per document in the same order. When omitted, the documents
            are encoded on every call, which is the expensive step; callers
            issuing many queries against the same corpus can encode once and
            pass the result here.

    Returns:
        None. Results are printed to stdout.
    """
    if len(documents) == 0:
        # Nothing to rank; encoding an empty corpus would not yield a usable matrix.
        print("No documents to search.")
        return

    if doc_embeddings is None:
        doc_embeddings = model.encode(documents)  # Encode documents
    query_embedding = model.encode(user_input)  # Encode user input

    # Cosine similarity = dot product of unit vectors.
    cosine_similarities = _l2_normalize(doc_embeddings) @ _l2_normalize(query_embedding)
    ranked_doc_indices = np.argsort(cosine_similarities)[::-1]  # Higher is better: sort descending

    # Output the results (top `top_k` matches, fewer if the corpus is smaller)
    num_results = max(0, min(top_k, len(documents)))
    # The count printed here is the corpus size, not a number of hits: every
    # document receives a score, so report it as "compared against".
    print(f'Your query "{user_input}" was compared against {len(documents)} documents.')
    print(f"Here are the top {num_results} results:")

    for rank, doc_idx in enumerate(ranked_doc_indices[:num_results], start=1):
        demo_doc = documents[doc_idx][:50]  # Display first 50 characters of the document content
        print(f"Doc #{rank} (score: {cosine_similarities[doc_idx]:.4f}): {demo_doc}")


def user_query():
    """Prompt the user for a search query and return the raw text they typed.

    A blank line is printed first to visually separate the prompt from any
    previous output.
    """
    print()
    return input("Please Enter your query, type 'quit' to exit: ")

def input_checker(user_input):
    """Decide whether the interactive loop should keep going.

    The input is compared after trimming surrounding whitespace and
    lower-casing, so variants such as " quit", "QUIT" or a whitespace-only
    line end the session instead of being sent to the search as a query.

    Args:
        user_input: Raw text entered by the user.

    Returns:
        bool: False (after printing "Exit") when the user asked to quit or
        entered nothing; True otherwise.
    """
    normalized = user_input.strip().lower()
    if normalized in ("quit", ""):
        print("Exit")
        return False
    return True

def main():
    """Load the corpus and run the interactive query loop until the user quits.

    Each iteration prompts for a query via user_query(), stops when
    input_checker() returns False, and otherwise prints the top matches via
    neural_search().
    """
    small_wiki = 'enwiki-20181001-corpus.100-articles.txt'
    # Use the path defined above. The previous code passed an undefined name
    # (`filepath`), which only worked if that variable happened to be left
    # over in the kernel and raises NameError on a fresh run.
    documents = load_documents(small_wiki)

    while True:
        user_input = user_query()
        if not input_checker(user_input):
            break

        neural_search(documents, user_input)

# Entry point: start the interactive search loop when this code is executed
# directly (rather than imported as a module).
if __name__ == "__main__":
    main()




Please Enter your query, type 'quit' to exit:  edmund


Your query "edmund" matches 11002 documents.
Here are the top 3 results:
Doc #1 (score: 0.7401): Edmund Carpenter
Doc #2 (score: 0.7093): Edmund Leach
Doc #3 (score: 0.5513): Oscar Lewis



Please Enter your query, type 'quit' to exit:  quit


Exit
