In [19]:
# Toy corpus: ten one-sentence activity suggestions, stored as plain strings.
# Each string is treated as one document by the similarity search in this notebook.
corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.",
    "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.",
    "Go for a hike and admire the natural scenery.",
    "Have a picnic with friends and share some laughs.",
    "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.",
    "Attend a workshop or lecture on a topic you're interested in.",
    "Visit an amusement park and ride the roller coasters."
]

In [20]:
import math
from collections import Counter

# Function to calculate cosine similarity
def cosine_similarity(query, document, tokenizer=None):
    """Return the cosine similarity between the term-count vectors of two texts.

    Each text is turned into a bag of tokens, the tokens are counted, and the
    cosine of the angle between the two count vectors is returned.

    Args:
        query: The query text.
        document: The document text to compare against the query.
        tokenizer: Optional callable mapping a text to an iterable of hashable
            tokens. When omitted, the text is lowercased and split on
            whitespace. Note that this default keeps punctuation attached to
            words, so "scenery" and "scenery." are different tokens; pass a
            punctuation-aware tokenizer if that matters for your data.

    Returns:
        A float in [0.0, 1.0] (up to floating-point rounding). Returns 0.0
        when either text yields no tokens.
    """
    if tokenizer is None:
        tokenize = lambda text: text.lower().split()
    else:
        tokenize = tokenizer

    # Term-frequency vectors, stored sparsely as token -> count.
    query_counter = Counter(tokenize(query))
    document_counter = Counter(tokenize(document))

    # A Counter returns 0 for tokens it has not seen, so only tokens present
    # in both texts contribute to the dot product.
    dot_product = sum(
        count * document_counter[token] for token, count in query_counter.items()
    )

    # Euclidean norms of the two count vectors.
    query_magnitude = math.sqrt(sum(count ** 2 for count in query_counter.values()))
    document_magnitude = math.sqrt(sum(count ** 2 for count in document_counter.values()))

    # An empty text has a zero vector; define its similarity as 0 instead of
    # dividing by zero.
    if query_magnitude == 0 or document_magnitude == 0:
        return 0.0

    return dot_product / (query_magnitude * document_magnitude)

In [21]:
# Quick sanity check on two nearly identical sentences.
# The query's stand-alone "?" becomes its own whitespace token with no match
# in the document, which is why the score is high but below 1.
user_query = "is yoga good for health ?"
document = "yoga is good for health"
cosine_similarity(user_query, document)

0.9128709291752769

In [22]:
# Function to return the document most similar to the query
def return_response(query, corpus, similarity_fn=None):
    """Return the document in `corpus` that is most similar to `query`.

    Args:
        query: The query text.
        corpus: An iterable of documents (any iterable, not only a list).
        similarity_fn: Optional callable `(query, document) -> number` used to
            score each document. Defaults to `cosine_similarity`.

    Returns:
        The highest-scoring document. On ties the earliest document wins, so
        if no document scores above the rest (e.g. every score is 0.0) the
        first document in the corpus is returned.

    Raises:
        ValueError: If `corpus` contains no documents.
    """
    # Materialise once so generators and other one-shot iterables work too.
    documents = list(corpus)
    if not documents:
        raise ValueError("corpus must contain at least one document")

    if similarity_fn is None:
        similarity_fn = cosine_similarity

    # max() keeps the first maximal element, matching an index-of-max lookup.
    return max(documents, key=lambda doc: similarity_fn(query, doc))

In [23]:
# Example query: a single word that appears as a whitespace-delimited token
# in exactly one corpus sentence.
query = "picnic"

# Retrieve the corpus sentence with the highest similarity to the query.
return_response(query, corpus_of_documents)

'Have a picnic with friends and share some laughs.'