In [1]:

corpus_of_documents = [
    "Take a leisurely walk in the park and enjoy the fresh air.", "Visit a local museum and discover something new.",
    "Attend a live music concert and feel the rhythm.", "Go for a hike and admire the natural scenery."
    "Have a picnic with friends and share some laughs.", "Explore a new cuisine by dining at an ethnic restaurant.",
    "Take a yoga class and stretch your body and mind.",
    "Join a local sports league and enjoy some friendly competition.", "Attend a workshop or lecture on a topic you're interested in.", "Visit an amusement park and ride the roller coasters."
    "Attend a workshop or lecture on a topic you're interested in.",
    "Vist an amusement park and ride the roller coasters."
]

# To make embeddings of the documents and user query

In [11]:
# Documents embeddings
from collections import Counter
import math

In [12]:
document_tokens = [doc.lower().split(" ") for doc in corpus_of_documents]

In [13]:
flat_tokens = [word for doc in document_tokens for word in doc]
document_counter = Counter(flat_tokens)

In [49]:
# User embeddings
user_input = "Suggest some fun outdoor activities for the weekend."

In [15]:
query_tokens = user_query.lower().split(" ")

In [17]:
query_counter = Counter(query_tokens)

# To find similarity between documents and query

In [21]:
lst = []
for token in query_counter.keys():
    lst.append(query_counter[token])

In [23]:
# sentence vector
lst

[1, 1, 1, 1, 1, 1, 1, 1]

In [25]:
# here we will do word by word matching where we compare the words in sentence with documents 
# where similar word is found we take those number and perform the dot product
mylist = []
for tokens in query_counter.keys() and document_counter.keys():
    mylist.append(query_counter[tokens]*document_counter[tokens])

In [26]:
mylist

[0,
 0,
 0,
 0,
 0,
 6,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [33]:
dot_prod=sum(mylist)

# To find the magnitude of the vector

In [29]:
query_magnitude = math.sqrt(sum(query_counter[token] ** 2 for token in query_counter))

In [30]:
query_magnitude

2.8284271247461903

In [31]:
document_magnitude = math.sqrt(sum(document_counter[token] ** 2 for token in document_counter))

In [32]:
document_magnitude

20.322401432901575

# To find the similarity

In [34]:
similarity = (dot_prod)/(query_magnitude * document_magnitude)

In [35]:
similarity

0.15657502514382474

In [36]:
# in one whole function
def cosine_similarity(query, document):
    # tokenize and convert to lowercase
    query_tokens = query.lower().split(" ")
    document_tokens = " ".join(corpus_of_documents).lower().split(" ")

    # create Counters for query and document
    query_counter = Counter(query_tokens)
    document_counter = Counter(document_tokens)

    # Calculate dot product
    dot_product = sum(query_counter[token] * document_counter[token] for token in query_counter.keys())

    # calculate magnitudes
    query_magnitude = math.sqrt(sum(query_counter[token] ** 2 for token in query_counter))
    document_magnitude = math.sqrt(sum(document_counter[token] ** 2 for token in document_counter))

    # calculate cosine similarity
    similarity = dot_product / (query_magnitude * document_magnitude) if query_magnitude * document_magnitude !=0 else 0

    return similarity

In [37]:
cosine_similarity(user_query,corpus_of_documents)

0.15657502514382474

In [46]:
def return_response(query, corpus):
    similarities = []
    for doc in corpus:
        sim = cosine_similarity(query, [doc])
        similarities.append(sim)
    return corpus_of_documents[similarities.index(max(similarities))]  

In [48]:
relevant_document = return_response(user_query,corpus_of_documents)

# To integrate the LLM for response generation

In [40]:
import requests
import json
full_response = []

In [51]:
prompt = """
You are a bot that makes recommendations for activities. You answer in very short sentences and do not include extra information
This is the recommended activity : {relevant_document}
The user input is: {user_input}
Compile a recommendation to the user based on the recommended activity and the user input.
"""

url = 'http://localhost:11434/api/generate'

data = {
    "model":"llama2",
    "prompt":prompt.format(user_input=user_input, relevant_document=relevant_document)
}

headers = {'Content-Type': 'application/json'}
response = requests.post(url, data=json.dumps(data), headers=headers, stream=True)
try:
    for line in response.iter_lines():
        #filter out keep-alive new lines
        if line:
            decoded_line = json.loads(line.decode('utf-8'))
            # print(decoded_ine['response']) # uncomment to result, token by token
            full_response.append(decoded_line['response'])
finally:
    response.close()
print(''.join(full_response))

 Sure, here's my recommendation:

How about a picnic in the park? Pack some of your favorite snacks and drinks and enjoy the beautiful scenery while spending time with loved ones!
