In [1]:
# import libraries
import googleapiclient
from googleapiclient.discovery import build
import pandas as pd
import re
import ollama
from langchain.schema import HumanMessage
import chromadb
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# load the vector database that was created during data collection & ingestion phase

# configuration: these values must match the ones used by the ingestion notebook
DATABASE_PATH = "../Data Collection/youtube_comment_database"
COLLECTION_NAME = "youtube_comments"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# load client
database = chromadb.PersistentClient(path=DATABASE_PATH)
# get collection
# get_collection (instead of get_or_create_collection) fails loudly when the collection
# does not exist; the "or create" variant would silently hand back a brand-new, empty
# collection if the path or name were wrong, and every semantic search would then
# return no samples without any error
collection = database.get_collection(name=COLLECTION_NAME)

# initialize sentence embedder (same one used in data ingestion phase)
# using all-MiniLM-L6-v2 since llama2:7B doesn't have an encoder and this one is light enough for me to run
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [94]:
# notebook flow: take a user prompt -> semantic search over the vector database ->
# hand the retrieved comment-reply pairs to the llm -> print the llm's response

# every prompt below is written as a youtube comment; the llm is expected to answer it with a reply
test_prompts = [
    "I'm thinking of buying one",
    "The Mazda 3 is better than the civic si",
    "civic si sucks, don't know why ppl buy them",
    "Would rather buy a bmw",
    "it is too expensive",
    "the civic type r is better",
]

In [99]:
# helper: pull the reply text out of one metadata record returned by the vector database
def _first_metadata_value(metadata):
    """Return the first value stored in ``metadata``, or None when there is none.

    The reply is read as the first value of the record, whatever its key is called.
    """
    if not metadata:
        return None
    return next(iter(metadata.values()))


# helper: assemble the full text that is sent to the llm
def _build_reply_prompt(prompt, comments, replies):
    """Combine the instructions, the sample comment-reply pairs and the user's comment.

    Args:
        prompt: the comment the llm should reply to.
        comments: retrieved sample comments.
        replies: replies matching ``comments`` position by position; a pair is skipped
            when either side is None.

    Returns:
        The complete prompt text.
    """
    # write out the pre-prompt text for the llm
    pre_prompt = """
    You are a bot whose purpose is to reply to YouTube comments about the Honda Civic Si.
    You will be provided with examples of comment and reply pairings. Do not simply regurgitate these replies, use them as inspiration.
    Do not include special characters or usernames in your response. Do not prompt user for more information.
    Use only the information provided in the sample comment reply pairs to create your response.
    Respond by restating the comment and then your reply.
    Here are your samples:
    """

    # add in the comment and reply pairs (numbered from 1, counting only usable pairs)
    sample_number = 0
    for comment, reply in zip(comments, replies):
        if comment is None or reply is None:
            continue
        sample_number += 1
        # f-string formatting also copes with metadata values that are not strings
        pre_prompt += f"Comment {sample_number}: {comment}\nReply {sample_number}: {reply}\n\n"

    # add in the comment from user for llm to reply to
    pre_prompt += "\nDraft a reply to this comment: \n" + prompt

    return pre_prompt


# function to intake a prompt and then call llm
def call_llm(prompt, number_comments_to_return=5, model='llama2:7B'):
    """Draft a reply to a YouTube comment using retrieved comment-reply pairs as samples.

    Reads the module-level ``embedding_model`` and ``collection`` and calls ``ollama``.

    Args:
        prompt: the comment to reply to.
        number_comments_to_return: how many comment-reply pairs to retrieve as samples.
        model: name of the ollama model that generates the reply.

    Returns:
        The text of the llm's response.
    """
    # embed the prompt
    encoded_prompt = embedding_model.encode(prompt)
    # hand the database a plain list of floats rather than whatever array type the encoder returns
    query_vector = encoded_prompt.tolist() if hasattr(encoded_prompt, "tolist") else list(encoded_prompt)

    # search the database using the encoded query and get the most related comment-reply pairs
    # NOTE(review): Chroma ranks by squared L2 distance unless "hnsw:space" was set to "cosine"
    # when the collection was created -- confirm against the ingestion notebook
    semantic_search_results = collection.query(
        query_embeddings=[query_vector],
        n_results=number_comments_to_return,
    )

    # results are grouped per query embedding; only one was sent, so take the first group
    comments = (semantic_search_results.get('documents') or [[]])[0]
    metadatas = (semantic_search_results.get('metadatas') or [[]])[0]

    # each metadata record is a dictionary whose first value is the reply
    replies = [_first_metadata_value(metadata) for metadata in metadatas]

    pre_prompt = _build_reply_prompt(prompt, comments, replies)

    # call llm and get response
    # using generate and not chat because don't need to have a conversation, just a response to the initial prompt
    response = ollama.generate(model=model, prompt=pre_prompt)

    return response["response"]

In [101]:
# run every test prompt through the llm, asking for 5 sample pairs each time,
# and print each reply followed by blank lines to keep the outputs apart
for prompt in test_prompts:
    llm_reply = call_llm(prompt, 5)
    print(f"{llm_reply}\n\n")

Sure, here's a possible reply to your comment:

"Awesome! Buying a Honda Civic Si can be an excellent choice. As you mentioned, it's a great car with plenty of power and fuel efficiency. Have you considered test-driving one to see how it handles on the road? It's important to get a feel for the car before making a purchase. Let me know if you have any other questions or concerns!"


Comment: The Mazda 3 is better than the Civic Si.

Reply: Hmm, I can see why you might think that. Both cars have their own strengths and weaknesses. However, I think the Civic Si has a more sporty and responsive driving experience, especially with its turbocharged engine. Plus, it has a more premium interior and a more aggressive styling. But hey, to each their own! What do you think makes the Mazda 3 better?


Comment: civic si sucks, don't know why ppl buy them

Reply: I understand that you may not be a fan of the Honda Civic Si, but it's important to remember that people have different preferences and t