In [1]:
!pip install youtube_transcript_api google-generativeai chromadb

Collecting youtube_transcript_api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl.metadata (15 kB)
Collecting chromadb
  Downloading chromadb-0.5.15-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.3-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.7.0-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.19.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-

In [2]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

import google.generativeai as genai
import chromadb
from chromadb.utils import embedding_functions
import os

In [5]:
# Read the Gemini API key from the environment instead of embedding it in the
# notebook, where it is exposed to anyone who can read the file or its history.
# NOTE: the key that was previously hardcoded in this cell should be treated
# as compromised and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment before running this notebook."
    )
genai.configure(api_key=GEMINI_API_KEY)

# Instantiate Gemini model
# Model choices: https://ai.google.dev/gemini-api/docs/models/gemini
genai_model = genai.GenerativeModel('models/gemini-1.5-flash')

# Load the vector database, if it exists, otherwise create new on first run
chroma_client = chromadb.PersistentClient(path="my_vectordb")

# Select an embedding function.
# Embedding Function choices: https://docs.trychroma.com/guides/embeddings#custom-embedding-functions
gemini_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key=GEMINI_API_KEY)

# Load collection, if it exists, otherwise create new on first run.
# The embedding function given here is the one the collection uses to embed text.
chroma_collection = chroma_client.get_or_create_collection(name='yt_notes', embedding_function=gemini_ef)

In [6]:
# Instruction that is prepended to the transcript when asking the model for
# notes. Adjust as needed.
prompt = "Extract key notes from video transcript: "

# Id of the YouTube video to process (the part after https://youtu.be/).
yt_video_id = "hQH4-5o0BMM"

In [7]:
# Fetch the video's transcript, requesting the listed English variants.
# The raw API result gets its own name so `transcript` only ever holds the
# formatted text that later cells concatenate with the prompt.
transcript_segments = YouTubeTranscriptApi.get_transcript(yt_video_id, languages=['en','en-US','en-GB'])
transcript = TextFormatter().format_transcript(transcript_segments)

# Keep a copy of the transcript on disk. The encoding is explicit so that
# non-ASCII characters do not depend on the platform's default encoding.
with open("temp_transcript.txt", "w", encoding="utf-8") as file:
    file.write(transcript)

In [8]:
# Send the instruction followed by the full transcript to the model in a
# single, non-streaming request.
model_input = prompt + transcript
response = genai_model.generate_content(model_input, stream=False)

# Save the generated notes to disk; the next cell reads them back from this file.
with open("temp_notes.txt", "w") as notes_file:
    notes_file.write(response.text)

In [9]:
# Load the notes text that was written to disk by the previous cell.
with open("temp_notes.txt", "r") as notes_file:
    notes = notes_file.read()

# Store the notes under the video id. `upsert` inserts the record if the id is
# new and updates the existing record otherwise.
# https://docs.trychroma.com/reference/py-collection#upsert
chroma_collection.upsert(ids=[yt_video_id], documents=[notes])

# Validation: fetch the stored record back by id and display it.
result = chroma_collection.get(yt_video_id, include=['documents'])
result

{'ids': ['hQH4-5o0BMM'],
 'embeddings': None,
 'documents': ["## Key Notes from the Spaghetti and Meat Sauce Video:\n\n**Quick & Easy:**\n\n* 30-minute prep time\n* Perfect for weeknight dinners\n* Can be made any night of the week\n\n**Flavorful & Healthy:**\n\n* Loaded with vegetables\n* Caramelized veggies for added sweetness\n* Combination of ground beef and Italian sausage for depth of flavor\n* Umami boost from fish sauce\n\n**Simple Steps:**\n\n* Prep vegetables (onion, garlic, carrot, celery)\n* Sauté vegetables in olive oil\n* Add ground beef and Italian sausage\n* Add Italian seasoning, fennel, tomato paste, and crushed tomatoes\n* Simmer for 15 minutes\n* Cook pasta for half the time listed on the box\n* Finish cooking pasta in the meat sauce\n* Top with Parmesan cheese, parsley, basil, and red pepper flakes (optional)\n\n**Other Notes:**\n\n* Use an 85/15 ground beef for a balanced sauce\n* Don't be afraid to add a generous amount of olive oil\n* Scrape the caramelization o

In [10]:
query_text = "How much beef do I need for the beef ribs recipe?"
n_results = 5

# https://docs.trychroma.com/reference/py-collection#query
results = chroma_collection.query(
    query_texts=[query_text],
    n_results=n_results,
    include=['documents', 'distances', 'metadatas'],
)

# Only one query text was sent, so its matches sit at index 0 of each field.
# The loop variable is `video_id` rather than `id` so the builtin `id()` is
# not shadowed for the rest of the notebook.
separator = "************************************************************************"
for rank, (video_id, document) in enumerate(
    zip(results["ids"][0], results["documents"][0]), start=1
):
    print(separator)
    print(f"{rank}.  https://youtu.be/{video_id}")
    print(separator)
    print(document)



************************************************************************
1.  https://youtu.be/hQH4-5o0BMM
************************************************************************
## Key Notes from the Spaghetti and Meat Sauce Video:

**Quick & Easy:**

* 30-minute prep time
* Perfect for weeknight dinners
* Can be made any night of the week

**Flavorful & Healthy:**

* Loaded with vegetables
* Caramelized veggies for added sweetness
* Combination of ground beef and Italian sausage for depth of flavor
* Umami boost from fish sauce

**Simple Steps:**

* Prep vegetables (onion, garlic, carrot, celery)
* Sauté vegetables in olive oil
* Add ground beef and Italian sausage
* Add Italian seasoning, fennel, tomato paste, and crushed tomatoes
* Simmer for 15 minutes
* Cook pasta for half the time listed on the box
* Finish cooking pasta in the meat sauce
* Top with Parmesan cheese, parsley, basil, and red pepper flakes (optional)

**Other Notes:**

* Use an 85/15 ground beef for a balanced sauc

In [12]:
# Use the first document returned for the (single) query text as context.
matched_documents = results['documents'][0]
if not matched_documents:
    raise ValueError("The vector store returned no documents for the query; add notes before asking questions.")
top_document = matched_documents[0]

# Build the question-answering prompt under its own name so the note-extraction
# `prompt` defined earlier is not overwritten (re-running the note-generation
# cell afterwards would otherwise send the wrong instruction). The sections are
# separated by blank lines so the instruction, question and document do not
# run together.
qa_prompt = (
    "Answer the following QUESTION using DOCUMENT as context.\n\n"
    f"QUESTION: {query_text}\n\n"
    f"DOCUMENT: {top_document}"
)

response = genai_model.generate_content(qa_prompt, stream=False)
response.text

"The document you provided is about making spaghetti and meat sauce, not beef ribs. Therefore, it doesn't contain any information about the amount of beef needed for a beef rib recipe. \n"