In [None]:
pip install llama-index llama-hub youtube_transcript_api

In [None]:
import torch
from llama_hub.youtube_transcript import YoutubeTranscriptReader
from llama_index import PromptHelper, ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt

# Globals
# Hugging Face Hub id of the chat LLM. The organisation is "HuggingFaceH4";
# the previous "HuggingFace4" spelling does not resolve to a repository.
model_name = "HuggingFaceH4/zephyr-7b-beta"
# Hub id of the BGE *embedding* checkpoint. Despite the variable name, this is
# the value handed to HuggingFaceEmbedding, not the LLM's tokenizer.
tokenizer = "BAAI/bge-small-en-v1.5"

# Youtube loader
# One entry per video whose transcript should be loaded.
new_heights_vids = [
    "https://www.youtube.com/watch?v=Zhv7aMcaCKM",
]
loader = YoutubeTranscriptReader()
# Load the transcripts for the listed links.
documents = loader.load_data(ytlinks=new_heights_vids)

# Prompts
# System preamble passed to the LLM as its system prompt.
system_prompt = (
    "<|SYSTEM|> You are a helpful, resourceful, and friendly "
    "chatbot who always responds with well thought out answers."
)

# Template that wraps the raw query string in user/assistant turn markers.
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

# PromptHelper takes `context_window` and `chunk_overlap_ratio`; the older
# `max_input_size` / `max_chunk_overlap` keyword names raise TypeError on
# llama-index releases that also provide HuggingFaceEmbedding.
prompt_helper = PromptHelper(
    context_window=4096,  # total tokens available for prompt + completion
    num_output=256,  # tokens reserved for the generated answer
    chunk_overlap_ratio=0.2,  # fraction of overlap between adjacent chunks
)

In [None]:
# Models
# `tokenizer` holds the embedding checkpoint id (see the globals cell).
embedding = HuggingFaceEmbedding(tokenizer, trust_remote_code=True)
llm = HuggingFaceLLM(
    system_prompt=system_prompt,
    # Use the turn-marker template defined alongside the system prompt; an
    # empty string here left that template unused and sent the query unwrapped.
    query_wrapper_prompt=query_wrapper_prompt,
    model_name=model_name,
    # The tokenizer must come from the same checkpoint as the LLM. Passing the
    # embedding model's id here would encode prompts with a different
    # vocabulary than the one the model was trained on.
    tokenizer_name=model_name,
    model_kwargs={"torch_dtype": torch.bfloat16},
    # Sampling settings: do_sample=True makes answers non-deterministic.
    generate_kwargs={
        "do_sample": True,
        "temperature": 0.7,
        "top_k": 10,
        "top_p": 0.95,
    },
)


In [None]:
# Vector Store
# Bundle the LLM, embedding model, prompt helper and chunk size into one
# service context, then build the index from the loaded documents.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embedding,
    prompt_helper=prompt_helper,
    chunk_size=1024,
)
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

# persist
# Write the index's storage context to the "vector_db" directory.
index.storage_context.persist(persist_dir="vector_db")

# The query call is the cell's last expression, so its result is displayed.
query_engine = index.as_query_engine()
query_engine.query(
    "Summarize what Travis Kelce said about the Chiefs' loss to the Broncos."
)