In [3]:
!pip install -q youtube-transcript-api langchain_community tiktoken langchain-openai langchainhub chromadb langchain langchain-core langchain_google_genai llama_index qdrant-client sentence-transformers fastembed llama-index-llms-groq

In [2]:
%pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed

Collecting llama-index-vector-stores-qdrant
  Downloading llama_index_vector_stores_qdrant-0.3.0-py3-none-any.whl.metadata (767 bytes)
Collecting llama-index-embeddings-fastembed
  Downloading llama_index_embeddings_fastembed-0.2.0-py3-none-any.whl.metadata (697 bytes)
Downloading llama_index_vector_stores_qdrant-0.3.0-py3-none-any.whl (11 kB)
Downloading llama_index_embeddings_fastembed-0.2.0-py3-none-any.whl (2.7 kB)
Installing collected packages: llama-index-vector-stores-qdrant, llama-index-embeddings-fastembed
Successfully installed llama-index-embeddings-fastembed-0.2.0 llama-index-vector-stores-qdrant-0.3.0


### Setup Youtube API

In [4]:
import os
import re
from google.colab import userdata
import googleapiclient.discovery

# Set up the YouTube Data API client used by get_video_details().
api_service_name = "youtube"
api_version = "v3"
# The API key is read from the Colab secret named 'youtube_video_api' instead of being hard-coded.
DEVELOPER_KEY =userdata.get('youtube_video_api')

# Module-level client; get_video_details() calls youtube.videos().list(...) on it.
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)

### Get Video ID from URL

In [5]:
def get_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Handles the common URL shapes (``watch?v=<id>``, ``youtu.be/<id>``,
    ``/embed/<id>``, ``/shorts/<id>``), with or without trailing query
    parameters.

    Args:
        url: The video URL as a string.

    Returns:
        The 11-character video ID, or None when ``url`` is not a string or
        contains no video ID.
    """
    if not isinstance(url, str):
        return None

    # The ID must directly follow "v=" or "/" and must NOT be followed by a
    # further ID character. Without that boundary, the first 11 characters of
    # any longer path segment (for example a channel ID) would be returned as
    # if they were a video ID.
    video_id_pattern = re.compile(r'(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])')
    match = video_id_pattern.search(url)
    return match.group(1) if match else None

### Give Video details from Video ID
- Title
- Description
- Transcript
- Publish Date
- Channel Name

In [6]:
from youtube_transcript_api import YouTubeTranscriptApi


def get_video_details(video_id):
    """Fetch metadata and the English transcript for one YouTube video.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        A dict with the keys 'title', 'description', 'published_at',
        'author', 'Transcript' and 'video_id', or None when ``video_id`` is
        empty, the API returns no item for it, or the transcript cannot be
        retrieved.
    """
    # An empty/None ID (e.g. from a URL that contained no video ID) can never
    # resolve to a video, so skip the API round trip entirely.
    if not video_id:
        return None

    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id
    )
    response = request.execute()
    items = response.get('items', [])
    if not items:
        return None

    snippet = items[0]['snippet']

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        vid_transcript = " ".join(entry['text'] for entry in transcript)
    except Exception as e:
        # Deliberate best effort: a video without an English transcript is
        # reported and treated the same as "video not found".
        print(f"Could not retrieve transcript for video ID {video_id}: {e}")
        return None

    return {
        'title': snippet['title'],
        'description': snippet['description'],
        'published_at': snippet['publishedAt'],
        'author': snippet['channelTitle'],
        'Transcript': vid_transcript,
        'video_id': video_id
    }

### Text filtering function

In [7]:
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK resources at cell run time: 'stopwords' backs stop_words below,
# and 'wordnet' is presumably the corpus WordNetLemmatizer needs.
nltk.download('stopwords')
nltk.download('wordnet')
# NOTE(review): filter_text() tokenises with str.split(), so nothing visible in
# this notebook appears to use 'punkt' - confirm before removing.
nltk.download('punkt')

# Shared by filter_text(); built once here rather than on every call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def filter_text(text):
    """Normalise free text into lower-cased, lemmatised, stop-word-free tokens.

    URLs, HTML-like tags, digits, punctuation, tabs and newlines are replaced
    by spaces, whitespace is collapsed, English stop words are dropped and the
    remaining words are lemmatised with the module-level ``lemmatizer``.

    Args:
        text: The string to clean.

    Returns:
        The cleaned words joined by single spaces. A non-string argument is
        reported with ``print`` and yields an empty string.
    """
    if not isinstance(text, str):
        print(f"Non-string value encountered: {text}")
        return ""

    # One alternation covers every kind of noise so the text is scanned once.
    noise = re.compile(r'http\S+|www\S+|https\S+|<.*?>|[\d]|[^\w\s]|[\t\n]')
    cleaned = noise.sub(' ', text.lower())

    # Collapse the runs of spaces left behind by the substitutions.
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    kept = []
    for word in cleaned.split():
        if word in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(word))

    return ' '.join(kept)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### Creating a pipeline in the following sequence
- URL
- Getting video details
- Pre process text
- Convert text into Document

```
!pip uninstall nltk llama_index -y
!pip install nltk llama_index
```

In [8]:
from llama_index.core import Document
import uuid

# ID generated for the most recently built Document; updated by get_data_pipeline().
source_document_id = None

def get_data_pipeline(url):
    """Turn a YouTube URL into a llama_index Document plus its tracking ID.

    The title, description and transcript of the video are concatenated into
    one text. The Document's metadata carries the channel name ('author') and
    a freshly generated 'source_doc_id', which is the key used later to delete
    this video's vectors from the vector store.

    Args:
        url: The YouTube video URL.

    Returns:
        A ``(document, source_document_id)`` tuple, or None when no video ID
        can be extracted from ``url`` or the video details/transcript cannot
        be fetched. Callers must check for None before unpacking.
    """
    global source_document_id

    video_id = get_video_id(url)
    if video_id is None:
        # Nothing to look up; avoid calling the YouTube API without an ID.
        return None

    result = get_video_details(video_id)
    if result is None:
        return None

    # A new random ID per call so each upload can be removed independently.
    source_document_id = str(uuid.uuid4())

    # The text is indexed unfiltered: filter_text() is intentionally not
    # applied here, so retrieved chunks stay readable sentences when they are
    # placed into the LLM prompt.
    text = result['title'] + " " + result['description'] + " " + result['Transcript']

    meta_data_dict = {'author': result["author"], 'source_doc_id': source_document_id}

    documents = Document(text=text, metadata=meta_data_dict)
    return documents, source_document_id

### Splitting the text into Documents

- `chunk_size`: The maximum length of each chunk (or segment) after the text is split. `SentenceSplitter` measures this length in tokens rather than characters, so with `chunk_size=512` each chunk contains up to 512 tokens (the code below uses 500). This helps in breaking down large texts into smaller, manageable pieces.

- `chunk_overlap`: How much content from the end of one chunk is repeated at the start of the next chunk. For instance, with `chunk_overlap=64` roughly the last 64 tokens of one chunk reappear at the start of the next chunk (the code below uses 50). This ensures that important context isn't lost between chunks.

In [9]:
from llama_index.core.text_splitter import SentenceSplitter


def Document_splitter(document):
    """Split documents into nodes with a SentenceSplitter.

    Args:
        document: The documents to split; it is passed straight to
            ``get_nodes_from_documents`` (the caller in this notebook passes a
            one-element list).

    Returns:
        The nodes produced with ``chunk_size=500`` and ``chunk_overlap=50``.
    """
    return SentenceSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).get_nodes_from_documents(document)

### Setting the embedding model

In [10]:
import logging
import sys
import os

import qdrant_client
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings

# Set the embedding model on llama_index's global Settings object; the model
# files are downloaded on first use (see the progress output below).
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

model_optimized.onnx:   0%|          | 0.00/218M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

### Qdrant Vector Database

In [12]:
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
import logging
import sys
import os
from google.colab import userdata

# The Qdrant API key comes from the Colab secret named 'qdrant_api'.
qdrant_api_key = userdata.get('qdrant_api')

# Client for the hosted Qdrant cluster; shared by vector_store and by
# delete_previos_vectors() further down.
client = QdrantClient(
    url="09e46b8b-d380-4b87-b0ff-fc89fa53a52d.europe-west3-0.gcp.cloud.qdrant.io:6333",
    api_key=qdrant_api_key,
)

# Connectivity check: lists the collections visible to this API key.
print(client.get_collections())

collections=[CollectionDescription(name='Youtube_Chatbot')]


In [13]:
# llama_index wrapper around the "Youtube_Chatbot" collection; create_index()
# and the index built from the existing store both use this object.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="Youtube_Chatbot"
)

### Creating an index

In [14]:
from llama_index.core import VectorStoreIndex, StorageContext

def create_index(nodes):
    """Embed the given nodes and store them in the Qdrant-backed vector store.

    The nodes are indexed directly. Wrapping each node in a new ``Document``
    and calling ``VectorStoreIndex.from_documents`` would send the already
    split chunks through the ingestion transformations a second time and
    replace the nodes' ids and source relationships with freshly generated
    ones.

    Args:
        nodes: The nodes to index, e.g. the output of ``Document_splitter``.
            Each node's metadata (including 'source_doc_id') is kept as is.

    Returns:
        A ``VectorStoreIndex`` backed by the module-level ``vector_store``.
    """
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    return VectorStoreIndex(
        nodes=list(nodes),
        storage_context=storage_context,
    )

### Retrieving the previously stored vector database (not necessary for this project)

In [15]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext

# Requires `client` and `vector_store` from the Qdrant cells above.

storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build an index object on top of whatever vector_store already holds; the
# query-engine demo cell below uses this notebook-level `index`.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
)

### Setting up LLM

In [17]:
import os
from google.colab import userdata
from llama_index.llms.groq import Groq

# The Groq key is read from the Colab secret 'groq_api' and exported as an
# environment variable before the client is created.
os.environ["GROQ_API_KEY"] = userdata.get('groq_api')
llm = Groq(model="llama3-70b-8192")
# Also set it on llama_index's global Settings object; `llm` itself is reused
# when the query pipeline is built.
Settings.llm = llm

In [18]:
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")

In [19]:
response

Response(response="There is no information provided about the author's childhood or growing up, so it's not possible to determine what the author did growing up.", source_nodes=[NodeWithScore(node=TextNode(id_='779f62c3-2b5c-4401-9c98-c37b247725a0', embedding=None, metadata={'author': 'The National Desk', 'source_doc_id': '466a6b23-87a3-40d6-99bc-6f5d94bde9cd'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='232cafb3-41ce-4af7-bea6-4c04d637b1ec', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'author': 'The National Desk', 'source_doc_id': '466a6b23-87a3-40d6-99bc-6f5d94bde9cd'}, hash='a4b0a51e2fabcabdf9cc476d9f60f14bab8fedc9001971e9d6ca4ab91114ffc4')}, text=">>A LOT OF GROUND COVERED, BUT  CLEARLY SOME TOPICS WERE LEFT   OUT AND PEOPLE AREN'T HAPPY   ABOUT THAT.   >>YEAH SURPRISINGLY SO. SO THE  NATIONAL DEBT WASN'T DISCUSSED,  EVEN THOUGH IT'S OVER $35   TRILLION AND IS EXPECTED TO   SKYROCKET T

In [20]:
response = llm.complete("Explain the importance of low latency LLMs")
print(response)

Low-latency Large Language Models (LLMs) are crucial in various applications where real-time or near-real-time processing is essential. Here are some reasons why low-latency LLMs are important:

1. **Real-time Conversational AI**: In conversational AI, such as chatbots, voice assistants, and customer service platforms, low-latency LLMs enable rapid response times, making interactions feel more natural and human-like. This is particularly important in applications where users expect immediate responses, such as customer support or virtual assistants.
2. **Live Streaming and Broadcasting**: Low-latency LLMs are essential for live streaming and broadcasting applications, such as real-time transcription, subtitles, or closed captions. This ensures that viewers receive accurate and timely information, enhancing their overall experience.
3. **Gaming and Interactive Systems**: In gaming and interactive systems, low-latency LLMs can improve the responsiveness of NPCs (non-player characters), e

### Clearing vector store when new URL is uploaded

`**kwargs` allows passing extra options (like filters, batch sizes, etc.) to the `client.delete()` method without explicitly defining each option in the function's argument list.

```
qdrant_client.delete_vectors(collection_name="Youtube_Chatbot", points_selector=None, wait=True)

def delete_vectors_by_source_document(client, collection_name, source_document_id, **kwargs):
    """Delete vectors from the collection associated with a specific source document.

    Args:
        client: The Qdrant client instance.
        collection_name: The name of the collection where vectors are stored.
        source_document_id: The ID of the source document whose associated vectors should be deleted.
    """
    filter = {"must": [{"key": "source_document_id", "match": {"value": source_document_id}}]}
    client.delete(collection_name=collection_name, filter=filter, **kwargs)
```

### Prompt template

In [21]:
from llama_index.core import PromptTemplate
from llama_index.core.query_pipeline import QueryPipeline


template = """
You are an AI assistant tasked with answering user questions based on the provided context, which is a transcript of a YouTube video. Your responses should be accurate and derived from the context.

Here is your task:
1. You will be given a user question.
2. You will be provided with the context (transcript) from the video.
3. Use only the provided transcript to answer the question.
4. Explain the details according to the context.
5. Try to give long answers even if the context is smaller.
6. Make headings and subheading to explain the question better.

Context: {transcript}

User Question: {user_question}

Your task is to provide an accurate answer based on the provided context. If the information needed to answer the question is not in the context, respond with "The context does not contain that information."
"""


# The template has two placeholders, {transcript} and {user_question}, which
# give_output() fills through p.run(...).
prompt_tmpl = PromptTemplate(template)
# Two-step chain: the prompt template first, then the LLM.
p = QueryPipeline(chain=[prompt_tmpl, llm], verbose=True)

### Chain

In [63]:
from llama_index.core import PromptTemplate

def give_output(retriever, query):
    """Answer a question using chunks fetched by the retriever.

    Args:
        retriever: Object whose ``retrieve(query)`` returns items exposing
            ``get_text()``.
        query: The user's question.

    Returns:
        Whatever the module-level pipeline ``p`` returns for the question and
        the retrieved context.
    """
    retrieved = retriever.retrieve(query)
    passages = [hit.get_text() for hit in retrieved]
    # Blank line between chunks so they stay distinguishable in the prompt.
    context = "\n\n".join(passages)
    return p.run(user_question=query, transcript=context)

### Deleting Points from vector store

In [69]:
from qdrant_client.http import models

def delete_previos_vectors(source_document_ID):
    """Delete every point in "Youtube_Chatbot" that belongs to one document.

    Points are selected by payload filter: only those whose 'source_doc_id'
    equals ``source_document_ID`` are removed. No points are deleted by
    explicit ID, so unrelated data in the collection is never touched.

    Args:
        source_document_ID: The 'source_doc_id' value written into the
            metadata by get_data_pipeline(). When it is None or empty, nothing
            has been uploaded yet and the function does nothing.

    Returns:
        None.
    """
    if not source_document_ID:
        return

    client.delete(
        collection_name="Youtube_Chatbot",
        points_selector=models.FilterSelector(
            filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="source_doc_id",
                        match=models.MatchValue(value=source_document_ID),
                    ),
                ],
            )
        ),
    )

# delete_previos_vectors("6d2c4480-2364-4471-be84-364da7d97ec5")

### Pipeline calling

In [26]:
# NOTE(review): this import rebinds the notebook-level name `index` (the
# VectorStoreIndex built from the existing vector store) to operator.index, so
# re-running the `index.as_query_engine()` cell after this one fails. Nothing
# in this cell uses it - it looks like an accidental auto-import; confirm and
# remove.
from operator import index

# 'source_doc_id' of the video most recently indexed; read by the clean-up code.
doc_id = None

def vector_creation_pipeline(url):
    """Fetch a video, index its text and return a retriever over it.

    Args:
        url: The YouTube video URL.

    Returns:
        A retriever returning the 3 most similar chunks, or None when the URL
        is invalid or the video's details/transcript could not be fetched.

    Side effects:
        Sets the module-level ``doc_id`` to the new document's ID and prints it.
    """
    global doc_id

    pipeline_result = get_data_pipeline(url)
    if pipeline_result is None:
        # get_data_pipeline() signals failure with None; unpacking it would
        # raise TypeError instead of letting the caller report "Invalid URL".
        return None

    document_text, doc_id = pipeline_result
    print(f"doc id: {doc_id}")
    split_doc = Document_splitter([document_text])
    video_index = create_index(split_doc)

    # llama_index retrievers take `similarity_top_k`; a LangChain-style
    # `search_kwargs={"k": 3}` is not a recognised option, so the default
    # top-k would be used instead of 3.
    return video_index.as_retriever(similarity_top_k=3)

### Testing

In [70]:
import time

# End-to-end smoke test: index one video, ask one question, then clean up.
url="https://youtu.be/KyA2tFCZjrw?si=hYDh5YQ0IVA_wJwm"
start_time = time.time()

retreiver=vector_creation_pipeline(url)
# doc_id is the module-level ID set by vector_creation_pipeline().
print(doc_id)
print(retreiver)
str_output= give_output(retreiver, "What is the video is topic is about?")
# Stop the timer: the measurement covers indexing plus the LLM answer.
end_time = time.time()
# Elapsed wall-clock time in seconds.
time_taken = end_time - start_time
print(f"Time taken to run the process: {time_taken:.2f} seconds")
print(str_output)

# Remove this run's vectors from the collection again.
delete_previos_vectors(doc_id)
retreiver=None

# https://youtu.be/KyA2tFCZjrw?si=hYDh5YQ0IVA_wJwm
# print(vec_retiever)

doc id: 83e35e98-23e7-40db-87a9-e81fa57f4ea3
83e35e98-23e7-40db-87a9-e81fa57f4ea3
<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever object at 0x7ae1c6e8cd60>
[1;3;38;2;155;135;227m> Running module c7f830d2-abd0-4473-b508-e76297051c8d with input: 
user_question: What is the video is topic is about?
transcript: NY : WSTM
Toledo, OH : WNWO
Traverse City, MI : WPBN
Tulsa, OK : KTUL
Washington DC : WJLA
West Palm Beach, FL : WPEC
Wilkes-Barre, PA : WOLF
Yakima, WA : KIMA

This video and all Sinclair Broadcast G...

[0m[1;3;38;2;155;135;227m> Running module 714e2a86-858d-4cf9-9be8-8199763bd76e with input: 
messages: 
You are an AI assistant tasked with answering user questions based on the provided context, which is a transcript of a YouTube video. Your responses should be accurate and derived from the context.

...

[0mTime taken to run the process: 10.19 seconds
assistant: **Video Topic: Analysis of the First Presidential Debate between Donald Trump and K

In [27]:
!pip install --upgrade gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio)
  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.6.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting tomlkit==0.12.0 (from gradio)
  Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)
Collecting websocket

### Gradio Interface

In [47]:
print(vec_retiever)

<llama_index.core.indices.vector_store.retrievers.retriever.VectorIndexRetriever object at 0x7ae1c8be7ca0>


In [71]:
import gradio as gr

# Retriever for the currently loaded video; None until a URL has been processed.
vec_retiever = None

def handle_url(url):
    """Gradio handler: index the video behind ``url`` and report the outcome.

    Args:
        url: The text entered in the URL box.

    Returns:
        "URL successfully uploaded" when a retriever was created, otherwise
        "Invalid URL".

    Side effects:
        Stores the new retriever in the module-level ``vec_retiever``. A blank
        or non-string input is rejected before the pipeline runs and leaves
        the current retriever untouched.
    """
    global vec_retiever

    # Reject blank input up front so an empty submit does not start the
    # YouTube/Qdrant pipeline.
    if not isinstance(url, str) or not url.strip():
        return "Invalid URL"

    vec_retiever = vector_creation_pipeline(url.strip())
    if vec_retiever is None:
        return "Invalid URL"

    return "URL successfully uploaded"

def handle_vecstore_removal():
    """Gradio handler for "Start new Chat": drop the current video's vectors.

    Returns:
        A 2-tuple matching the two output components this handler is wired to
        (the status box and the URL input): the status message and an empty
        string that clears the URL field. Returning a third value would leave
        it without a component to go to.

    Side effects:
        Deletes the vectors stored for the module-level ``doc_id`` (if any),
        then resets ``doc_id`` and ``vec_retiever`` to None.
    """
    global vec_retiever, doc_id

    # Nothing was uploaded yet (or it was already cleared): skip the delete
    # call rather than filtering on a None ID.
    if doc_id is not None:
        delete_previos_vectors(doc_id)
        doc_id = None

    vec_retiever = None
    return "Lets start new conversation", ""

def chatbot(input_text, history):
    """Gradio chat callback: answer ``input_text`` from the loaded video.

    Args:
        input_text: The user's message.
        history: The chat history supplied by Gradio; not used.

    Returns:
        The answer text, or a hint to submit a URL first when no video has
        been loaded (``vec_retiever`` is None).
    """
    # Without a retriever there is no context to search; answer with guidance
    # instead of failing on `None.retrieve(...)`.
    if vec_retiever is None:
        return "Please submit a YouTube URL before asking a question."

    answer_to_prompt = give_output(vec_retiever, input_text)
    return answer_to_prompt.message.content

# Set up the Gradio interface
# Set up the Gradio interface
with gr.Blocks() as demo:
  # Title of the interface
  gr.Markdown("Youtube Video Chatbot")

  # Status line showing the result of the last URL submission
  url_output = gr.Textbox(label="Processed URL Output", interactive=False)

  with gr.Row():
    url_input = gr.Textbox(label="Enter a URL", placeholder="Type URL here...")
    submit_button = gr.Button("Submit URL")
    clear_button= gr.Button("Start new Chat")

    submit_button.click(fn=handle_url, inputs=url_input, outputs=url_output)
    clear_button.click(fn=handle_vecstore_removal, outputs=[url_output, url_input])

  # Chat interface
  chat_interface = gr.ChatInterface(fn=chatbot,
                                    chatbot=gr.Chatbot(height=600),
                                    textbox=gr.Textbox(placeholder="Ask me a question about the video", container=False, scale=7))

  # Clear the chat when the clear button is clicked
  def clear_chat():
    """Return empty values for the visible chat, its stored history and the textbox."""
    return [], [], ""

  # Event outputs must be components. The ChatInterface object is a container,
  # so the reset targets the Chatbot it wraps, the State holding the history
  # (otherwise old turns come back with the next message) and the textbox.
  clear_button.click(
      fn=clear_chat,
      outputs=[chat_interface.chatbot, chat_interface.chatbot_state, chat_interface.textbox],
  )

demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://fa6fdd5cc9ff3009b9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


