In [2]:
!pip install -q youtube-transcript-api langchain_community tiktoken langchain-openai langchainhub chromadb langchain langchain-core langchain_google_genai llama_index qdrant-client sentence-transformers fastembed llama-index-llms-groq

In [2]:
%pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed

Collecting llama-index-vector-stores-qdrant
  Downloading llama_index_vector_stores_qdrant-0.3.0-py3-none-any.whl.metadata (767 bytes)
Collecting llama-index-readers-file
  Downloading llama_index_readers_file-0.2.1-py3-none-any.whl.metadata (5.4 kB)
Collecting llama-index-embeddings-fastembed
  Downloading llama_index_embeddings_fastembed-0.2.0-py3-none-any.whl.metadata (697 bytes)
Collecting llama-index-core<0.12.0,>=0.11.0 (from llama-index-vector-stores-qdrant)
  Downloading llama_index_core-0.11.6-py3-none-any.whl.metadata (2.4 kB)
Collecting qdrant-client>=1.7.1 (from llama-index-vector-stores-qdrant)
  Downloading qdrant_client-1.11.1-py3-none-any.whl.metadata (10 kB)
Collecting pypdf<5.0.0,>=4.0.1 (from llama-index-readers-file)
  Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)
Collecting striprtf<0.0.27,>=0.0.26 (from llama-index-readers-file)
  Downloading striprtf-0.0.26-py3-none-any.whl.metadata (2.1 kB)
Collecting fastembed>=0.2.2 (from llama-index-embeddings-fa

### Setup Youtube API

In [3]:
import os
import re
from google.colab import userdata
import googleapiclient.discovery

# Set up the YouTube API client
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY =userdata.get('youtube_video_api')

youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)

### Get Video ID from URL

In [4]:
def get_video_id(url):
    # Regular expression to extract the video ID from the URL
    video_id_pattern = re.compile(r'(?:v=|\/)([0-9A-Za-z_-]{11}).*')
    match = video_id_pattern.search(url)
    if match:
        return match.group(1)

    return None

### Give Video details from Video ID
- Title
- Description
- Transcript
- Publish Date
- Chanel Name

In [5]:
from youtube_transcript_api import YouTubeTranscriptApi


def get_video_details(video_id):
    request = youtube.videos().list(
        part="snippet,contentDetails,statistics",
        id=video_id
    )
    response = request.execute()
    items = response.get('items', [])
    if not items:
        return None

    video_data = items[0]
    title = video_data['snippet']['title']
    description = video_data['snippet']['description']
    published_at = video_data['snippet']['publishedAt']
    author = video_data['snippet']['channelTitle']

    try:
      transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
      vid_transcript= " ".join([entry['text'] for entry in transcript])

    except Exception as e:
      print(f"Could not retrieve transcript for video ID {video_id}: {e}")
      return None

    return {
        'title': title,
        'description': description,
        'published_at': published_at,
        'author': author,
        'Transcript': vid_transcript,
        'video_id': video_id
    }

### Text filtering function

In [6]:
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def filter_text(text):
    if not isinstance(text, str): # Check if the text is a string
        print(f"Non-string value encountered: {text}")
        return "" # Return an empty string if text is not a string

    text = text.lower() # Convert to lowercase

    # Combine regex patterns to reduce the number of sub calls
    pattern = re.compile(r'http\S+|www\S+|https\S+|<.*?>|[\d]|[^\w\s]|[\t\n]')
    text = pattern.sub(' ', text)

    # Removing extra spaces in a single step
    text = re.sub(r'\s+', ' ', text).strip()

    # Tokenization and lemmatization in one loop
    tokens = [lemmatizer.lemmatize(word) for word in text.split() if word not in stop_words]

    return ' '.join(tokens)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### Creating a pipeline in the following sequence
- URL
- Getting video details
- Pre process text
- Convert text into Document

```
!pip uninstall nltk llama_index -y
!pip install nltk llama_index
```

In [7]:
from llama_index.core import Document
import uuid


def get_data_pipeline(url):
  video_id = get_video_id(url)
  result = get_video_details(video_id)

  if result is None:
    return None

# Generate a unique source_document_id using uuid
  global source_document_id
  source_document_id = str(uuid.uuid4())

  text=result['title'] + " " + result['description']+ " " +result['Transcript']
  preprocessed_text = filter_text(text)

  meta_data_dict={'author': result["author"], 'source_doc_id':source_document_id}

  #Converting the text in llama_index.core Document
  documents= (Document(text=text,metadata=meta_data_dict))
  return documents

### Spliting the text into Document

- `chunk_size:` This represents the maximum length of each chunk (or segment) after the text is split. For example, if chunk_size=512, each chunk will contain up to 512 characters (or tokens, depending on the implementation). This helps in breaking down large texts into smaller, manageable pieces.

- `chunk_overlap`: This defines how much of the content from the end of one chunk overlaps with the start of the next chunk. For instance, if chunk_overlap=64, the last 64 characters of one chunk will be repeated at the start of the next chunk. This ensures that important context isn't lost between chunks.

In [8]:
from llama_index.core.text_splitter import SentenceSplitter


def Document_splitter(document):
  text_splitter = SentenceSplitter(chunk_size= 500, chunk_overlap=50)
  splits= text_splitter.get_nodes_from_documents(document)
  return splits

### Setting embedd model

In [9]:
import logging
import sys
import os

import qdrant_client
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings

Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

model_optimized.onnx:   0%|          | 0.00/218M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/740 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

### Qdrant Vector Database

In [10]:
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore
import logging
import sys
import os
from google.colab import userdata

qdrant_api_key = userdata.get('qdrant_api')

client = QdrantClient(
    url="09e46b8b-d380-4b87-b0ff-fc89fa53a52d.europe-west3-0.gcp.cloud.qdrant.io:6333",
    api_key=qdrant_api_key,
)

print(client.get_collections())

collections=[CollectionDescription(name='Youtube_Chatbot')]


In [11]:
vector_store = QdrantVectorStore(
    client=client,
    collection_name="Youtube_Chatbot"
)

### Deleting Points from vector store

In [12]:
from qdrant_client.http import models

def delete_previos_vectors(source_document_ID):
  client.delete(
      collection_name="Youtube_Chatbot",
      points_selector=models.PointIdsList(
          points=[0, 3, 100],
      ),
  )
  client.delete(
      collection_name="Youtube_Chatbot",
      points_selector=models.FilterSelector(
          filter=models.Filter(
              must=[
                  models.FieldCondition(
                      key="source_doc_id",
                      match=models.MatchValue(value= source_document_ID),
                  ),
              ],
          )
      ),
  )



### Creating an index

In [13]:
from llama_index.core import VectorStoreIndex, StorageContext

def create_index(nodes):

  storage_context = StorageContext.from_defaults(vector_store=vector_store)

  documents = [Document(text=node.text, metadata=node.metadata) for node in nodes]

  index = VectorStoreIndex.from_documents(
      documents,
      storage_context=storage_context,
  )

  # index.storage_context.persist()

  return index

### Retreiving the previosly stored vector database (Not nessesary for this project)

In [72]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext

# Assuming qdrant_client and vector_store are already defined

storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,
)

In [21]:
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")

In [22]:
response

Response(response="There is no information provided about the author's childhood or growing up years. The context only talks about the author's current situation as an entrepreneur, author, and parent, and their interest in learning new things.", source_nodes=[NodeWithScore(node=TextNode(id_='4fbd0d56-8786-4c58-8ece-a0755e1fcd6a', embedding=None, metadata={'author': 'TEDx Talks', 'created_at': '2013-03-14T17:17:26Z'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='852aadab-5b73-4c12-aae6-a18af3f225e0', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'author': 'TEDx Talks', 'created_at': '2013-03-14T17:17:26Z'}, hash='2c3e5137b91ca70b54ef90a9ec03936cf9890702199d9ff71e6826402a85b1f4')}, text="And to add to the craziness, \nKelsey and I both work from home, we're entrepreneurs,\nwe run our own businesses. So, Kelsey develops courses\nonline for yoga teachers. I'm an author. And so, I'm working from ho

### Setting up LLM

In [14]:
import os
from google.colab import userdata
from llama_index.llms.groq import Groq

os.environ["GROQ_API_KEY"] = userdata.get('groq_api')
llm = Groq(model="llama3-70b-8192")
Settings.llm = llm

In [20]:
response = llm.complete("Explain the importance of low latency LLMs")
print(response)

Low-latency Large Language Models (LLMs) are crucial in various applications where real-time or near-real-time processing is essential. Here are some reasons why low-latency LLMs are important:

1. **Real-time Conversational AI**: In conversational AI, such as chatbots, voice assistants, and customer service platforms, low-latency LLMs enable rapid response times, creating a more natural and engaging user experience. Delays in response can lead to frustration and abandonment.
2. **Live Translation and Interpretation**: Low-latency LLMs facilitate real-time language translation and interpretation, enabling seamless communication across languages and cultures. This is particularly important in applications like conference calls, video conferencing, and live subtitles.
3. **Speech Recognition and Synthesis**: Fast and accurate speech recognition and synthesis are critical in applications like voice-controlled devices, voice assistants, and speech-to-text systems. Low-latency LLMs ensure t

### Clearing vector store when new URL is uploaded

**kwargs allows passing extra options (like filters, batch sizes, etc.) to the client.delete() method without explicitly defining each option in the function's argument list.

```
qdrant_client.delete_vectors(collection_name="Youtube_Chatbot", points_selector=None, wait=True)

def delete_vectors_by_source_document(client, collection_name, source_document_id, **kwargs):
    """Delete vectors from the collection associated with a specific source document.

    Args:
        client: The Qdrant client instance.
        collection_name: The name of the collection where vectors are stored.
        source_document_id: The ID of the source document whose associated vectors should be deleted.
    """
    filter = {"must": [{"key": "source_document_id", "match": {"value": source_document_id}}]}
    client.delete(collection_name=collection_name, filter=filter, **kwargs)
```

### Prompt template

In [16]:
from llama_index.core import PromptTemplate
from llama_index.core.query_pipeline import QueryPipeline


template = """
You are an AI assistant tasked with answering user questions based on the provided context, which is a transcript of a YouTube video. Your responses should be accurate and derived from the context.

Here is your task:
1. You will be given a user question.
2. You will be provided with the context (transcript) from the video.
3. Use only the provided transcript to answer the question.
4. Explain the details according to the context.
5. Try to give long answers even if the context is smaller.
6. Make headings and subheading to explain the question better.

Context: {transcript}

User Question: {user_question}

Your task is to provide an accurate answer based on the provided context. If the information needed to answer the question is not in the context, respond with "The context does not contain that information."
"""


prompt_tmpl = PromptTemplate(template)
p = QueryPipeline(chain=[prompt_tmpl, llm], verbose=True)

### Chain

In [17]:
from llama_index.core import PromptTemplate


def give_output(retriever, query):
  docs=retriever.retrieve(query)
  docs= "\n\n".join(doc.page_content for doc in docs)
  output = p.run(user_question=query,transcript=docs)
  return str(output)


### Pipeline calling

In [19]:
from operator import index

def vector_creation_pipeline(url):
  document_text= get_data_pipeline(url)
  split_doc= Document_splitter([document_text])
  index= create_index(split_doc)
  retriever = index.as_retriever(search_kwargs={"k": 3})

  return retriever

In [18]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.43.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<0.113.0 (from gradio)
  Downloading fastapi-0.112.4-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio)
  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.6.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting tomlkit==

In [24]:
# UI Components
def gradio_interface():
    # YouTube URL Input and Vector Creation
    youtube_url_input = gr.Textbox(placeholder="Enter YouTube Video URL", label="YouTube Video URL")
    youtube_submit_button = gr.Button("Submit YouTube URL")

    # User Query and Prompt Submission
    user_prompt_input = gr.Textbox(placeholder="Ask your question", label="User Prompt")
    user_submit_button = gr.Button("Submit Prompt")

    # New Chat Button
    new_chat_button = gr.Button("Start New Chat")

    # Output Area
    output_textbox = gr.Markdown()

    # Functions for interaction
    def submit_youtube_url(youtube_url):
        index = vector_creation_pipeline(youtube_url)
        return index

    def submit_user_prompt(prompt, index):
        # Assuming the retriever is based on the current index
        return give_output(index, prompt)

    def start_new_chat():
        # Resetting chat by clearing the interface and deleting vectors
        delete_previos_vectors(source_document_id)  # You can pass actual ID dynamically
        return "", "", ""  # Clear outputs (youtube index, user query result, interface)

    # Gradio Layout
    with gr.Blocks() as demo:
        # Section 1: YouTube URL and Submit
        with gr.Row():
            youtube_url_input.render()
            youtube_submit_button.render()

        # Section 2: User Query Prompt
        with gr.Row():
            user_prompt_input.render()
            user_submit_button.render()

        # Section 3: Output and New Chat Button
        with gr.Row():
            output_textbox.render()
            new_chat_button.render()

        # Define the button actions
        youtube_submit_button.click(submit_youtube_url, inputs=[youtube_url_input], outputs=[output_textbox])
        user_submit_button.click(submit_user_prompt, inputs=[user_prompt_input, output_textbox], outputs=[output_textbox])
        new_chat_button.click(start_new_chat, outputs=[youtube_url_input, user_prompt_input, output_textbox])

    return demo

# Launch the interface
gradio_interface().launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f8afa7dfa43493b160.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [21]:
import gradio as gr

# Gradio Interface
def chat_interface():
    with gr.Blocks() as interface:
        # YouTube Video URL Input
        with gr.Row():
            video_input = gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube video URL here...")
            submit_video_button = gr.Button("Submit Video URL")

        # Output for video index
        video_output = gr.Textbox(label="Video Index Output", interactive=False)

        # Submit video URL and call vector_creation_pipeline
        submit_video_button.click(fn=vector_creation_pipeline, inputs=video_input, outputs=video_output)

        # User Prompt Input
        with gr.Row():
            prompt_input = gr.Textbox(label="Your Prompt", placeholder="Ask anything about the video...")
            submit_prompt_button = gr.Button("Submit Prompt")

        # Output for user prompt
        prompt_output = gr.Textbox(label="Prompt Output", interactive=False)

        # Submit prompt and call give_output
        submit_prompt_button.click(fn=give_output, inputs=[video_output, prompt_input], outputs=prompt_output)

        # Start New Chat Button
        start_new_chat_button = gr.Button("Start New Chat")

        # Clear the interface and call delete_previos_vectors
        start_new_chat_button.click(fn=delete_previos_vectors, inputs=[source_document_id], outputs=[])

        # Clear all outputs when starting a new chat
        start_new_chat_button.click(fn=lambda: (None, None), inputs=[], outputs=[video_output, prompt_output])

    return interface


In [23]:
chat_interface().launch()



AttributeError: 'str' object has no attribute '_id'