<a href="https://colab.research.google.com/github/gulabpatel/LLMs/blob/main/Video_QnA/01_Youtube_Video_QA_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Develop a versatile Q&A chatbot, employing LlamaIndex, ASTRA DB (Apache Cassandra), and Gradient's open-source models like LLama2, all designed for seamless interaction with YouTube videos


# Installation

In [1]:
!pip install -q cassandra-driver
!pip install -q cassio>=0.1.1
!pip install -q gradientai --upgrade
!pip install -q llama-index
!pip install -q tiktoken==0.4.0
!pip install -Uq openai-whisper
!pip install -Uq yt-dlp

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.1/19.1 MB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.3/166.3 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.6/137.6 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m834.9/834.9 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m217.8/217.8 kB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m78.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.8/143.8 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━

# Imports

In [2]:
import json
import os
import re
import time
import whisper
import yt_dlp
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from llama_index import ServiceContext
from llama_index import set_global_service_context
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.embeddings import GradientEmbedding
from llama_index.llms import GradientBaseModelLLM
from llama_index.vector_stores import CassandraVectorStore

# Download Audio from YouTube video function

In [3]:
def download_audio(link):
    with yt_dlp.YoutubeDL({'extract_audio': True,
                           'format': 'bestaudio',
                           'outtmpl': '%(title)s.mp3'}) as video:
        info_dict = video.extract_info(link, download = True)
        video_title = info_dict['title']
        video.download(link)
    return video_title

# Example to extract audio -

In [4]:
youtube_video_url = "https://www.youtube.com/watch?v=jBbLon5esbE"

In [5]:
download_audio(youtube_video_url)

[youtube] Extracting URL: https://www.youtube.com/watch?v=jBbLon5esbE
[youtube] jBbLon5esbE: Downloading webpage
[youtube] jBbLon5esbE: Downloading ios player API JSON
[youtube] jBbLon5esbE: Downloading android player API JSON
[youtube] jBbLon5esbE: Downloading m3u8 information
[info] jBbLon5esbE: Downloading 1 format(s): 251
[download] Destination: Getting Started with Google Cloud Speech-To-Text API Using Python for Novices.mp3
[download] 100% of    3.81MiB in 00:00:00 at 7.39MiB/s   
[youtube] Extracting URL: https://www.youtube.com/watch?v=jBbLon5esbE
[youtube] jBbLon5esbE: Downloading webpage
[youtube] jBbLon5esbE: Downloading ios player API JSON
[youtube] jBbLon5esbE: Downloading android player API JSON
[youtube] jBbLon5esbE: Downloading m3u8 information
[info] jBbLon5esbE: Downloading 1 format(s): 251
[download] Getting Started with Google Cloud Speech-To-Text API Using Python for Novices.mp3 has already been downloaded
[download] 100% of    3.81MiB


'Getting Started with Google Cloud Speech-To-Text API Using Python for Novices'

# Transcribe Audio from mp3 file

In [6]:
os.makedirs("text_files")

In [7]:
def transcribe(model, audio):
    result = model.transcribe(audio)
    with open("text_files/transcription.txt", 'w') as f:
        f.write(result["text"])
    return 1

In [8]:
model = whisper.load_model("small")

100%|████████████████████████████████████████| 461M/461M [00:01<00:00, 244MiB/s]


In [10]:
transcribe(model, "Getting Started with Google Cloud Speech-To-Text API Using Python for Novices.mp3")

1

# Setup the DataStax Vector DB Connection

In [11]:
cloud_config= {
  'secure_connect_bundle': 'secure-connect-youtubeqa.zip'
}

with open("youtubeqa-token.json") as f:
    secrets = json.load(f)

CLIENT_ID = secrets["clientId"]
CLIENT_SECRET = secrets["secret"]

auth_provider = PlainTextAuthProvider(CLIENT_ID, CLIENT_SECRET)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
session = cluster.connect()

row = session.execute("select release_version from system.local").one()
if row:
  print(row[0])
else:
  print("An error occurred.")

ERROR:cassandra.connection:Closing connection <AsyncoreConnection(140533247606208) fb486192-f18a-4ade-8172-26d505516fa6-us-east1.db.astra.datastax.com:29042:5308a980-6df4-4250-8da7-3dbeefe944a2> due to protocol error: Error from server: code=000a [Protocol error] message="Beta version of the protocol used (5/v5-beta), but USE_BETA flag is unset"


4.0.11-b86be92b8b5f


# Environment Variables

In [18]:
from google.colab import userdata

In [19]:
os.environ['GRADIENT_ACCESS_TOKEN'] = userdata.get('GRADIENT_ACCESS_TOKEN')
os.environ['GRADIENT_WORKSPACE_ID'] = userdata.get('GRADIENT_WORKSPACE_ID')

# Define the Gradient's Model Adapter for LLAMA-2

In [15]:
llm = GradientBaseModelLLM(
    base_model_slug = "llama2-7b-chat",
    max_tokens = 400,
)

# Configure Gradient embeddings

In [16]:
embed_model = GradientEmbedding(
    gradient_access_token = os.environ["GRADIENT_ACCESS_TOKEN"],
    gradient_workspace_id = os.environ["GRADIENT_WORKSPACE_ID"],
    gradient_model_slug = "bge-large",
)

# Setup LLAMA Index Service Context

In [17]:
service_context = ServiceContext.from_defaults(
    llm = llm,
    embed_model = embed_model,
    chunk_size = 256,
)

set_global_service_context(service_context)

[nltk_data] Downloading package punkt to /tmp/llama_index...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# Load the Documents

In [20]:
documents = SimpleDirectoryReader("/content/text_files").load_data()
print(f"Loaded {len(documents)} document(s).")

Loaded 1 document(s).


# Setup and Query Index

In [21]:
index = VectorStoreIndex.from_documents(documents,
                                        service_context = service_context)
query_engine = index.as_query_engine()

In [22]:
response_out = query_engine.query("What is used to convert speech to text in the text file?")
print(response_out.response)

The speech to text conversion in the text file is done using Google's speech to text API.


In [23]:
response_out = query_engine.query("Does this require an API key?")
print(response_out.response)

Yes.

Explanation: In the video, the speaker mentions that in order to use the Google Cloud Speech to Text API, you will require the credentials of the API. The speaker then goes on to explain how to create a JSON file containing the credentials and how to save it to an environment variable called "Google underscore application underscore credentials". This indicates that an API key is required to use the Google Cloud Speech to Text API.


In [24]:
response_out = query_engine.query("Which programming language was used in the solution?")
print(response_out.response)

Python
