## Installation

In [None]:
# %pip installs into the environment of the running kernel.
# llama-index is pinned below 0.10 because this notebook imports from the legacy
# top-level `llama_index` package (ServiceContext, llama_index.llms, ...), which
# later releases moved to `llama_index.core`.
%pip install pytube transformers rank-bm25
%pip install "llama-index<0.10" accelerate optimum bitsandbytes

In [None]:
# Install llama-cpp-python, passing OpenBLAS flags to its CMake build through CMAKE_ARGS.
# NOTE(review): no visible cell imports llama_cpp — confirm this install is still needed.
! CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python

## Imports

In [None]:
import torch
from transformers import pipeline
import os
from pytube import YouTube
from llama_index import SimpleDirectoryReader, PromptHelper
from llama_index import VectorStoreIndex, ServiceContext
from llama_index.retrievers import BM25Retriever
from llama_index.retrievers import QueryFusionRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.prompts import PromptTemplate
from transformers import BitsAndBytesConfig
from llama_index.llms import HuggingFaceLLM
import accelerate

In [None]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook's
# already-running loop; presumably required by the retriever created with
# use_async=True further down — TODO confirm.
nest_asyncio.apply()

Download the audio track of a YouTube video (approach inspired by [this article](https://blog.devgenius.io/download-a-video-from-youtube-and-convert-it-to-mp3-using-python-django-552141990d57))
and save it under an `.mp3` filename.

In [None]:
# Create the transcript directory; exist_ok keeps the cell safe to re-run.
os.makedirs("data", exist_ok=True)

In [None]:
def extract_and_save_audio(video_URL, destination, final_filename):
  """Download the first audio-only stream of a YouTube video and store it as ``<final_filename>.mp3``.

  The stream is downloaded into ``destination`` and then moved to
  ``final_filename + '.mp3'``. The file is only renamed, never transcoded.

  Args:
    video_URL: URL of the YouTube video.
    destination: Directory the stream is downloaded into before the rename.
    final_filename: Target path without extension. A relative value is
      resolved against the current working directory, not ``destination``.

  Returns:
    The path of the renamed file (``final_filename + '.mp3'``).

  Raises:
    ValueError: If the video exposes no audio-only stream.
  """
  video = YouTube(video_URL)
  audio = video.streams.filter(only_audio=True).first()
  if audio is None:
    # Fail with a clear message instead of an AttributeError on `None.download`.
    raise ValueError(f"No audio-only stream found for {video_URL!r}")

  downloaded_path = audio.download(output_path=destination)
  new_file = final_filename + '.mp3'
  # os.replace overwrites an existing target on every platform, so re-running
  # the cell does not fail when the mp3 is already there.
  os.replace(downloaded_path, new_file)
  return new_file

## Extract Text from mp3 file

Transcribe the audio with Whisper through the Hugging Face `pipeline` API.

In [None]:
def extract_text_from_audio(audio_file, output_path='data/transcribe.txt'):
  """Transcribe an audio file with Whisper and write the transcript to a text file.

  Builds a Hugging Face automatic-speech-recognition pipeline for
  ``openai/whisper-large-v2`` in float16 on ``cuda:0``, runs it over
  ``audio_file`` in 30-second chunks, and writes the resulting text to
  ``output_path``.

  Args:
    audio_file: Path of the audio file to transcribe.
    output_path: Where the transcript is written. Defaults to
      ``'data/transcribe.txt'``; missing parent directories are created.

  Returns:
    The path the transcript was written to.
  """
  pipe = pipeline("automatic-speech-recognition",
                  "openai/whisper-large-v2",
                  torch_dtype=torch.float16,
                  device="cuda:0")

  # Swap the model for its BetterTransformer variant before running inference.
  pipe.model = pipe.model.to_bettertransformer()

  outputs = pipe(audio_file,
                 chunk_length_s=30,
                 batch_size=24,
                 return_timestamps=True)

  # Make sure the target directory exists so the write cannot fail on a fresh checkout.
  out_dir = os.path.dirname(output_path)
  if out_dir:
    os.makedirs(out_dir, exist_ok=True)

  # Explicit UTF-8: transcripts may contain non-ASCII characters that the
  # platform default encoding cannot represent.
  with open(output_path, 'w', encoding='utf-8') as f:
    f.write(outputs["text"])

  return output_path

## Getting the text file

In [None]:
# Download into the current working directory instead of the Colab-only
# absolute path "/content/", so the cell also runs outside Colab.
extract_and_save_audio("https://youtu.be/0eZKYLIrNmQ", ".", "extracted_audio")

In [None]:
# Transcribe the audio file produced by the download step.
extract_text_from_audio(
    audio_file="extracted_audio.mp3",
)

## Model

In [None]:
def messages_to_prompt(messages):
  """Render chat messages as a single ``<|role|>``-tagged prompt string.

  Each message whose ``role`` equals ``'system'``, ``'user'`` or
  ``'assistant'`` contributes ``<|role|>\\n{content}\\n``; messages with any
  other role are skipped. The result always begins with a system section
  (an empty one is prepended when missing) and always ends with an open
  ``<|assistant|>`` tag for the model to complete.

  NOTE(review): Zephyr's published chat template appears to terminate each
  turn with ``</s>``; this function does not emit it — confirm that is intended.

  Args:
    messages: Iterable of objects exposing ``role`` and ``content``.

  Returns:
    The assembled prompt string.
  """
  system_tag = "<|system|>\n"
  assistant_tag = "<|assistant|>\n"
  role_tags = (
      ('system', system_tag),
      ('user', "<|user|>\n"),
      ('assistant', assistant_tag),
  )

  segments = []
  for message in messages:
    # Compare with == rather than a dict lookup so role objects that merely
    # compare equal to these strings are still recognised.
    for role_name, tag in role_tags:
      if message.role == role_name:
        segments.append(tag + f"{message.content}\n")
        break

  body = "".join(segments)

  # Guarantee a leading system section, inserting an empty one if necessary.
  if not body.startswith(system_tag):
    body = system_tag + "\n" + body

  # Leave the assistant turn open for generation.
  return body + assistant_tag

In [None]:
# Load the model in 4-bit NF4 with double quantization; computation runs in
# float16 (this notebook only does inference, no training).
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Token budget. The generation allowance must stay well below the context
# window: the prompt and the retrieved chunks have to fit in the remainder.
# (Previously both were 10000, which left no room for the prompt at all.)
CONTEXT_WINDOW = 3900
MAX_NEW_TOKENS = 512

llm_zephyr = HuggingFaceLLM(
    model_name="HuggingFaceH4/zephyr-7b-alpha",
    tokenizer_name="HuggingFaceH4/zephyr-7b-alpha",
    query_wrapper_prompt=PromptTemplate("<|system|>\n\n<|user|>\n{query_str}\n<|assistant|>\n"),
    context_window=CONTEXT_WINDOW,
    max_new_tokens=MAX_NEW_TOKENS,
    model_kwargs={"quantization_config": quantization_config},
    # do_sample=True is needed for temperature/top_k/top_p to have any effect;
    # without it generation is greedy and these settings are ignored.
    generate_kwargs={"do_sample": True, "temperature": 0.3, "top_k": 50, "top_p": 0.95},
    messages_to_prompt=messages_to_prompt,
    device_map="auto",
)

## LlamaIndex Hybrid Re-ranking Index

In [None]:
# Read every file under data/ (the transcript is written there as transcribe.txt).
documents = SimpleDirectoryReader("data/").load_data()

# Zephyr is the LLM used by this service context; documents are split into 256-token chunks.
service_context = ServiceContext.from_defaults(
    llm=llm_zephyr,
    chunk_size=256,
)

# Vector index over the chunked documents.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

# Dense retriever returning the two most similar chunks.
vector_retriever = index.as_retriever(
    similarity_top_k=2,
)

In [None]:
# Sparse (BM25) retriever over the same nodes the vector index was built from.
bm25_retriever = BM25Retriever.from_defaults(
    docstore=index.docstore, similarity_top_k=2
)

# Fuse dense and sparse results with reciprocal-rank re-ranking.
# `llm` is passed explicitly so the query-generation step (num_queries=4) uses
# the local Zephyr model rather than the library's default LLM.
retriever = QueryFusionRetriever(
    [vector_retriever, bm25_retriever],
    llm=llm_zephyr,
    similarity_top_k=2,
    num_queries=4,
    mode="reciprocal_rerank",
    use_async=True,
    verbose=True,
)


query_engine = RetrieverQueryEngine.from_args(retriever, service_context=service_context)

In [None]:
# Ask the fused (vector + BM25) query engine a question about the transcript.
response = query_engine.query(
    "Summarise the Ai head of state?"
)