<a href="https://colab.research.google.com/github/dev-kant-kumar/Python-Programming/blob/main/WorkshopSample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installation

In [None]:
!pip install -q yt-dlp
!pip install -q git+https://github.com/openai/whisper.git
!pip install -U deeplake
!pip install langchain
!pip install openai

# Imports

In [36]:
import getpass
import os
import textwrap

import whisper
import yt_dlp
from langchain import OpenAI, LLMChain
from langchain.chains import RetrievalQA
from langchain.chains.mapreduce import MapReduceChain
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake

# Video Downloader

In [3]:
# Video download helper
def video_downloader(urls, job_id):
    """Download each URL with yt-dlp and collect basic metadata about it.

    Args:
        urls: Iterable of video URLs to download.
        job_id: Prefix for the output files; the i-th URL is saved as
            ``./{job_id}_{i}.mp4``.

    Returns:
        A list of ``(file_name, title, author)`` tuples, one per URL, in
        input order. ``title`` and ``author`` are ``""`` when the extracted
        metadata has no ``"title"`` / ``"uploader"`` entry.
    """
    # for storing information of videos
    video_info = []

    for i, url in enumerate(urls):
        file_name = f'./{job_id}_{i}.mp4'
        ydl_opts = {
            'outtmpl': file_name,
            # The key must be spelled 'quiet'; a misspelled key is not a
            # recognised option, so yt-dlp would keep its normal console output.
            'quiet': True,
            # Prefer separate MP4 video + M4A audio streams; otherwise fall
            # back to the best pre-merged MP4 so the result matches the
            # ``.mp4`` file name chosen above.
            'format': "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]",
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # download=True fetches the media as well as the metadata
            result = ydl.extract_info(url, download=True)

        author = result.get("uploader", "")
        title = result.get("title", "")
        video_info.append((file_name, title, author))
    return video_info

In [None]:
# Fetch the source video; output files are prefixed with the job id "A1"
url = [
    "https://www.youtube.com/watch?v=0FX4jVAL1eg&pp=ygUJam9lIHJvZ2Fu",
]
video_details = video_downloader(urls=url, job_id="A1")

# Transcription

In [None]:
# Load Whisper's "base" model and transcribe the downloaded file
model = whisper.load_model("base")
result = model.transcribe("A1_0.mp4")

# Show the transcript text
transcript = result["text"]
print(transcript)

In [26]:
# Save the transcript text to DocumentOne.txt
transcript_text = result['text']
with open("./DocumentOne.txt", 'w') as transcript_file:
    transcript_file.write(transcript_text)

In [34]:
# Initializing the LLM
# Resolve the OpenAI key without hardcoding it in the notebook: reuse a key
# already set in this kernel, else the OPENAI_API_KEY environment variable,
# else ask for it with a hidden prompt.
openai_api_key = (
    globals().get("openai_api_key")
    or os.environ.get("OPENAI_API_KEY")
    or getpass.getpass("OpenAI API key: ")
)

# temperature=0 is passed to keep completions as deterministic as the API allows.
# NOTE(review): 'text-davinci-003' appears on OpenAI's deprecation list -
# confirm the model is still served before relying on this cell.
llm = OpenAI(openai_api_key=openai_api_key, model_name='text-davinci-003', temperature=0)

In [35]:
# Text splitter configured with chunk_size=1000 and chunk_overlap=0
# NOTE(review): separators are listed as space, comma, newline in that order -
# confirm this ordering is intended for RecursiveCharacterTextSplitter.
text_splitter = RecursiveCharacterTextSplitter(
    separators=[" ", ",", "\n"],
    chunk_overlap=0,
    chunk_size=1000,
)

In [14]:
# Read the saved transcript back from disk
with open('./DocumentOne.txt', 'r') as file:
    text = file.read()

# Cut it into chunks with the splitter configured earlier
texts = text_splitter.split_text(text)

# Only the first four chunks are wrapped as Documents for the chains below
docs = [Document(page_content=chunk) for chunk in texts[:4]]

In [None]:
# Summarization chain using the 'map_reduce' chain type
# ("refine" is another chain type that can be tried here)
chain = load_summarize_chain(llm=llm, chain_type='map_reduce')
summary = chain.run(docs)

# Re-flow the summary to lines of at most 100 characters before printing
wrapper = textwrap.TextWrapper(width=100)
wrap = wrapper.fill(summary)
print(wrap)

In [16]:
# Prompt text asking for a bullet-point summary; {text} is the only placeholder
prompt_template = """
    Write a concise summary of the following:


    "{text}"


    CONCISE SUMMARY IN BULLET POINTS
"""

Bullet_point_template = PromptTemplate(input_variables=["text"], template=prompt_template)

# "stuff" chain type with the custom bullet-point prompt
returnn = load_summarize_chain(
    llm=llm,
    chain_type="stuff",
    prompt=Bullet_point_template,
)
summary1 = returnn.run(docs)

# Wide wrap (1000 columns) that keeps long words and existing whitespace intact
bullet_wrapper = textwrap.TextWrapper(
    width=1000,
    break_long_words=False,
    replace_whitespace=False,
)
wrap1 = bullet_wrapper.fill(summary1)
print(wrap1)

    - Jellyfish have been found to be able to learn despite not having a brain
    - It is suggested that this is because they live in a vibrational universe and can feel the vibes
    - Box jellyfish are poisonous and seem to be aware of danger
    - Octopi are very smart and can memorize, open mason jars, and unlock doors


# Adding Transcripts to DeepLake

In [20]:
# Login
# Shell command run from the notebook: logs in with the `activeloop` CLI.
# `username` and `<password>` are placeholders - substitute real credentials
# before running.
# NOTE(review): a password typed on the command line can end up in the saved
# notebook; consider clearing this cell before sharing - confirm whether the
# CLI offers a token-based alternative.
!activeloop login -u username -p <password>

Successfully logged in to Activeloop.


In [None]:
# Embedding & Upload
# Embedding function built on the 'text-embedding-ada-002' model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model='text-embedding-ada-002')

# Placeholder value - replace with your own Activeloop id. The string literal
# must be closed; an unterminated quote here is a SyntaxError.
activeloop_id = 'username'
dataset = 'VideoSummarizer2'
dataset_path = f"hub://{activeloop_id}/{dataset}"

# Vector store at dataset_path using the embeddings above, then add the
# transcript chunks held in `docs`
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)
db.add_documents(docs)

# Retriever

In [22]:
# Expose the vector store as a retriever and set its search options:
# distance_metric 'cos' and k = 4
retriver = db.as_retriever()
retriver.search_kwargs.update({'distance_metric': 'cos', 'k': 4})

In [23]:
# Prompt text for the question-answering chain; it declares two placeholders,
# {context} and {question}
template = """ Use the following pieces of transcripts from a database to answer the question in bullet point and summarized.
And if the answer is not in the given transcript, then just say you don't know, Don't try to forge an answer outside of the
provided retriever.

{context}

Question = {question}
Answer in summarized bullet points:
"""

prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

In [24]:
# Build the RetrievalQA chain ("stuff" chain type) from the LLM, the retriever
# and the custom prompt
chain_type_kwargs = dict(prompt=prompt_template)
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriver,
    chain_type_kwargs=chain_type_kwargs,
)

In [25]:
# Ask the QA chain a question and print its answer
# (alternative question to try: "Why Jellyfish is considered intelligent?")
question = "What are the signs of top 1% humans?"
answer = qa.run(question)
print(answer)

- Thinking and creating the world
- Being aware of danger
- Memorizing things
- Being able to open mason jars
- Being able to unlock doors
- Being able to figure out how things work
