In [None]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if present) into the process
# environment, so the API key never has to be hard-coded in the notebook.
load_dotenv()

# Stays None when the variable is not set anywhere.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# The YouTube video this notebook works with.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"

# Setting up the model

Let's define the LLM model that we'll use as part of the workflow.

In [None]:
from langchain_openai.chat_models import ChatOpenAI

# Chat model used by the chains built in the following cells.
model = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=OPENAI_API_KEY,
)

We can test the model by asking a simple question.

In [None]:
# Smoke test: send a plain string straight to the model. As the last
# expression in the cell, the raw response is displayed by the notebook.
model.invoke("What MLB team won the World Series during the COVID-19 pandemic?")

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Output parser appended after the model so the chain yields plain text.
parser = StrOutputParser()

# The `|` operator pipes the model's output into the parser.
chain = model | parser
# Same question as before, this time routed through the parser.
chain.invoke("What MLB team won the World Series during the COVID-19 pandemic?")

# Introducing prompt templates

We want to provide the model with some context and the question. Prompt templates are a simple way to define and reuse prompts.

In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt with two placeholders: {context} carries the reference text and
# {question} carries what we want answered. The instruction tells the model
# to fall back to "I don't know" when the context is not enough.
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Render the template with sample values to inspect the final prompt text.
prompt.format(context="Mary's sister is Susana", question="Who is Mary's sister?")

In [None]:
# Full pipeline: fill in the template, query the model, then reduce the
# response to a plain string.
chain = prompt | model | parser

# The input dict must supply one value per placeholder in the template.
chain.invoke(
    {
        "context": "Mary's sister is Susana",
        "question": "Who is Mary's sister?",
    }
)

# Combining chains 

We can combine different chains to create more complex workflows. For example, let's create a second chain that translates the answer from the first chain into a different language.

Let's start by creating a new prompt template for the translation chain:

In [None]:
# Prompt for the translation step: {answer} is the text to translate and
# {language} is the target language.
translation_prompt = ChatPromptTemplate.from_template(
    "Translate {answer} to {language}"
)

In [None]:
from operator import itemgetter

# Build the two inputs of the translation prompt from the incoming dict:
# "answer" comes from running the question-answering chain on that dict,
# while "language" is read directly from it.
translation_chain = (
    {"answer": chain, "language": itemgetter("language")}
    | translation_prompt
    | model
    | parser
)

# "context" and "question" feed the first chain; "language" feeds the
# translation prompt.
translation_chain.invoke(
    {
        "context": "Mary's sister is Susana. She doesn't have any more siblings.",
        "question": "How many  sisters does Mary have?",
        "language": "Spanish",
    }
)

# Transcribing the YouTube Video

The context we want to send the model comes from a YouTube video. Let's download the video and transcribe it using OpenAI's Whisper.

In [None]:
import tempfile
import whisper
from pytube import YouTube

# Let's do this only if we haven't created the transcription file yet.
# Downloading and transcribing are slow, so the text file acts as a cache.
if not os.path.exists("transcription.txt"):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()
    if audio is None:
        # Fail with a clear message instead of an AttributeError below.
        raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")

    # Let's load the base model. This is not the most accurate model but it's fast.
    whisper_model = whisper.load_model("base")

    # The audio file is only needed while transcribing, so it is downloaded
    # into a temporary directory that is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = audio.download(output_path=tmpdir)
        # fp16=False requests full precision (half precision is presumably
        # unavailable when running on CPU).
        transcription = whisper_model.transcribe(audio_path, fp16=False)["text"].strip()

        with open("transcription.txt", "w") as transcript_file:
            transcript_file.write(transcription)

Let's read the transcription and display the first few characters to ensure everything works as expected.

In [None]:
# Load the cached transcription from disk.
with open("transcription.txt") as file:
    transcription = file.read()

# Preview the first one hundred characters.
transcription[:100]

# Using the entire transcription as context

If we try to invoke the chain using the transcription as context, the model will return an error because the context is too long.

Large Language Models support limited context sizes. The video we are using is too long for the model to handle, so we need to find a different solution.

In [None]:
from langchain_community.document_loaders import TextLoader

# Load the transcription file through LangChain's text loader so it can be
# handed to the text splitter in the next step.
loader = TextLoader("transcription.txt")
text_documents = loader.load()
# Display what the loader returned.
text_documents

There are many different ways to split a document. For this example, we'll use a simple splitter that splits the document into chunks of a fixed size. See Text Splitters for more information about different approaches to splitting documents.

For illustration purposes, let's split the transcription into chunks of 100 characters with an overlap of 20 characters and display the first few chunks:

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Small chunks (100 characters, 20 shared between neighbours) purely for
# illustration.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)

# Show only the first five chunks.
text_splitter.split_documents(text_documents)[:5]