In [8]:
from langchain.document_loaders import YoutubeLoader
from langchain.llms import OpenAI
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain import PromptTemplate
from langchain.chains import LLMChain
from dotenv import find_dotenv, load_dotenv
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
import textwrap

In [4]:
# Locate the nearest .env file and load its variables into the process
# environment; the cell's output is load_dotenv's return value.
load_dotenv(dotenv_path=find_dotenv())

True

In [5]:
# Shared embedding client; create_document() reads this module-level name
# when it builds the FAISS index, so this cell must run before that call.
embeddings = OpenAIEmbeddings()

In [None]:
# Placeholder only: video_url is reassigned to a real YouTube URL in the
# cell that calls create_document(), so this empty value is never used.
video_url = ""

In [7]:
def create_document(url: str, chunk_size: int = 1500, chunk_overlap: int = 100):
    """Build a FAISS vector index from a YouTube video's transcript.

    The transcript is loaded with ``YoutubeLoader``, split into overlapping
    chunks, embedded with the module-level ``embeddings`` object, and stored
    in a FAISS index.

    Args:
        url: URL of the YouTube video whose transcript should be indexed.
        chunk_size: Maximum number of characters per transcript chunk.
        chunk_overlap: Number of characters shared between adjacent chunks.

    Returns:
        The FAISS vector store holding the embedded transcript chunks.

    Raises:
        ValueError: If no transcript text could be loaded for ``url``.
    """
    loader = YoutubeLoader.from_youtube_url(url)
    transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(transcript)

    # Fail with a clear message here: with zero chunks there is nothing to
    # embed, and the index construction below would fail far less readably.
    if not docs:
        raise ValueError(f"No transcript text could be loaded for video: {url!r}")

    db = FAISS.from_documents(docs, embeddings)

    return db

In [9]:
def get_response_query(db, query, k=4):
    """Answer a question about a video using its indexed transcript.

    The ``k`` transcript chunks most similar to ``query`` are retrieved from
    ``db``, concatenated, and passed to a chat model as context together with
    the question.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``, e.g.
            the index returned by ``create_document``.
        query: The user's question about the video.
        k: Number of transcript chunks to retrieve as context.

    Returns:
        A ``(response, docs)`` tuple: the model's answer as a single line of
        text, and the list of retrieved documents used as context.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0.7)

    template = """
        You are a helpful assistant that can answer questions about youtube videos 
        based on the video's transcript: {docs}
        
        Only use the factual information from the transcript to answer the question.
        
        If you feel like you don't have enough information to answer the question, say "I don't know".
        
        """

    bot_prompt = SystemMessagePromptTemplate.from_template(template=template)

    human_template = "Answer the following question: {question}"

    human_prompt = HumanMessagePromptTemplate.from_template(template=human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [bot_prompt, human_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)

    response = chain.run(question=query, docs=docs_page_content)
    # Flatten to one line by collapsing whitespace runs into single spaces.
    # Deleting newlines outright would glue the last word of one line onto
    # the first word of the next.
    response = " ".join(response.split())
    return response, docs

In [11]:
# Index the transcript of the chosen video.
video_url = "https://www.youtube.com/watch?v=th4j9JxWGko"
db = create_document(url=video_url)

# Ask a single question; the retrieved chunks are returned with the answer.
query = "what is this video about?"
response, docs = get_response_query(db=db, query=query)

# Wrap the answer at 50 columns for readable cell output.
print("\n".join(textwrap.wrap(response, width=50)))

This video is about the speaker's new workflow for
data science projects using tools like chat GPT
and GitHub co-pilot. They demonstrate their
thought process and how they interact with these
tools to complete a data science project
efficiently. They also mention the importance of
learning to use these AI tools effectively to stay
relevant in the field. The video focuses on a
specific data set from a Kaggle competition and
the speaker's step-by-step approach to predict
measurements in a manufacturing process.
