In [1]:
# Use the %pip magic (not !pip) so packages are installed into the environment of the running kernel.
# NOTE(review): versions are unpinned and --upgrade pulls the latest releases; pin them once a
# known-good set is confirmed so the notebook stays reproducible.
%pip install --upgrade --quiet faiss-cpu tiktoken chromadb youtube-transcript-api langchain openai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m51.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m502.4/502.4 kB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m51.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m220.9/220.9 kB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.9/92.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.7/59.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import openai
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser

In [3]:
class YouTubeGPT:
  """Quiz generation, answering, grading, summarising and Q&A over a YouTube video.

  Transcripts are fetched with ``YoutubeLoader`` (once per URL and instance) and
  every language-model call goes through the shared ``ChatOpenAI`` instance
  stored in ``self.llm``.
  """

  def __init__(self, openai_api_key: str = "") -> None:
    """Resolve the OpenAI API key and create the chat model and embeddings.

    Args:
      openai_api_key: Optional API key. When empty, the ``OPENAI_API_KEY``
        environment variable is used; if that is missing or empty too, the key
        is requested interactively (input is not echoed).
    """
    # Do not hard-code the key, and do not overwrite a valid environment value
    # with an empty string.
    api_key = openai_api_key or os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
      import getpass  # local import: only needed for the interactive fallback
      api_key = getpass.getpass("OpenAI API key: ")
    os.environ["OPENAI_API_KEY"] = api_key
    self.llm = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0.3)
    self.embeddings = OpenAIEmbeddings()
    self._transcripts = {}  # video_url -> documents returned by YoutubeLoader.load()

  @staticmethod
  def _clean_lines(text) -> list:
    """Return the non-blank lines of ``text``, stripped of surrounding whitespace.

    ``text`` may be a single string (split on line breaks) or an iterable of
    strings. Blank entries are dropped so that numbered questions and answers
    stay aligned even when the model separates items with empty lines.
    """
    lines = text.splitlines() if isinstance(text, str) else text
    return [line.strip() for line in lines if line and line.strip()]

  def _load_transcript(self, video_url: str) -> list:
    """Return the transcript documents for ``video_url``, fetching them only once."""
    if video_url not in self._transcripts:
      self._transcripts[video_url] = YoutubeLoader.from_youtube_url(video_url).load()
    return self._transcripts[video_url]

  def _transcript_text(self, video_url: str) -> str:
    """Return the transcript of ``video_url`` as plain text for use in prompts."""
    # Join the page contents instead of formatting the list of documents, whose
    # repr would put metadata and escape sequences into the prompt.
    return " ".join(doc.page_content for doc in self._load_transcript(video_url))

  def generate_questions(self, video_url: str, n: int) -> tuple:
    """Ask the model for ``n`` quiz questions about the video.

    Args:
      video_url: URL of the YouTube video.
      n: Number of questions requested in the prompt.

    Returns:
      A pair ``(questions, questions_list)``: the raw model reply and the same
      reply split into non-blank lines.
    """
    prompt_template = PromptTemplate(
        input_variables = ["transcript", "number"],
        template = """
        Video: {transcript}
        You are an expert question maker. Given the above video, it is your job to create a quiz of {number} text questions.
        Make sure that questions are not repeated and check that all questions are conforming to the video as well.
        You should only use factual information from the video to create the questions. Do not include the answers.

    """
    )
    runnable = prompt_template | self.llm | StrOutputParser()
    questions = runnable.invoke({"transcript": self._transcript_text(video_url), "number": n})
    return questions, self._clean_lines(questions)

  def get_answers(self, video_url: str, questions) -> tuple:
    """Ask the model to answer ``questions`` using the video transcript.

    Args:
      video_url: URL of the YouTube video.
      questions: The questions, inserted into the prompt as given.

    Returns:
      A pair ``(answers, answers_list)``: the raw model reply and the same
      reply split into non-blank lines.
    """
    prompt_template = PromptTemplate(
        input_variables = ["transcript", "questions"],
        template = """
        Video: {transcript}
        You are an deep learning expert. Given the above video, it is your job to answer the following text questions: {questions}.
        Make sure that the answers are not repeated and that the answers are conforming to the video.
        You should only use factual information from the video to generate these answers. Do not include the questions in your response.
    """
    )
    runnable = prompt_template | self.llm | StrOutputParser()
    answers = runnable.invoke({"transcript": self._transcript_text(video_url), "questions": questions})
    return answers, self._clean_lines(answers)

  def evaluate_answers(self, video_url: str, questions, answers) -> str:
    """Interactively quiz the user and let the model grade each answer.

    Each question is printed, the user's answer is read from standard input,
    and the model's feedback is printed before moving to the next question.

    Args:
      video_url: URL of the YouTube video used as reference material.
      questions: Questions, as a list of strings or one newline-separated string.
      answers: Reference answers in the same order and form as ``questions``.

    Returns:
      The feedback for all questions, separated by blank lines.

    Raises:
      ValueError: If the number of non-blank questions and answers differs,
        because pairing them would grade against the wrong reference answer.
    """
    question_items = self._clean_lines(questions)
    answer_items = self._clean_lines(answers)
    # Fail before any network call: a silent zip() truncation would misgrade.
    if len(question_items) != len(answer_items):
      raise ValueError(
          f"Got {len(question_items)} questions but {len(answer_items)} answers; "
          "they must pair up one-to-one."
      )

    transcript = self._transcript_text(video_url)
    # The prompt and chain do not depend on the question, so build them once.
    prompt_template = PromptTemplate(
        input_variables=["question", "answer", "student_answer", "video"],
        template=
        """
        You are a helpful tutor that can give constructive critism aimed to help a student improve their knowledge within the field of Deep Learning.
        The student has been tasked to answer the following question: {question}.
        The correct answer is: {answer}.
        The user answered the following: {student_answer}.
        Please rate the correctness of the user answer. If you are in doubt, you can fact check the literature here: {video}

        """
    )
    chain = prompt_template | self.llm | StrOutputParser()

    evaluation_results = []  # feedback for each question, in order
    for q, a in zip(question_items, answer_items):
      print(q)
      user_answer = input()
      grade = chain.invoke({'question': q, 'answer': a, 'student_answer': user_answer, "video": transcript})
      print("\n"+grade)
      print()
      evaluation_results.append(grade)
    return "\n\n".join(evaluation_results)

  def yt_vector_db(self, video_url: str, chunk_size: int = 1000, chunk_overlap: int = 100) -> "FAISS":
    """Build a FAISS vector store from the chunked video transcript.

    Args:
      video_url: URL of the YouTube video.
      chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
      chunk_overlap: ``chunk_overlap`` passed to ``RecursiveCharacterTextSplitter``.

    Returns:
      A FAISS store of the transcript chunks, embedded with ``self.embeddings``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    docs = text_splitter.split_documents(self._load_transcript(video_url))
    return FAISS.from_documents(docs, self.embeddings)

  def get_summary(self, video_url: str) -> str:
    """Summarise the video transcript with a "stuff" summarisation chain.

    Args:
      video_url: URL of the YouTube video.

    Returns:
      The result of running the summarisation chain on the transcript documents.
    """
    chain = load_summarize_chain(self.llm, chain_type="stuff")
    return chain.run(self._load_transcript(video_url))

  def get_response(self, db, query, k=4) -> str:
    """Answer ``query`` from the ``k`` transcript chunks most similar to it.

    Args:
      db: Vector store exposing ``similarity_search(query, k=...)``, e.g. the
        result of ``yt_vector_db``.
      query: The user's question.
      k: Number of chunks to retrieve; this bounds how much transcript text is
        placed in the prompt.

    Returns:
      The model's answer as a string.
    """
    docs = db.similarity_search(query, k=k)
    content = " ".join(doc.page_content for doc in docs)  # merge the retrieved chunks into one context string
    prompt_template = PromptTemplate(
        input_variables=["question", "docs"],
        template=
        """
        You are a helpful tutor that can answer questions about a video based one the video's transcript.

        You should answer the following question: {question}
        By searching in the following video transcript: {docs}

        You should only use factual information from the transcript to answer the question.

        If you do not have enough information to answer the question, respond: "Sorry, I don't know this".

        You answers should be detailed.

        """
    )
    runnable = prompt_template | self.llm | StrOutputParser()
    return runnable.invoke({"question": query, "docs": content})

In [4]:
if __name__ == "__main__":
    gpt = YouTubeGPT()
    url = "https://www.youtube.com/watch?v=TTKXgRV3twU" # RNN part 1 video from week 5 on Transformers & RNNs

    # Optional demos (uncomment to try): transcript Q&A and lecture summary.
    #question = "What is said about the bidirectional recurrent neural networks?"
    #db = gpt.yt_vector_db(url)
    #print(f"Answer to question: {gpt.get_response(db, question)} \n")
    #print(f"Summary of lecture: {gpt.get_summary(url)}")

    questions, questions_list = gpt.generate_questions(url, 3)
    print("Questions:\n"+str(questions))
    answers, answers_list = gpt.get_answers(url, questions)
    print("Answers:\n"+str(answers))

    # The model may separate items with blank lines; drop them so that
    # question i is graded against answer i.
    questions_list = [line for line in questions_list if line.strip()]
    answers_list = [line for line in answers_list if line.strip()]

    # evaluate_answers prints every question and its feedback as it goes, so
    # its return value is not printed again here.
    gpt.evaluate_answers(url, questions_list, answers_list)


Questions:
1. What is the main focus of week three in the deep learning course?
2. How does the recurrent architecture differ from the standard feed forward neural network?
3. How is speech represented in the deep speech architecture discussed in the video?
Answers:
1. The main focus of week three in the deep learning course is on recurrent neural networks and their applications for sequences such as time sequences or biological sequences.

2. The recurrent architecture differs from the standard feed forward neural network by having connections over time, allowing for a memory effect where information from previous time steps is used to contribute to the activation of the network at the current time step.

3. Speech is represented in the deep speech architecture by using a spectrogram, which involves taking time windows of the speech and then taking the Fourier transform to create a representation of the energy frequency content for different frequency bands over time. Additionally, th