In [54]:
import os
import sys
import random
from pprint import pprint

SCRIPT_DIR = os.path.dirname(os.path.abspath("__file__"))
sys.path.append(os.path.dirname(SCRIPT_DIR))

from llama_index.llms.groq import Groq

from quiz.topics import extract_topics
from quiz.generator import generate_quiz
from quiz.judge import JUDGE_PROMPT

In [26]:
GROQ_API_KEY = os.environ["GROQ_API_KEY"]
llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)

In [39]:
topics = extract_topics(llm, url="https://www.youtube.com/watch?v=2TJxpyO3ei4")

Fetched transcript with 22735 characters, approx 5683 tokens.


In [40]:
topics

{'Introduction to RAG Application': 'The video introduces a Python RAG (Retrieval Augmented Generation) application that allows users to ask questions about a set of PDFs using natural language. The application will provide an answer and a reference to the source material. The creator mentions that this is an advanced tutorial, building upon a previous basic RAG tutorial, and will cover features such as getting the application running locally using open-source LLMs, updating the vector database with new entries, and testing and evaluating the quality of AI-generated responses.',
 'Overview of RAG and LLMs': 'The creator provides a quick recap of RAG and LLMs (Large Language Models). RAG is a way to index a data source so that it can be combined with an LLM, giving an AI chat experience that can leverage the data. The creator also mentions that LLMs are used to generate responses based on the data found in the PDF sources.',
 'Loading and Preprocessing Data': "The creator explains how t

In [44]:
topic = random.choice(list(topics.keys()))
topic

'Overview of RAG and LLMs'

In [45]:
desc = topics[topic]
desc

'The creator provides a quick recap of RAG and LLMs (Large Language Models). RAG is a way to index a data source so that it can be combined with an LLM, giving an AI chat experience that can leverage the data. The creator also mentions that LLMs are used to generate responses based on the data found in the PDF sources.'

In [46]:
quiz = generate_quiz(llm, topic, desc)
quiz

[('What is RAG in the context of the YouTube video?',
  ['a) A type of Large Language Model',
   'b) A way to index a data source',
   'c) A method for generating responses',
   'd) A type of AI chat experience'],
  'b) A way to index a data source',
  'RAG is a way to index a data source so that it can be combined with an LLM, giving an AI chat experience that can leverage the data.'),
 ('What is the primary function of Large Language Models (LLMs) in the context of the video?',
  ['a) To index data sources',
   'b) To generate responses based on data',
   'c) To provide AI chat experiences',
   'd) To analyze data'],
  'b) To generate responses based on data',
  'LLMs are used to generate responses based on the data found in the PDF sources.'),
 ('What type of sources are used by LLMs in the context of the video?',
  ['a) CSV files', 'b) PDF files', 'c) Excel files', 'd) Word documents'],
  'b) PDF files',
  'LLMs are used to generate responses based on the data found in the PDF sour

In [64]:
question, choices, answer, expl = random.choice(quiz)
question, choices, answer

('What is the primary function of Large Language Models (LLMs) in the context of the video?',
 ['a) To index data sources',
  'b) To generate responses based on data',
  'c) To provide AI chat experiences',
  'd) To analyze data'],
 'b) To generate responses based on data')

In [65]:
prompt = JUDGE_PROMPT.format(topic=desc, question=question, choices="\n".join(choices), answer=answer)

In [66]:
response = llm.complete(prompt)

In [67]:
pprint(response.text)

('Feedback:::\n'
 'Rating: Medium\n'
 '\n'
 "The question is rated as 'Medium' because:\n"
 '\n'
 '- The sentence complexity of the question is relatively simple, with a clear '
 'and direct question about the primary function of LLMs.\n'
 "- The vocabulary difficulty is moderate, with terms like 'Large Language "
 "Models' and 'generate responses', but they are not overly complex or "
 'technical.\n'
 '- The concepts involved to answer the question are relatively '
 'straightforward, as the question is based on a basic understanding of LLMs '
 'and their role in generating responses based on data. However, it does '
 'require some basic knowledge of the topic, which might make it slightly '
 'challenging for viewers who are new to the subject.')
