In [21]:
# Use the saved articles directory to get the article's text
import re, os

# Article whose saved text we want to look up.
link = "https://www.deeplearning.ai/the-batch/issue-272/"

# Build the slug in one pass: drop "scheme://domain/", trim the outer slashes,
# then join the remaining path segments with "_".
# e.g. '/the-batch/issue-272/' -> 'the-batch_issue-272'
slug = re.sub(r'https?://[^/]+/', '', link).strip("/").replace("/", "_")

In [13]:
# List the entries of the "saved_articles" directory (relative to the current
# working directory) and print how many there are.
list_of_files = os.listdir("saved_articles")
print(len(list_of_files))

16


In [33]:
def retrieve_article_text(article_link, directory = "saved_articles"):
    """Return the saved text of the article that ``article_link`` points to.

    The link is reduced to a slug by dropping the scheme and domain, any query
    string or fragment, and the surrounding slashes, then replacing the
    remaining "/" with "_" (e.g. ".../the-batch/issue-272/" ->
    "the-batch_issue-272"). The first file in ``directory`` (in sorted name
    order) whose name ends with "<slug>.txt" is read and returned.

    Args:
        article_link: URL of the article.
        directory: Folder that holds the saved ``.txt`` article files.

    Returns:
        The matching file's contents, or "" when the link yields no slug or
        no file in ``directory`` matches.
    """
    path = re.sub(r'https?://[^/]+/', '', article_link)  # remove scheme and domain
    # A query string or fragment is not part of the article's identity.
    path = re.split(r'[?#]', path, maxsplit=1)[0]
    slug = path.strip("/").replace("/", "_")  # '/the-batch/issue-272/' -> 'the-batch_issue-272'

    # An empty slug would turn the suffix test into endswith(".txt"), which
    # matches every text file; treat that case as "not found" instead.
    if slug:
        # Sorted so the result is deterministic when several files match.
        for file in sorted(os.listdir(directory)):
            if file.endswith(f"{slug}.txt"):
                print("File found, retrieving its text!")
                file_path = os.path.join(directory, file)
                # NOTE(review): read with the platform default encoding, as before;
                # pass encoding=... here once the encoding used when saving is confirmed.
                with open(file_path, 'r') as article_file:
                    return article_file.read()

    print("File cannot be found!")
    return ""

# Load the saved text for `link` from the default "saved_articles" directory;
# the result is "" when no matching file exists.
article_text = retrieve_article_text(link)

File found, retrieving its text!


In [29]:
# Prompt template source for question generation. It asks for key topics plus
# 5-8 graded questions, each with an answer, a question type and a supporting
# quote, in a fixed text layout. `{article_text}` is the single placeholder and
# is filled in when the template is invoked.
prompt = """
I will provide an article text. Please analyze it in the following way:

1. First identify and list the key topics and main points of the article.

2. Then generate 5-8 questions that:
   - Cover different aspects of understanding (factual, conceptual, analytical)
   - Are arranged from simpler to more complex understanding
   - Include a mix of:
     * Factual questions about specific information
     * Conceptual questions about main ideas
     * Questions that connect different parts of the article
     * Questions that relate the content to broader implications

3. For each question:
   - Provide the correct answer
   - Indicate what type of understanding it tests
   - Include the relevant section of text that contains the answer

Format your response like this:

KEY TOPICS:
- Topic 1
- Topic 2
[etc.]

QUESTIONS:

1. [Question Type: Factual]
Q: [Question text]
A: [Answer]
Reference: "[relevant text from article]"

2. [Question Type: Conceptual]
[etc.]

Please ensure the questions are clear, unambiguous, and directly answerable from the article content.

Article text:

{article_text}

"""

In [35]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model used for question generation; max_tokens=1024 limits the length
# of each completion.
llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=1024)

# Wrap the raw prompt string in a template exposing the {article_text} variable.
prompt_template = ChatPromptTemplate.from_template(prompt)

# Pipeline: fill the template -> call the model -> reduce the reply to a plain string.
chain = prompt_template | llm | StrOutputParser()

# Run the pipeline on the loaded article.
response = chain.invoke({"article_text": article_text})

In [37]:
# Print the model's reply verbatim. Calling str.format() with no arguments (as
# was done before) adds nothing and raises if the generated text happens to
# contain a literal "{" or "}", because those are parsed as replacement fields.
print(response)

**KEY TOPICS:**
- Speed of execution in startups and large companies
- Generative AI and its impact on prototyping
- Importance of user feedback in product development
- AI companies' investments in nuclear energy
- The changing dynamics of the Microsoft-OpenAI partnership
- Mistral AI's launch of new language models
- Advances in video generation technology

---

**QUESTIONS:**

1. **[Question Type: Factual]**
   Q: What is the primary advantage of using generative AI in startups according to the article?
   A: The primary advantage is the ability to quickly prototype AI capabilities, which can be built in days or hours instead of months.
   Reference: "Generative AI makes it possible to quickly prototype AI capabilities. AI capabilities that used to take months can sometimes be built in days or hours by simply prompting a large language model."

2. **[Question Type: Conceptual]**
   Q: How does the article suggest that the speed of prototyping with AI affects the product development 

## LLM With Structured Output

In [38]:
from pydantic import BaseModel, Field
from typing import List


class Question(BaseModel):
    # One generated question together with its answer and supporting quote.
    # Plain comments are used here (not a docstring) so the schema generated
    # from this model is left exactly as it was.
    question_type: str = Field(
        description="The type of question (Factual, Conceptual, Analytical).",
    )
    question: str = Field(
        description="The question generated.",
    )
    answer: str = Field(
        description="The answer of the question.",
    )
    reference: str = Field(
        description="The reference text in the article from where the answer is generated.",
    )
    

class QuestionAnswers(BaseModel):
    # Top-level structured result: every Question produced for one article.
    questions: List[Question] = Field(
        description="Set of questions generated",
    )

In [40]:
# Ask the model to return its answer as a QuestionAnswers object.
structured_llm = llm.with_structured_output(QuestionAnswers)

# Pipe the template straight into the structured model, like `chain` above.
# This keeps `prompt` (the template source string) intact, so the template
# cell can be re-run safely, and sends real chat messages rather than the
# role-prefixed text that ChatPromptTemplate.format() produces.
structured_chain = prompt_template | structured_llm
res = structured_chain.invoke({"article_text": article_text})

In [50]:
# Flatten the structured result into plain dicts (one per question) so it can
# be displayed or serialized without depending on the pydantic models.
questions = [
    {
        "question_type": item.question_type,
        "question": item.question,
        "answer": item.answer,
        "reference": item.reference,
    }
    for item in res.questions
]

In [51]:
# Show the list of question dicts built from the structured result.
questions

[{'question_type': 'Factual',
  'question': 'What is the core part of the iterative workflow in designing and building a product according to the article?',
  'answer': 'To build a prototype (or MVP, minimum viable product), get user feedback on it, and use that feedback to drive improvements.',
  'reference': '"A core part of the iterative workflow of designing and building a product ... is to build a prototype (or MVP, minimum viable product), get user feedback on it, and to use that feedback to drive improvements."'},
 {'question_type': 'Conceptual',
  'question': 'Why is the speed of prototyping AI capabilities significant for startups and large companies?',
  'answer': 'The speed of prototyping AI capabilities creates significant pressure to speed up other steps in product development, such as getting user feedback.',
  'reference': '"the speed with which we can prototype AI creates significant pressure to speed up these other steps, too."'},
 {'question_type': 'Factual',
  'quest