In [5]:
# Install the third-party packages this notebook imports into the kernel's environment.
# NOTE(review): no versions are pinned, so a fresh install may resolve to different releases — consider pinning.
%pip install langchain-core langchain-openai python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
from langchain_core.prompts import ChatPromptTemplate

from langchain_openai import ChatOpenAI

from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter
from dotenv import load_dotenv


# Load environment variables from a .env file so that settings such as the
# OPENAI_API_KEY required by the chat model do not have to be hardcoded here.
load_dotenv()

def load_text_files(directory):
    """Read every ``.txt`` file located directly inside ``directory``.

    Args:
        directory: Path of the folder to scan (sub-folders are not visited).

    Returns:
        A list with one dict per text file, ordered by file name. Each dict
        has the keys ``"file_name"`` (the name without its extension) and
        ``"file_content"`` (the full text of the file).

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    loaded = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the inputs (and therefore of the batch results) stable across runs.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        # Skip anything that is not a regular .txt file (e.g. a folder whose
        # name happens to end in ".txt"), which open() could not read.
        if not filename.endswith(".txt") or not os.path.isfile(path):
            continue
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which differs between operating systems.
        with open(path, "r", encoding="utf-8") as file:
            loaded.append(
                {
                    "file_name": os.path.splitext(filename)[0],
                    "file_content": file.read(),
                }
            )
    return loaded


# Transcripts are expected as .txt files in the parent directory.
text_contents = load_text_files("..")
# Chat model used by every chain in this notebook.
# Requires the OPENAI_API_KEY environment variable to be set.
model = ChatOpenAI(model="gpt-4o")

# System instructions for turning a lesson transcript into Markdown study notes.
NOTES_SYSTEM_PROMPT = """
    Act as an expert teacher in a course.
    You will be given a video transcript from a lesson of the course. The transcript will be provided between triple quotes.
    Create detailed study notes with the contents of the transcript. Include all the contents of the transcript in the notes.
    The notes should be in Markdown format. Follow these rules:
    - The title should be a level 1 header. It should be the name of the lesson. The title should summarize the main topic of the lesson. The title should not contain the text "Study Notes" or "Lesson".
    - Use titles, lists and tables when necessary. 
    - Do not, under any circumstance, add separators (---) between sections.
    - Use bold text delimiters (**) to highlight important concepts
    - Do not, under any circumstance, use bold text delimiters inside the titles and headers.
    - You may add brief additional information to the notes if you think it is necessary. For example, you may add explanations, examples, or references.
    """

# User message: the transcript, supplied through the `input` variable and
# wrapped in triple quotes as announced by the system prompt.
NOTES_USER_PROMPT = """
    Transcript:
    \"\"\"
    {input}
    \"\"\"
    """

# Prompt template (single variable: `input`) that produces the study notes.
notes_template = ChatPromptTemplate.from_messages(
    [
        ("system", NOTES_SYSTEM_PROMPT),
        ("user", NOTES_USER_PROMPT),
    ]
)


# System instructions for deriving a snake_case file name from some content.
FILE_NAME_SYSTEM_PROMPT = """
    Create the name of the file based on the content of the file. 
    The name of the file should be in snake_case and it should not include the extension. The content of the file will be provided between triple quotes
    """

# One worked example (user turn + AI turn) showing the expected answer shape.
FILE_NAME_EXAMPLE_USER_PROMPT = """
    File content:
    \"\"\"
    Example file content
    \"\"\"
    """
FILE_NAME_EXAMPLE_AI_REPLY = "example_file_name"

# The real request: the content to name, supplied through the `input` variable.
FILE_NAME_USER_PROMPT = """
    File content:
    \"\"\"
    {input}
    \"\"\"
    """

# Few-shot prompt template (single variable: `input`) that names a file.
file_name_template = ChatPromptTemplate.from_messages(
    [
        ("system", FILE_NAME_SYSTEM_PROMPT),
        ("user", FILE_NAME_EXAMPLE_USER_PROMPT),
        ("ai", FILE_NAME_EXAMPLE_AI_REPLY),
        ("user", FILE_NAME_USER_PROMPT),
    ]
)

# System instructions for converting notes into header-less CSV flashcards.
FLASHCARDS_SYSTEM_PROMPT = """
    Create flashcards based on the contents of the file. The file contains student notes from a course. 
    The file content will be provided between triple quotes. 
    Your answer should be a CSV file with two columns: Question and Answer. Follow these rules:
    - The CSV separator should be a comma. 
    - The Question cells should not contain any commas.
    - If the Answer cell has a comma, format the cell so that it does not get split into two cells. 
    - Do not include the headers in the CSV file.
    - Do not include any other commentary or code delimiters (```) in your answer.
    """

# User message: the notes, supplied through the `input` variable and wrapped
# in triple quotes as announced by the system prompt.
FLASHCARDS_USER_PROMPT = """
    File content:
    \"\"\"
    {input}
    \"\"\"
    """

# Prompt template (single variable: `input`) that produces the flashcards.
flashcards_template = ChatPromptTemplate.from_messages(
    [
        ("system", FLASHCARDS_SYSTEM_PROMPT),
        ("user", FLASHCARDS_USER_PROMPT),
    ]
)

# Each chain renders its prompt, sends it to the shared chat model and
# reduces the model's reply to a plain string.
notes_chain = notes_template.pipe(model, StrOutputParser())
file_name_chain = file_name_template.pipe(model, StrOutputParser())
flashcards_chain = flashcards_template.pipe(model, StrOutputParser())

# Number of leading characters of the notes shown to the file-naming prompt.
# Presumably enough to cover the title the notes start with — tune if names
# come out too generic.
FILE_NAME_PREVIEW_CHARS = 250


def safe_file_stem(raw_name):
    """Turn a model-generated name into a string that is safe inside a file name.

    The model is asked for a snake_case name, but nothing guarantees the reply
    is free of surrounding whitespace, quotes, backticks or path separators.
    Every non-alphanumeric character is treated as a word separator and the
    remaining words are joined with single underscores, so a well-formed
    snake_case name passes through unchanged.

    Args:
        raw_name: The text returned by the file-naming chain.

    Returns:
        The sanitised name, or ``"untitled"`` when nothing usable remains.
    """
    separated = "".join(ch if ch.isalnum() else "_" for ch in raw_name.strip())
    stem = "_".join(word for word in separated.split("_") if word)
    return stem or "untitled"


# Full pipeline for one input dict with the keys "file_name" and "file_content".
# The leading RunnablePassthrough makes the first `|` build a runnable sequence
# (a bare `dict | dict` would just merge the two dicts).
chain = (
    RunnablePassthrough()
    # Stage 1: generate the study notes, keeping the original file name.
    | {
        "file_name": itemgetter("file_name"),
        "notes": itemgetter("file_content") | notes_chain,
    }
    # Stage 2: from the notes, derive a descriptive name and the flashcards.
    | {
        "file_name": itemgetter("file_name"),
        "new_file_name": itemgetter("notes")
        | RunnableLambda(lambda notes: notes[:FILE_NAME_PREVIEW_CHARS])
        | file_name_chain
        # The generated name ends up in file paths, so sanitise it first.
        | RunnableLambda(safe_file_stem),
        "notes": itemgetter("notes"),
        "flashcards": itemgetter("notes") | flashcards_chain,
    }
    # Stage 3: build the output file names and pass the contents through.
    | {
        "notes_file_name": lambda x: f"notes_{x['file_name']}_{x['new_file_name']}.md",
        "flashcards_file_name": lambda x: f"flashcards_{x['file_name']}_{x['new_file_name']}.csv",
        "notes": itemgetter("notes"),
        "flashcards": itemgetter("flashcards"),
    }
)

# Run the whole pipeline once per loaded transcript.
results = chain.batch(text_contents)

# Save each result next to the source transcripts (in the parent directory).
for result in results:
    notes_path = os.path.join("..", result["notes_file_name"])
    flashcards_path = os.path.join("..", result["flashcards_file_name"])
    # Write explicitly as UTF-8: model output routinely contains non-ASCII
    # characters, which a locale-dependent default encoding may fail to encode.
    with open(notes_path, "w", encoding="utf-8") as file:
        file.write(result["notes"])
    with open(flashcards_path, "w", encoding="utf-8") as file:
        file.write(result["flashcards"])