In [2]:
# Base OAI Libraries & Environment Setup
import tiktoken
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()

from IPython.display import display, HTML, Markdown

In [102]:
# Utilities

from youtube_transcript_api import YouTubeTranscriptApi
import os
import json
import requests
import html2text
from tqdm import tqdm
from bs4 import BeautifulSoup
from operator import itemgetter

### Loading up Langchain v0.1.0 libraries

In [4]:
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import TokenTextSplitter
from langchain.text_splitter import HTMLHeaderTextSplitter
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

#New Libraries to be imported
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores import Chroma



In [5]:
load_dotenv()

True

## Definitions

In [6]:
def get_num_tokens(text, model=None):
    if model == 'gpt-4':
        enc = tiktoken.encoding_for_model("gpt-4")
    else:
        enc = tiktoken.get_encoding("cl100k_base")

    return len(enc.encode(text))

In [7]:
def read_youtube(video_id):
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Convert to text
        transcript = ' '.join([t['text'] for t in transcript])

        # Create document
        document = Document(page_content=transcript, metadata={'source': f"https://www.youtube.com/watch?v={video_id}"})

        return document
    except:
        return None
    


In [115]:
def yt_search():
    
    yt_ids = []

    while True:
        yt_input = input("Enter a video_id (format 'KQjZ68mToWo') or 'Done': ")
        if yt_input.lower() == 'done':
            break
        if len(yt_input) == 11 and yt_input.isalnum():
            yt_ids.append(yt_input)
        else:
            print("Invalid format. Please enter a valid video_id.")

    return yt_ids

In [117]:
yt_search()

Invalid format. Please enter a valid video_id.
Invalid format. Please enter a valid video_id.


['LuhJEEJQgUM', 'KQjZ68mToWo', 'hvAPnpSfSGo', 'ro312jDqAh0']

In [9]:
def yt_loader(yt_ids):
    yt_docs = []
    
    for id in yt_ids:
        loader = YoutubeLoader.from_youtube_url(f"https://www.youtube.com/watch?v={id}",
        add_video_info=True,
        language=["en", "id"],
        translation="en",
        )
    
    return yt_docs.extend(RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=50).split_documents(loader.load()))

## Inputs

In [109]:
# OpenAI API Setup - gpt-3.5-turbo-1106 is a chat model
client = OpenAI()
model="gpt-3.5-turbo-1106"

#### Video Transcript

In [11]:
yt_ids = []

while True:
    yt_input = input("Enter a video_id (format 'KQjZ68mToWo') or 'Done': ")
    if yt_input.lower() == 'done':
        break
    if len(yt_input) == 11 and yt_input.isalnum():
        yt_ids.append(yt_input)
    else:
        print("Invalid format. Please enter a valid video_id.")

yt_ids


['mmBo8nlu2j0', 'hvAPnpSfSGo', 'KQjZ68mToWo']

In [12]:
yt_docs = []

for id in yt_ids:
    loader = YoutubeLoader.from_youtube_url(f"https://www.youtube.com/watch?v={id}",
    add_video_info=True,
    language=["en", "id"],
    translation="en",
    )
    
    yt_docs.extend(RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=50).split_documents(loader.load()))


In [13]:
yt_docs

[Document(page_content="all right let's get started this is the  opening eye prompt engineering page  before we can start building with it  though we need to set things some things  up so I've created a f a fresh uh  virtual environment what I'm going to do  is I'm going to set up a l  serve uh uh template a l or application  with this the reason that I'm going to  be using lay serve is it'll make it easy  to deploy this once I finish creating  so the first thing that I'm going to do  is bootstrap a link serve project I'm  going to do that with the Lang chain CLI  I'm installing it here after it finishes  installing I'm going to create a new app  I will call  it open AI  prompter I am not going to add a package  because I'm going to be creating my  own I can then go inside it and if I  open it up the main thing that I'm  interested in is this Lang serve server  right here which wraps around fast API  and will make it really easy to deploy  my Lang chain  chain the last thing I'm going 

Ongoing Problem: Cant get Youtube Chapters

#### Web Documents

In [14]:
web_addresses = []

while True: 
    web_address = input("Enter a web address (format 'https://www.example.com') or 'done': ") 
    if web_address.lower() == 'done': 
        break
    if web_address.startswith('http://') or web_address.startswith('https://'): 
        web_addresses.append(web_address) 
    else: print("Invalid format. Please enter a valid web address.")

web_addresses

['https://docs.smith.langchain.com/](https://docs.smith.langchain.com/',
 'https://python.langchain.com/docs/modules/model_io/prompts/quick_start',
 'https://python.langchain.com/docs/integrations/vectorstores/chroma']

In [15]:
web_docs = []

headers_to_split_on = [
    ("h1", "Header 1"),
    ("h2", "Header 2"),
    ("h3", "Header 3"),
    ("h4", "Header 4"),
    ("h5", "Header 5"),
    ("h6", "Header 6"),
]

for address in web_addresses:
    html_text = str(BeautifulSoup(requests.get(address).content, "html.parser"))
    html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on).split_text(html_text)

for doc in html_splitter:
    doc.page_content = doc.page_content.replace("\n", "")
    
web_docs.extend(html_splitter)

web_docs

[Document(page_content="Skip to main content  🦜️🔗 LangChainDocsUse casesIntegrationsGuidesAPI  More  VersioningChangelogDeveloper's guideTemplatesCookbooksTutorialsYouTube videos  Chat  🦜️🔗  LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs  Search  Providers  AnthropicAWSGoogleHugging FaceMicrosoftOpenAI  More  Components  LLMs  Chat models  Document loaders  Document transformers  Text embedding models  Vector stores  Activeloop Deep LakeAlibaba Cloud OpenSearchAnalyticDBAnnoyAstra DBAtlasAwaDBAzure Cosmos DBAzure AI SearchBagelDBBaidu Cloud ElasticSearch VectorSearchBigQuery Vector SearchChromaClarifaiClickHouseDashVectorDatabricks Vector SearchDingoDBDocArray HnswSearchDocArray InMemorySearchElasticsearchEpsillaFaissFaiss (Async)Google Vertex AI Vector SearchSAP HANA Cloud Vector EngineHippoHologresJaguar Vector DatabaseKDB.AILanceDBLanternLLMRailsMarqoMeilisearchMilvusMomento Vector Index (MVI)MongoDB AtlasMyScaleNeo4j Vector IndexNucliaDB

### Main Loader & Embeddings

In [16]:
docs = []

docs.extend(yt_docs)
docs.extend(web_docs)

In [17]:
vectordb = Chroma.from_documents(
  docs,
  embedding=OpenAIEmbeddings(),
  persist_directory='./data'
)
vectordb.persist()

In [33]:
retrieverdb = vectordb.as_retriever(search_kwargs={"k": 3})

In [34]:
# Testing the vectordb
query = "What is langsmith and its use cases"
docs = retrieverdb.get_relevant_documents(query)
docs

[Document(page_content="Skip to main content  🦜️🔗 LangChainDocsUse casesIntegrationsGuidesAPI  More  VersioningChangelogDeveloper's guideTemplatesCookbooksTutorialsYouTube videos  Chat  🦜️🔗  LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs  Search  Use cases  Interacting with APIsChatbotsExtractionSummarizationTaggingWeb scrapingCode understandingSynthetic data generation  Q&A with RAG  SQL  Tool use  Graph querying  Use casesSummarization  On this page  SummarizationUse case\u200bOverview\u200bQuickstart\u200bOption 1. Stuff\u200bGo deeper\u200bOption 2. Map-Reduce\u200bGo deeper\u200bOption 3. Refine\u200bSplitting and summarizing in a single chain\u200b"),
 Document(page_content="Skip to main content  🦜️🔗 LangChainDocsUse casesIntegrationsGuidesAPI  More  VersioningChangelogDeveloper's guideTemplatesCookbooksTutorialsYouTube videos  Chat  🦜️🔗  LangSmithLangSmith DocsLangServe GitHubTemplates GitHubTemplates HubLangChain HubJS/TS Docs  Searc

## Model Setup

In [110]:
llm = ChatOpenAI(model=model, verbose=True)

In [41]:
# Defining the Example Prompt
style = """
    # TITLE: 
    - As per user provided Title.
    
    ## ABSTRACT: 
    - summarize the main concepts covered in the document.
    - emphasize critical points or key takeaways.
    - Use bold or italic text to highlight these.

    ## KEY POINTS:
    - Include important terms and their meanings.
    - Break the topic into smaller sections.
    - Each section should focus on a specific aspect of the topic.
    - Use bullet points or numbered lists for clarity.
    
    ## CONTEXT 
    - Elaborate on the points that are mentioned in the Key Points without repeating word for word
    - The context should focus on the details of the document, should be well structured, informative, in depth, with facts and numbers if available and a minimum of 200 words.
    - Provide examples to illustrate how concepts are applied.
    - You should strive to write the context as long as you can using all relevant and necessary information provided.
    - You must write the context in bullet form.
    - You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
    
    ## REFLECTIONS
    - Formulate 3 to 5 questions that test understanding of the topic.
    - Include space for reflections or personal notes.
    - Recap the most important points
    
    ## CODE EXAMPLES
    """

In [92]:
system_message = """
        You are a world class note taking assistant.
        You summarize notes into concise manner summarizing the most relevant information.
        Follow the information in given in the text, do not make things up - unless you are asked for examples.
        Follow the guidelines provided in the style (between the dotted lines) for reference on how to summarize the note.

        ----------------------------------------------
        {style}
        ----------------------------------------------
    """

In [93]:
user_message = """Use the following document to summarize into a note under the specified title
        the note must follow a specified styleand  specified style:
        
        <document>
        {context}
        </document>
        
        title: {input}"""

In [105]:
title = "Prompt Engineering"

In [94]:
prompt = ChatPromptTemplate.from_messages([
    ("system", system_message),
    ("human", user_message)
    ]
)

### Model Invoke

In [111]:
chain = (
    {
        "context": itemgetter("input") | retrieverdb,
        "input": itemgetter("modelinput"),
        "style": itemgetter("style"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [112]:
chain

{
  context: RunnableLambda(itemgetter('input'))
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7f0a78b6e320>, search_kwargs={'k': 3}),
  input: RunnableLambda(itemgetter('input')),
  style: RunnableLambda(itemgetter('style'))
}
| ChatPromptTemplate(input_variables=['context', 'input', 'style'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['style'], template='\n        You are a world class note taking assistant.\n        You summarize notes into concise manner summarizing the most relevant information.\n        Follow the information in given in the text, do not make things up - unless you are asked for examples.\n        Follow the guidelines provided in the style (between the dotted lines) for reference on how to summarize the note.\n\n        ----------------------------------------------\n        {style}\n        ------------------------------------------

In [113]:
response = chain.invoke({"input": title, "style": style})

In [114]:
Markdown(response)

# TITLE: 
Prompt Engineering

## ABSTRACT: 
- Prompt templates are predefined recipes for generating prompts for language models. 
- LangChain provides tooling to create and work with prompt templates, aiming to create model agnostic templates to facilitate reuse across different language models. 
- Users can easily customize the prompt and try different LLMs (e.g., Claude) via the llm parameter.

## KEY POINTS:
- Prompt Templates:
    - Predefined recipes for generating prompts for language models.
    - Includes instructions, few-shot examples, specific context, and questions for a given task.
- LangChain Tooling:
    - Provides tooling to create and work with prompt templates.
    - Aims to create model agnostic templates for easy reuse across different language models.
- Prompt Customization:
    - Users can easily customize the prompt and try different LLMs via the llm parameter.

## CONTEXT
- Prompt templates are predefined recipes for generating prompts for language models, including instructions, few-shot examples, specific context, and questions appropriate for a given task.
- LangChain provides tooling to create and work with prompt templates, striving to create model agnostic templates for easy reuse across different language models.
- Users can easily customize the prompt and try different LLMs (e.g., Claude) via the llm parameter.
- The document discusses setting up a LangChain environment for prompt engineering, involving the creation of an app using LangChain CLI and LangServe, as well as setting up LangSmith for debugging purposes.
- It also touches upon the process of creating a chain in a Jupyter notebook to demonstrate how it can be exported from a notebook, emphasizing the iterative environment and ease of exporting a chain.
- The document provides code examples and mentions importing prompt templates, output parsers, and chat models from LangChain, highlighting the simplicity and effectiveness of the prompt engineering process.

## REFLECTIONS
- How does LangChain ensure model agnostic templates for prompt engineering?
- What are the benefits of using Jupyter notebook for creating and exporting prompt chains?
- Can you provide an example of customizing a prompt for a specific language model?
- How does LangSmith aid in the debugging process for prompt engineering?
- The document emphasizes the importance of model agnostic prompt templates and the ease of customization, showcasing the practical application of prompt engineering in language model tasks.

    ----------------------------------------------

## Base Query Experiments - No Langchain

### Testing out the splitting of the transcripts by chunks according to tokens
It seems to be better to split by tokens - where we are getting exact as opposed to the Characters (via RecursiveCharacterTextSplitter) because we are achieving more accurate results based on input limits - which is our primary concern. By defining encoding name and model_name, we can get exact num_tokens as desired.

In [11]:
def get_completion(prompt, model=model):
    messages = prompt
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0, 
    )
    return response.choices[0].message.content

In [15]:
document = Document(page_content=transcript, metadata={'source': f"https://www.youtube.com/watch?v={video_id}"})
# splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
# chunks = splitter.split_documents([document])

text_splitter = TokenTextSplitter(chunk_size=15000, chunk_overlap=100, encoding_name="cl100k_base", model_name=model)

texts = text_splitter.split_text(transcript)

In [16]:
get_num_tokens(texts[0], model=model)

15000

In [17]:
len(texts)

19

In [19]:
system_message_i = """
    You are a note taking assistant for a courses. 
    
    Given the Document, write a note based on the following format and instructions defined in point format below:
    
    # TITLE: 
    - One line catchy title to capture the essence of the document.
    
    ## ABSTRACT: 
    - summarize the main concepts covered in the document.
    - emphasize critical points or key takeaways.
    - Use bold or italic text to highlight these.

    ## KEY POINTS:
    - Include important terms and their meanings.
    - Break the topic into smaller sections.
    - Each section should focus on a specific aspect of the topic.
    - Use bullet points or numbered lists for clarity.
    
    ## CONTEXT 
    - The context should focus on the details of the document, should be well structured, informative, in depth, with facts and numbers if available and a minimum of 200 words.
    - Provide examples to illustrate how concepts are applied.
    - You should strive to write the context as long as you can using all relevant and necessary information provided.
    - You must write the context in bullet form.
    - You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
    
    ## REFLECTIONS
    - Formulate questions that test understanding of the topic.
    - Include space for reflections or personal notes.
    - Recap the most important points
    
    """


In [32]:
list_of_outputs = []

for i in range(len(texts)):
    
    user_message = f"""Document: {texts[i]}"""
    prompt = [
            {'role': 'system', 'content': system_message_i},
            {'role': 'user', 'content': user_message}
        ]

    response = get_completion(prompt, model=model)
    
    list_of_outputs.append(response)

In [33]:
len(list_of_outputs)

19

In [34]:
system_message_c = """
    You are a note taking assistant for a courses who summarizes several notes. Each note is separated by "***" sign
    
    Given these combined notes, compile a new note with the same headers but combining the points under each header. 
    Make sure there is no duplication of points.    
    """


In [35]:
combined_note = "***"

for i in range(len(list_of_outputs)):
    combined_note += list_of_outputs[i] + "***"
    

In [36]:
combined_note

"*** on the gpu but for now this is just a brief introduction to google colab and how we're going to be writing code throughout the course so that's it for this video we've covered a lot of the fundamentals we've covered how to approach the course we've covered the resources for the course and now we've got into writing some code so i'll see you in the next video where we're going to start diving into the pytorch fundamentals and writing some actual machine learning code.***the resulting shape of the matrix multiplication is going to be tensor a dot matmul tensor b dot t dot shape and then we'll print out the result of the matrix multiplication so let's see what happens here oh we've got a little bit of a typo here tensor a dot matmul tensor b dot t dot shape tensor a dot matmul tensor b dot t and then we'll print out the result of the matrix multiplication so let's see what happens here oh we've got a little bit of a typo here tensor a dot matmul tensor b dot t dot shape tensor a dot 

In [37]:
get_num_tokens(combined_note, model=model)

8028

In [None]:
user_message = f"""Document: {combined_note}"""
prompt = [
        {'role': 'system', 'content': system_message_c},
        {'role': 'user', 'content': user_message}
    ]

response = get_completion(prompt, model=model)


In [39]:
response

"# Combined Note:\n\n## on the gpu but for now this is just a brief introduction to google colab and how we're going to be writing code throughout the course so that's it for this video we've covered a lot of the fundamentals we've covered how to approach the course we've covered the resources for the course and now we've got into writing some code so i'll see you in the next video where we're going to start diving into the pytorch fundamentals and writing some actual machine learning code.\n\n## the resulting shape of the matrix multiplication is going to be tensor a dot matmul tensor b dot t dot shape and then we'll print out the result of the matrix multiplication so let's see what happens here oh we've got a little bit of a typo here tensor a dot matmul tensor b dot t dot shape tensor a dot matmul tensor b dot t and then we'll print out the result of the matrix multiplication so let's see what happens here oh we've got a little bit of a typo here tensor a dot matmul tensor b dot t 

## Langchain Experiments

### Langsmith Setup [TBD]

The Langsmith does not work - put in placeholder - need to test

In [26]:
LANGCHAIN_API_KEY = os.getenv('LANGSMITH_API_KEY')

In [28]:
!export LANGCHAIN_TRACING_V2=true
!export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
!export LANGCHAIN_API_KEY="ls__47bc0afe60cd4471a05f4f1578aac790"
!export LANGCHAIN_PROJECT="pt-large-date-95"

### Setting up the Embeddings & Vector DBs

In [20]:
# Testing with FAISS
# embeddings = OpenAIEmbeddings()
# vector = FAISS.from_documents(docs, embeddings)
# retriever = vector.as_retriever(search_kwargs={"k": 3})

### Setting up the Model

In [129]:
#frame the LLM object
llm = ChatOpenAI(model=model, verbose=True)
llm.invoke("how can langsmith help with testing?").content

'Langsmith can help with testing in several ways:\n\n1. Automated Testing: Langsmith can provide tools and frameworks for implementing automated testing of software applications. This can help in quickly and efficiently running tests and identifying any issues or bugs in the code.\n\n2. Test Case Management: Langsmith can offer solutions for managing and organizing test cases, making it easier for QA teams to create, execute, and track test cases for different scenarios.\n\n3. Performance Testing: Langsmith can assist in conducting performance testing to evaluate the speed, responsiveness, and stability of software applications under various conditions.\n\n4. Test Data Management: Langsmith can provide tools for managing test data, ensuring that the right data is available for testing different aspects of the software.\n\n5. Continuous Integration and Continuous Testing: Langsmith can support the implementation of continuous integration and continuous testing processes, enabling develo

#### Prompt Templates

In [49]:
style = """
    # TITLE: 
    - As per user provided Title.
    
    ## ABSTRACT: 
    - summarize the main concepts covered in the document.
    - emphasize critical points or key takeaways.
    - Use bold or italic text to highlight these.

    ## KEY POINTS:
    - Include important terms and their meanings.
    - Break the topic into smaller sections.
    - Each section should focus on a specific aspect of the topic.
    - Use bullet points or numbered lists for clarity.
    
    ## CONTEXT 
    - Elaborate on the points that are mentioned in the Key Points without repeating word for word
    - The context should focus on the details of the document, should be well structured, informative, in depth, with facts and numbers if available and a minimum of 200 words.
    - Provide examples to illustrate how concepts are applied.
    - You should strive to write the context as long as you can using all relevant and necessary information provided.
    - You must write the context in bullet form.
    - You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
    
    ## REFLECTIONS
    - Formulate 3 to 5 questions that test understanding of the topic.
    - Include space for reflections or personal notes.
    - Recap the most important points
    """

In [50]:
system_message = f"""
        You are a world class note taking assistant.
        You summarize notes into concise manner summarizing the most relevant information.
        Follow the information in given in the text, do not make things up - unless you are asked for examples.
        Follow the guidelines provided in the style for reference on how to summarize the note.

        <style>
        {style}
        </style>
    """

In [51]:
prompt = ChatPromptTemplate.from_messages([
    ("system", system_message),
    ("user", 
        """Use the following document to summarize into a note under the specified title
        the note must follow a specified styleand  specified style:
        
        <document>
        {context}
        </document>
        
        title: {input}"""
    )
])

In [130]:
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [131]:
response = retrieval_chain.invoke({"input": "Prompt Engineering"})

In [132]:
print(f"Response answer: {response['answer']}") 

Response answer: <style>
        
    # TITLE: 
    - Prompt Engineering
    
    ## ABSTRACT: 
    - Prompt templates are predefined recipes for generating prompts for language models, including instructions, few-shot examples, and specific context and questions. LangChain provides tooling for creating and working with prompt templates, aiming to create model agnostic templates for easy reuse across different language models.
    
    ## KEY POINTS:
    - Prompt templates: Predefined recipes for generating prompts for language models.
    - LangChain tooling: Provides tools for creating and working with prompt templates.
    - Model agnostic templates: Designed for easy reuse across different language models.
    - Prompt structure: Language models expect the prompt to be either a string or a list of chat messages.
    
    ## CONTEXT 
    - Prompt templates are predefined recipes for generating prompts for language models. These templates include instructions, few-shot examples, and 

In [21]:
output_parser = StrOutputParser()

In [23]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f2aac22c790>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='\n        You are a world class note taking assistant.\n        You summarize notes into concise manner summarizing the most relevant information.\n        Follow the information in given in the text, do not make things up - unless you are asked for examples.\n        Follow the guidelines provided in the sty

In [24]:
chain = create_retrieval_chain(retriever, document_chain) | output_parser
chain.invoke({"input": "ChatPromptTemplate"})

ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)

## SuperSeeded

In [22]:
chain = prompt | llm 
chain.invoke({"input": "how can langsmith help with testing?"})

AIMessage(content="Langsmith can greatly assist with testing by providing a comprehensive and efficient testing framework. Here are some ways in which Langsmith can help with testing:\n\n1. Test Automation: Langsmith enables the automation of tests by providing a testing framework that allows developers to write tests in a clear and concise manner. This framework supports various testing methodologies, such as unit testing, integration testing, and end-to-end testing.\n\n2. Test Coverage: Langsmith helps in measuring the coverage of tests by providing tools and features to track which parts of the codebase have been tested. This ensures that all critical areas of the application are thoroughly tested, reducing the chances of undiscovered bugs.\n\n3. Test Orchestration: Langsmith allows developers to easily manage and run tests across different environments and configurations. It provides a way to define test suites, group related tests, and execute them in parallel or sequentially, imp

In [25]:
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()

In [25]:


output_parser = StrOutputParser()
chain = prompt | llm | output_parser
chain.invoke({"input": "how can langsmith help with testing?"})

'Langsmith can greatly assist with testing in several ways:\n\n1. Test Case Generation: Langsmith can automatically generate test cases based on the specifications and requirements of your software. It uses natural language processing techniques to understand the desired behavior of the system and generate test cases that cover various scenarios and edge cases. This can save significant time and effort in test case creation.\n\n2. Test Data Generation: Langsmith can also generate realistic and diverse test data for your software. It understands the data requirements and constraints of your system and creates test data that covers different data types, ranges, and combinations. This can help in ensuring thorough testing and identifying potential issues related to data handling.\n\n3. Test Automation: Langsmith can generate test scripts or code snippets in various programming languages to automate the execution of test cases. It can integrate with popular testing frameworks and tools to 

In [40]:
# To control the randomness and creativity of the generated
# text by an LLM, use temperature = 0.0
chat = ChatOpenAI(temperature=0.0, model=model)
chat

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7f8e84927340>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7f8e6bf61d80>, model_name='gpt-3.5-turbo-1106', temperature=0.0, openai_api_key='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', openai_proxy='')

In [41]:
template_string = """You are a study note taking assistant for courses.

Given the text delimeted by tripple backticks, extract information into a study note following the style that is {style}. 

text: ```{text}```
"""

prompt_template = ChatPromptTemplate.from_template(template_string)

prompt_template.messages[0].prompt

PromptTemplate(input_variables=['style', 'text'], template='You are a study note taking assistant for courses.\n\nGiven the text delimeted by tripple backticks, extract information into a study note following the style that is {style}. \n\ntext: ```{text}```\n')

In [42]:
prompt_template.messages[0].prompt.input_variables

['style', 'text']

In [57]:
studynote_style = """
    ## ABSTRACT: 
    - summarize the main concepts covered in the document.
    - emphasize critical points or key takeaways.
    - Use bold or italic text to highlight these.

    ## KEY POINTS:
    - Include important terms and their meanings.
    - Break the topic into smaller sections.
    - Each section should focus on a specific aspect of the topic.
    - Use bullet points or numbered lists for clarity.
    
    ## CONTEXT 
    - The context should focus on the details of the document, should be well structured, informative, in depth, with facts and numbers if available and a minimum of 200 words.
    - Provide examples to illustrate how concepts are applied.
    - You should strive to write the context as long as you can using all relevant and necessary information provided.
    - You must write the context in bullet form.
    - You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
    
    ## REFLECTIONS
    - Formulate questions that test understanding of the topic.
    - Include space for reflections or personal notes.
    - Recap the most important points
"""

In [45]:
studynote_text = document.page_content

In [58]:
studynote_messages = prompt_template.format_messages(
                    style=studynote_style,
                    text=studynote_text)
studynote_messages

[HumanMessage(content="You are a study note taking assistant for courses.\n\nGiven the text delimeted by tripple backticks, extract information into a study note following the style that is \n    ## ABSTRACT: \n    - summarize the main concepts covered in the document.\n    - emphasize critical points or key takeaways.\n    - Use bold or italic text to highlight these.\n\n    ## KEY POINTS:\n    - Include important terms and their meanings.\n    - Break the topic into smaller sections.\n    - Each section should focus on a specific aspect of the topic.\n    - Use bullet points or numbered lists for clarity.\n    \n    ## CONTEXT \n    - The context should focus on the details of the document, should be well structured, informative, in depth, with facts and numbers if available and a minimum of 200 words.\n    - Provide examples to illustrate how concepts are applied.\n    - You should strive to write the context as long as you can using all relevant and necessary information provided.\

In [59]:
studynote_response = chat(studynote_messages)

In [60]:
Markdown(studynote_response.content)

## ABSTRACT:
The document discusses the Lang chain expression language, which allows for writing minimalist code to build chains within line chain. It emphasizes the advanced features of parallel execution, async, and streaming using the expression language. The document also explores the syntax and functionality of the pipe operator and the runnable lambdas.

## KEY POINTS:
- Lang chain expression language for building minimalist code to create chains within line chain.
- Advanced features include parallel execution, async, and streaming.
- Syntax and functionality of the pipe operator and the runnable lambdas.

## CONTEXT:
- The Lang chain expression language allows for writing minimalist code to build chains within line chain, making it easier to use advanced features like parallel execution, async, and streaming.
- The pipe operator is used to string components together, making the code simpler and more flexible.
- The runnable lambdas are used to wrap functions and create custom operations within the expression language.
- The document provides examples of using the expression language to retrieve information in parallel and modify the output using runnable lambdas.
- The expression language has pros such as minimalist code and advanced features, but also cons like increased abstraction and non-standard syntax.

## REFLECTIONS:
- How does the Lang chain expression language compare to other methods of building chains within line chain?
- What are the potential use cases for the runnable lambdas within the expression language?
- The expression language offers a minimalist approach and advanced features, but also introduces increased abstraction and non-standard syntax.

In [89]:
python_schema = ResponseSchema(name="python_code",
                            description="Parse any code within the text and write the code in string with backticks. \
                                If there was no code found, then output as -1.")
webpage_schema = ResponseSchema(name="webpage_link",
                                    description="Was there any webpage links recommended. \
                                    If this information is not found, output -1.")
reading_schema = ResponseSchema(name="further_reading",
                                    description="Extract any recommendations on further research or reading on the subject. \
                                    Output them as a comma separated Python list. If none is recommended, output -1.")

response_schemas = [python_schema, 
                    webpage_schema,
                    reading_schema]

In [90]:
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()
format_instructions

'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"python_code": string  // Parse any code within the text and write the code in string with backticks.                                 If there was no code found, then output as -1.\n\t"webpage_link": string  // Was there any webpage links recommended.                                     If this information is not found, output -1.\n\t"further_reading": string  // Extract any recommendations on further research or reading on the subject.                                     Output them as a comma separated Python list. If none is recommended, output -1.\n}\n```'

In [109]:
studynote_text = document.page_content

template_string2 = """\
For the following text, extract the following information:

python_code: Was there a code on the text write the code in string with backticks. If there was no code text, then output as -1.

webpage_link: Was there any webpage links recommended. If this information is not found, output -1.

further_reading: Extract any recommendations on further research or reading on the subject Output them as a comma separated Python list. If none is recommended, output -1.

text: {text}

{format_instructions}
"""

dict_prompt = ChatPromptTemplate.from_template(template=template_string2)

dict_messages = dict_prompt.format_messages(text=studynote_text, 
                                format_instructions=format_instructions)

print(dict_messages[0].content)

For the following text, extract the following information:

python_code: Was there a code on the text write the code in string with backticks. If there was no code text, then output as -1.

webpage_link: Was there any webpage links recommended. If this information is not found, output -1.

further_reading: Extract any recommendations on further research or reading on the subject Output them as a comma separated Python list. If none is recommended, output -1.

text: today we're going to be talking about Lang chain expression language which is a pretty interesting idea that essentially allows us to write very minimalist code to build chains within line chain and for sure I think we'll see from this video we can use a lot of L chains more advanced features like parallel execution async and streaming very easily using the expression language rather than just the more typical approach to build Lang chain chains and in my opinion it's worth trying just for that I think we'll see that just us

In [110]:
dict_response = chat(messages)

dict_response.content

'```json\n{\n\t"python_code": -1,\n\t"webpage_link": -1,\n\t"further_reading": "Lang chain expression language, Line chain products, Line chain abstraction"\n}\n```'

In [93]:
output_dict = output_parser.parse(response.content)
#extract dict key gift
output_dict

{'python_code': -1,
 'webpage_link': -1,
 'further_reading': 'Lang chain expression language, Line chain products, Line chain abstraction'}

### Memory

In [101]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.memory import ConversationBufferWindowMemory
from langchain.memory import ConversationTokenBufferMemory
from langchain.memory import ConversationSummaryBufferMemory
# Vector Data Memory
# Entity Memories

In [108]:
memory = ConversationSummaryBufferMemory(llm=chat, max_token_limit=500)


In [111]:
memory.save_context({"input": studynote_messages}, {"output": studynote_response})
memory.save_context({"input": dict_messages}, {"output": dict_response})

In [112]:
memory.load_memory_variables({})

{'history': 'System: The human provides a detailed set of instructions for extracting information into a study note following a specific style. The text provided includes a discussion about the Lang chain expression language, its syntax, and how it works. The AI demonstrates how to use the expression language to run retrievers in parallel and modify the output using runnable lambdas. The AI also discusses the pros and cons of the expression language and concludes that it is worth learning and experimenting with. The human then requests the output to be formatted as a markdown code snippet following a specific schema.\nAI: ```json\n{\n\t"python_code": -1,\n\t"webpage_link": -1,\n\t"further_reading": "Lang chain expression language, Line chain products, Line chain abstraction"\n}\n```'}

In [113]:
conversation = ConversationChain(
    llm=chat, 
    memory = memory,
    verbose=True
)

In [114]:
conversation.predict(input="Hi, what is langchain about?")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
System: The human provides a detailed set of instructions for extracting information into a study note following a specific style. The text provided includes a discussion about the Lang chain expression language, its syntax, and how it works. The AI demonstrates how to use the expression language to run retrievers in parallel and modify the output using runnable lambdas. The AI also discusses the pros and cons of the expression language and concludes that it is worth learning and experimenting with. The human then requests the output to be formatted as a markdown code snippet following a specific schema.
AI: ```json
{
	"python_code": -1,
	"webpage_

'Lang chain is a powerful expression language that allows for running retrievers in parallel and modifying the output using runnable lambdas. It has a specific syntax and is worth learning and experimenting with. It is used for line chain products and line chain abstraction.'

In [116]:
window_memory = ConversationBufferWindowMemory(k=1)  
token_memory = ConversationTokenBufferMemory(llm=chat, max_token_limit=50)

### Chains

In [121]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SimpleSequentialChain
from langchain.chains import SequentialChain

In [118]:
llm = ChatOpenAI(temperature=0.9, model=model)
prompt = ChatPromptTemplate.from_template(
    "What is the best name to describe \
    a company that makes {product}?"
)
chain = LLMChain(llm=llm, prompt=prompt)

In [122]:
product = studynote_messages
chain.run(product)

'Lang Chain Expression Language company'

In [128]:
# chain 1: input= text and output= summary
first_prompt = ChatPromptTemplate.from_template(
    "Come up with a summary for the following text:"
    "\n\n{text}"
)

chain_one = LLMChain(llm=llm, prompt=first_prompt, 
                     output_key="summary"
                    )

# chain 2: input= summary and output= search queries
second_prompt = ChatPromptTemplate.from_template(
    "Generate some additional web search queries based on the summary:"
    "\n\n{summary}"
)

chain_two = LLMChain(llm=llm, prompt=second_prompt, 
                     output_key="search_queries"
                    )

# chain 2: input= summary and output= topic
third_prompt = ChatPromptTemplate.from_template(
    "Generate a one line topic based summary:"
    "\n\n{summary}"
)

chain_three = LLMChain(llm=llm, prompt=third_prompt, 
                     output_key="topic"
                    )

# overall_chain: input= Review 
# and output= English_Review,summary, followup_message
overall_chain = SequentialChain(
    chains=[chain_one, chain_two, chain_three],
    input_variables=["text"],
    output_variables=["summary", "search_queries","topic"],
    verbose=True)

In [130]:
text = document.page_content
overall_chain(text)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


{'text': "today we're going to be talking about Lang chain expression language which is a pretty interesting idea that essentially allows us to write very minimalist code to build chains within line chain and for sure I think we'll see from this video we can use a lot of L chains more advanced features like parallel execution async and streaming very easily using the expression language rather than just the more typical approach to build Lang chain chains and in my opinion it's worth trying just for that I think we'll see that just using this you can build stuff very quickly that's not to say it doesn't have its cons but we'll dive into those later so let's just begin with what this expression language actually is so there's a page here in the line train dos talking about this expression language right so it's LC for short and yeah they just explain a few things you know we streaming acing parel execution so on and so on right but let's just jump into this notebook and we'll see more o

### Over a larger document - Vector

Not working right now - need to revisit

In [None]:
studynote_template =  """You are a study note taking assistant for courses.
You are expected to take notees for a course.

Given the course delimeted by tripple backticks, extract information into a study note following the format: 
{format}. 

course: ```{text}```"""


meetingnote_template = """You are a meeting note assistant. \
You are assighed to take notes for a meeting. \
You are expected to take notes in the following format:
{format}.

Here is a transcript:
transcript: '''{text}'''

"""

In [168]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.indexes import VectorstoreIndexCreator

In [182]:
from langchain_community.document_loaders import DirectoryLoader

In [198]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader("/home/dpvj/SecondBrain/templates/Meeting Note Template.md")
docs =loader.load()

In [199]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [200]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)



In [175]:
embed = embeddings.embed_query(text)

In [201]:
query = "Wnat are the key points of the document?"

In [204]:
retriever = db.as_retriever()

In [205]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])

In [206]:
response = chat.call_as_llm(f"{qdocs} Question: Please list all the headers of the documents") 
response


'\n- [ ]  Follow-up meetings scheduled\n- [ ]  Next steps and responsibilities assigned\n\n## Decisions\n_List any decisions made during the meeting_\n\n## Next Steps\n_Outline the next steps and responsibilities_\n\n## Follow-up\n_Summarize any follow-up actions required_\n\n## Additional Notes\n_Any additional notes or information related to the meeting_\n\n## Meeting Adjourned\n_Time the meeting was adjourned_\n\n## Next Meeting\n_Date, time, and agenda for the next meeting_'

In [207]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [209]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

## Explorations - Youtube

In [133]:
studynote_template =  """You are a study note taking assistant for courses.
You are expected to take notees for a course.

Given the course delimeted by tripple backticks, extract information into a study note following the format: 
{format}. 

course: ```{text}```"""


meetingnote_template = """You are a meeting note assistant. \
You are assighed to take notes for a meeting. \
You are expected to take notes in the following format:
{format}.

Here is a transcript:
transcript: '''{text}'''

"""

In [132]:
studynote_format = studynote_style

In [134]:
meetingnote_format = """
## Agenda
_Summarize the agenda of the meeting_
_Ensure key stakeholders are participating & Leading_

## Goals
_What do we want to achieve from this meeting_
_Align with why the meeting was called in the first place_

## Discussion notes
_Write the notes that are key to the goals & objectives, note who has said it_

## Action items
_Summarize the action items_
"""

In [159]:
prompt_infos = [
    {
        "name": "studynote", 
        "description": "Good for taking notes for a course", 
        "prompt_template": studynote_template,
        "prompt_style": studynote_format
    },
    {
        "name": "meetingnote",  # Changed from "math" to "meetingnote"
        "description": "Good for taking notes for a meeting", 
        "prompt_template": meetingnote_template,
        "prompt_style": meetingnote_format
    }
]


In [160]:
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain,RouterOutputParser
from langchain.prompts import PromptTemplate

In [161]:
destination_chains = {}
for p_info in prompt_infos:
    name = p_info["name"]
    prompt_template = p_info["prompt_template"]
    prompt = ChatPromptTemplate.from_template(template=prompt_template)
    chain = LLMChain(llm=chat, prompt=prompt)
    destination_chains[name] = chain  
    
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)

In [None]:
prompt_template.format_messages(
                    style=studynote_style,
                    text=studynote_text)

In [158]:
destination_chains['studynote'].prompt.format_messages(format=studynote_style, text=studynote_text)

[HumanMessage(content="You are a study note taking assistant for courses.\nYou are expected to take notees for a course.\n\nGiven the course delimeted by tripple backticks, extract information into a study note following the format: \n\n    ## ABSTRACT: \n    - summarize the main concepts covered in the document.\n    - emphasize critical points or key takeaways.\n    - Use bold or italic text to highlight these.\n\n    ## KEY POINTS:\n    - Include important terms and their meanings.\n    - Break the topic into smaller sections.\n    - Each section should focus on a specific aspect of the topic.\n    - Use bullet points or numbered lists for clarity.\n    \n    ## CONTEXT \n    - The context should focus on the details of the document, should be well structured, informative, in depth, with facts and numbers if available and a minimum of 200 words.\n    - Provide examples to illustrate how concepts are applied.\n    - You should strive to write the context as long as you can using all 

In [162]:
default_prompt = ChatPromptTemplate.from_template("{text}")
default_chain = LLMChain(llm=llm, prompt=default_prompt)

In [163]:
MULTI_PROMPT_ROUTER_TEMPLATE = """Given a raw text input to a \
language model select the model prompt best suited for the input. \
You will be given the names of the available prompts and a \
description of what the prompt is best suited for. \
You may also revise the original input if you think that revising\
it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \ name of the prompt to use or "DEFAULT"
    "next_inputs": string \ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt \
names specified below OR it can be "DEFAULT" if the input is not\
well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input \
if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (remember to include the ```json)>>"""

In [164]:
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
    destinations=destinations_str
)
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser(),
)

router_chain = LLMRouterChain.from_llm(llm, router_prompt)

In [165]:
chain = MultiPromptChain(router_chain=router_chain, 
                         destination_chains=destination_chains, 
                         default_chain=default_chain, verbose=True
                        )

In [166]:
chain.run(text)



[1m> Entering new MultiPromptChain chain...[0m




OutputParserException: Parsing text
```json
{
    "destination": "DEFAULT",
    "next_inputs": "today we're going to be talking about Lang chain expression language which is a pretty interesting idea that essentially allows us to write very minimalist code to build chains within line chain and for sure I think we'll see from this video we can use a lot of L chains more advanced features like parallel execution async and streaming very easily using the expression language rather than just the more typical approach to build Lang chain chains and in my opinion it's worth trying just for that I think we'll see that just using this you can build stuff very quickly that's not to say it doesn't have its cons but we'll dive into those later so let's just begin with what this expression language actually is so there's a page here in the line train dos talking about this expression language right so it's LC for short and yeah they just explain a few things you know we streaming acing parel execution so on and so on right but let's just jump into this notebook and we'll see more of how this actually works so there will be a link to this notebook as they usually is at the top of the video right now and I've WR all this in collab so you can do the same it's pretty straightforward we have a few prerequisites we're going to be using line chain of course we're going to be using anthropic the new Claude 2.1 model for our llm we're going to be using cave the embeddings and we're going to be using a dock array just so I can give you an example of parallel retrieval later on which is super interesting now the main things I think we would want to use the Expression language for is these three items here so we have super fast development of chains we have those Advanced features streaming acing parallel execution just work out of the box with these super fast and easy to set up and there's also easy integration with the other Lang chain products so Lang Smith and Lang serve if you are using those now let's take a look at what it actually looks like so to get started with this we're going to need a anthropic API key and you can get that by going to console anthropic tocom you'd come into here hopefully you have an account already and you can click get API keys and you're just going to get your API keys from there if you don't have an anthropic account I think there's still a like a very minor weight list so one I just recommend you sign up and you you'll get access pretty soon but so that you're not waiting you can also just use open AI so you would just swap chat anthropic here with chat openai and swap anthropic API key for openai API key and if you do do that you will also want to drop just drop these two arguments it'll make things easier so looking at this let's see we'll put our API key in here and once we have that we now have these three components we have a prompt a Model A chat model and a output passer okay now in typical L chain we would chain these together using the llm chain okay so you can see llm chain your prompt the L and the output passer okay what I'm going to do is take this prompt where're asking to give me a small report about a particular topic okay so the the input to that is going to be topic and you can see that here so we have topic artificial intelligence and it's obviously just going to Output a small report on that okay so let's run that and see what we get so it's running uh we create our chain running chain. run and we'll just print that output and we'll get this small like rort thing on on AI okay so all looks pretty good now how would we do that with the expression language well we use this this pipe operator and I'm going to go into detail as to how this actually functions because I think that's understanding how this pipe operator functions allows us to just understand what is actually happening here okay so that we can actually understand this abstraction rather than just blindly using it so we string things together right so we have our prompt followed by the model followed by output parer and rather than putting them into an llm chain or some other chain we just string them together with this pipe operator so I mean it's like for sure if I look at this it's kind of it's simpler than this right if you compare those two it's I would say also more flexible because we can just string things together but it's you know I think it's it's not so pythonic as to what we're used to whether or not that is a good or bad thing I'm undecided on like I really I like the minimalist approach here it looks great but it it's maybe hard to understand like if you if you don't understand the syntax and you on python very well this is going to be pretty confusing anyway let's run that so we create our chain using this new this expression language syntax and then we just rather than running run we run invoke and we pass a dictionary of input variables into there so we run this and yeah it's going to do the exact same thing we or very similar output to what we saw before okay so it gives us little report again okay looks cool so these two things this and this doing the exact same thing just different syntax now I think when you see that syntax of the pipe operator for the first time at least for me I was quite confused and I think most people would be confused the way that it works is pretty simple at least the idea behind how it works can be explained very easily what we see on the left of each pipe operator the output from that gets passed to what is on the right of the pipe operator okay and then the output from this is passed into this so it's it's literally piping things from the left of the pipe operators all the way through to the right of the pipe operators that's that's all it's really doing now how that pipe operator actually works is more not necessarily complicated it's probably a little bit hacky in my opinion but it's it's kind of interesting so this pipe operator when we apply it to an object in Python what it actually looks for within that object is this or method here right so if I come down to here we have this kind of confusing class called runnable but let's break it down a little bit okay so I'm going to do class and we're going to call it what still going to call it runable now when we initialize this class we run I'll see the init method here and within that we're going to pass a function right because the way that we're going to implement this is we're going to give a function into this class and we're going to use this class to transform this function into something that we can use this pipe operator on so we want to save that function within our runable class or object and then the next thing you see this is the part that makes the the pipe operator work okay so when a pipe operator is applied to an object it's going to look for the objects all method now the or method that needs to contain another function that we call other here now the way that you can think of this the funk and the other arguments here is that funk is kind of what is on the left of our pipe and other is what is on the right of our pipe okay so what we do is we create this chain function here which is going to consume a set arguments and keyword arguments so we can call it chain Funk as we do there our arguments and we have our keyword arguments now the reason that we set up with args and keyword arguments like this is because we don't know the names of the parameters that I going to be input into our function right so by doing this we can you know those parameter names can vary we can have more or less and this chain function will be able to handle those so we would do return other so our basically this function here that consumes the output from our function okay and again that function is going to take those ARs and keyword arguments okay so from that we would then return the the runnable here so this is going to be our like runnable version of that chain function so basically by doing that we're putting the uh this ability to run chains within each one of the functions that we pass through this actual chain okay so we can do multiple of these so we could have you know other two other three so on and so on now the final thing that we need to have here is a method that allows us to call and and begin this chain now I'm going to implement it with this we will see that line chain actually uses I think they use invoke so rather than call they would have invoke here and that starts to ch but I'm I'm just going to do call because I think it's simpler so that is our runnable function we can run that and I also have it here maybe I'll just run this one and what we want to do is use this Runner board to kind of wrap around different functions that we would like to run with this pipe operator approach to do that we're going to Define two very simple functions here one is add five one is multiply by two okay so let's run those and I'm going to wrap those with this runnable object that we've created and then using this approach right so we have uh we have the chain we're going to do add five and then rather than using the PIP operator I'm going to use the the all method directly and then within that all method I'm going to pass our multiply by two runnable okay so we have those and then we can just call our train so three to it and we get the value 16 which is that's correct so we do 3 + 5 take both those gives us eight and multiply those by two okay so it's correct it's run in the correct order now we can use this syntax or now that we use this or method we can also use the syntax that we see here with the pipe operator so let's try that okay you can you see we we now have this so yeah that's that's pretty interesting so we can you know we can build our own pipe operator functions using using this and this is what line chain is doing okay so when we see this line chain expression language this is what we're actually looking at which is an interesting way of putting things together now that's how it works let's have a look at how we actually use the Expression language itself so we saw already we can use the or operators or the pipe operators now let's put it together in an actual use case so I'm going to be using the coher embedding model you know if you you can also use open a eyes embedding model it's up to you but to get that API key I don't think there's a weight list for coh here so you can you should be able to jump straight into it you can go to dashboard. here.com you'd go to API keys and from the API Keys page you can you can create either a trial key or production key and you just use that so I'm going to add mine in here and I'm going to be using the cair embedding model so the the newest one from there which is very high performance embedding model I'm going to be using that to create two kind of like document stores that we have here okay so we have you know they're very small it's just for an example we have one where we have half the information in Vector sore document sore a and half the information in saw or do saw B you'll see why soon but for now what we're going to do is just use the first one okay so we're going to use a right so it contains information about me when my birthday is the one contains the year of my birthday so let's try putting information into the Vex store or retrieving information my vase store and then feeding that alongside the original query into a chain using the expression language now when we do this there's one important thing that we need to be aware of which is when we use this syntax just using this syntax and nothing else we we have like one input and one output to each of these items right each of these components so how you know how does that work when we have you know we have a context that we need to use here and also a question that we need to feed into our prompt and the way that we do that is by using this runnable parallel object so I've imported those here we have runable parallel and runable pass through the runable parallel which we have here first it allows us to run multiple chains or components in parallel and also extract multiple values from them right so here we're going to run retriever a and then for this question we're using this runnable pass through item what runnable pass through does is whatever was input into the retrieval or the runable parallel object it's just going to return that okay so it's literally a pass through for values that you pass into here so let's run all of that okay so we have our retriever a here that we're using we have our prompt template so on and so on right we have our retrieval that happens first so we have a query when was when was I born we're going to invoke that and this value is being passed into our retriever it's doing a search getting the context it's also being passed through here and going straight through to our prompt okay so then our prompt gets formatted with the question we have when was James born with the context we have the record we will have the records from here okay so V saw a so my birthday the actual date now what we will get here is unfortunately I do not have enough context to definitively State when James was born and it tells me what it found it found this little bit of information so it knows that my birthday is z but it does not specify the year that I was born okay so it can't actually fully answer the question but we can see that this chain is working it's going to do retrieval comparing soon our prompt model Alpha Passa whatever else it's going through everything now the cool thing with runnable parallel you might have guessed with what we have here is that it can run many things in parallel not just a retriever and you know passing through a question we can actually run multiple Retrievers in parallel or we can run multiple different components in parallel at the same time and this is one of the things that is very cool about the expression language is that it you know we we set these things up in parallel and like runnable parallel here is just going to do them in parallel right it's going to run those in parallel we don't have to deal with you know building or writing any of that code ourselves which is I think pretty cool so let's come down to here what I'm going to do is now that we're going to be retrieving information from two places I'm going to create a context a and a context B we're going to run that or we're going to initialize the The Prompt then our runnable parallel now we need to modify a little bit we need to add so we have retriever a we're now mapping that to context a and we have retriever B which we're going to map over to context B and then as before we have our question which is the runable pass through now the chain itself is exactly the same we still just have one like retrieval component there now because you know both our retrievals are being run in parallel within that abstraction so we're going to run that and now I'm going to say the same the same question when was I born okay so now it it knows based on the context provider James was born in 1994 okay stated in the second document with the page content James born in 1994 and maybe if I want to kind of say okay give me the date as well i' say um what date exactly which spawn and we actually get this which is odd because so it it says unfortunately the given context does not provide definitive information to answer the question what dat exactly was James born but then then it actually it gives us here so we have I don't know that there's a little bit of a lack of reasoning ability with Claude in this case clearly so my birthday is 7th December and I was born in 1994 I don't know why it's kind of surprising to me that I didn't get that but interesting but at least we can see that our chain is working correctly we can see that it's pulling in information from both our retrievers there which is cool and we're almost done with what I think are the essentials of the expression language there's just one more thing that I think is super important and it's basically line chains abstraction of doing what I showed you earlier where we created our own sort of runnable class and fed functions into it to create these you know things that we can run with the pipe operator so to do that in line chain they have these runable lambdas okay and this is why earlier on I called that class A runnable because here they they call them runnable lambdas so we have our our add five and our multiply by two I'm going to just come up here and show you what we we had earlier so yeah we have these two functions let's take those okay we can see runable it's what we were doing before so that we could use this let's do it again here all right so we have our add five and I'll multiply by two let's run this this time we're doing runnables but we're just doing them through Line train so our train is going to be at five multiply by two as we did before and as I mentioned you know line chain we have to use infol rather than just calling the object directly so we run that and yes as before we get 16 so yeah we can wrap our own functions using Lang chains runable Lambda here now when would we use that I mean there there are definitely different scenarios why we might want to use that but let me just show you something here which you know kind of bothers me a little bit and it's a good example where we might want to use this either use this or we'd probably want to adjust the output parer as well so we have let's run both of these what we see when we run this is one there's some leading white space here that we could do removing but it also starts each answer with this here's a short fact about artificial intelligence and then we have two double new line characters maybe I don't want that and I just want it to get straight to the fact so what I can do is use this runnable Lambda abstraction to to do that right so I'm going to define a function which is going to look within this string for a double new line within the string if that is in there we're going to split by double new lines and we're going to take everything that occurs after the double new lines now in the case that maybe there are multiple double new lines we're taking everything you know one from one to the end of the list that we would get from this and then we're joining everything back here okay so we're basically just dropping that first one the first part here so let's run that I'm going to wrap that within a runable line and then I'm going to put all those things together and I'm going to add the get fact runable to the end of my chain now let's invoke again and see what we get okay so there's no weird sarting text here and yeah we see with both of those it know it works so our a little runnable Lambda here works well okay so that is really everything I wanted to cover with the expression language you know I think there's there's other things that we can talk about and more to cover but this is I think pretty much everything you need to really get started with it and just understand what this abstraction is actually doing which like I said at the start it's important to understand because then at least we know what we're doing rather than just kind of you putting in these pipe operators and kind of thinking they should work when maybe we're doing something that doesn't make sense so I hope this has been useful for understanding the expression language you know there's pros and there's cons to using this now on the pros obviously there's the Min andless style of the code which is kind of
 raised following error:
Got invalid JSON object. Error: Expecting value: line 1 column 1 (char 0)

## Explorations - Web Links

In [10]:
import requests
from bs4 import BeautifulSoup

def scrap_text(url: str):
    try:
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        
        # Extract all the text from the page
        text = soup.get_text(separator=' ', strip=True)
        return text
    
    except Exception as e:
        print(e)
        return f"Error: {e}"

## Explorations - Metadata

In [6]:
transcripts = YouTubeTranscriptApi.get_transcripts([video_id], languages=['en'])
ls = list(transcripts[0].values())[0]
import pandas as pd

df = pd.DataFrame.from_dict(ls)

filtered_df = df[(df['start'] >= 611) & (df['start'] <= 1224)]
filtered_df

Unnamed: 0,text,start,duration
226,begin this chain now I'm going to,612.959,6.761
227,implement it with this we will see that,615.880,6.280
228,line chain actually uses I think they,619.720,3.640
229,use,622.160,3.480
230,invoke so rather than call they would,623.360,5.919
...,...,...,...
449,what we we had earlier so yeah we have,1212.159,5.161
450,these two functions let's take those,1214.400,4.840
451,okay we can see runable it's what we,1217.320,3.960
452,were doing before so that we could use,1219.240,5.319


In [10]:
import re
soup = BeautifulSoup(requests.get('https://www.youtube.com/watch?v=DjuXACWYkkU').content)
pattern = re.compile('(?<=shortDescription":").*(?=","isCrawlable)')
description = pattern.findall(str(soup))[0].replace('\\n','\n')
print(description)

In this video, we will walk through the steps of building a research assistant from scratch with LangChain and LangSmith. We will cover prompting strategies, how to parallelize steps, and how to customize it to do research over any corpora of data.

Key Links:
Code from video: https://gist.github.com/hwchase17/69a8cdef9b01760c244324339ab64f0c
LangChain Template for Research Assistant: https://github.com/langchain-ai/langchain/tree/master/templates/research-assistant
GPT-Researcher Repo: https://github.com/assafelovic/gpt-researcher


In [21]:
from googleapiclient.discovery import build

api_key = 'AIzaSyDYyXnayylCG2L1ToqrZykiVA--QxZ7-3Y'
youtube = build('youtube', 'v3', developerKey=api_key)

# Fetch video details
request = youtube.videos().list(
    part="snippet,contentDetails,statistics",
    id=video_id
)
response = request.execute()



In [22]:
response

{'kind': 'youtube#videoListResponse',
 'etag': 'VB1pOZ9dsyaJCmlUHZOx-KOEwbk',
 'items': [{'kind': 'youtube#video',
   'etag': 'mFwyljJthlBCHJcUCN2cbl3du5U',
   'id': 'DjuXACWYkkU',
   'snippet': {'publishedAt': '2023-11-16T14:35:01Z',
    'channelId': 'UCC-lyoTfSrcJzA1ab3APAgw',
    'title': 'Building a Research Assistant from Scratch',
    'description': 'In this video, we will walk through the steps of building a research assistant from scratch with LangChain and LangSmith. We will cover prompting strategies, how to parallelize steps, and how to customize it to do research over any corpora of data.\n\nKey Links:\nCode from video: https://gist.github.com/hwchase17/69a8cdef9b01760c244324339ab64f0c\nLangChain Template for Research Assistant: https://github.com/langchain-ai/langchain/tree/master/templates/research-assistant\nGPT-Researcher Repo: https://github.com/assafelovic/gpt-researcher',
    'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/DjuXACWYkkU/default.jpg',
      'w

In [13]:
video_title = response['items'][0]['snippet']['title']

In [14]:
published_date = response['items'][0]['snippet']['publishedAt']

In [15]:
views = response['items'][0]['statistics']['viewCount']

In [93]:
system_message

'\n    You are a note taking assistant for a courses. \n    Given the following document, write key points.\n    If the document is not relevant, write "not relevant".\n    '

In [3]:
response

ChatCompletion(id='chatcmpl-8TClnBZvafYSdsb9WiFpkNfbp1GOt', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='The 2020 World Series was played at Globe Life Field in Arlington, Texas.', role='assistant', function_call=None, tool_calls=None))], created=1701971291, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=53, total_tokens=70))