In [1]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
import langchain
from langchain.cache import InMemoryCache
import os
from dotenv import load_dotenv
# Enable the process-wide in-memory LLM response cache. The global that
# LangChain consults is `langchain.llm_cache`; assigning to `langchain.cache`
# only rebinds the name of the `langchain.cache` submodule and leaves caching
# switched off.
langchain.llm_cache = InMemoryCache()
# Load variables from a local .env file into the environment (presumably the
# OpenAI API key used by ChatOpenAI below); kept last so the cell displays the
# boolean it returns.
load_dotenv()


True

In [2]:
chat = ChatOpenAI()

In [3]:
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate,
    SystemMessagePromptTemplate,
    PromptTemplate,
)

## Parse outputs

In [9]:

from langchain.output_parsers import CommaSeparatedListOutputParser
output_parser = CommaSeparatedListOutputParser()


In [10]:
output_parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz`'

In [12]:
reply = "Hello, how are you?"

In [13]:
output_parser.parse(reply)

['Hello', 'how are you?']

In [19]:
output_parser.get_format_instructions()

'Your response should be a list of comma separated values, eg: `foo, bar, baz`'

In [16]:
human_template = "{request}\n{format_instructions}"
human_prompt = HumanMessagePromptTemplate.from_template(human_template)

In [17]:
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [22]:
chat_prompt.format_prompt(request='give me a list of 5 fruits',
                          format_instructions=output_parser.get_format_instructions()).to_messages()

[HumanMessage(content='give me a list of 5 fruits\nYour response should be a list of comma separated values, eg: `foo, bar, baz`', additional_kwargs={}, example=False)]

In [32]:
model_request = chat_prompt.format_prompt(request='write a poem about stick',
                          format_instructions=output_parser.get_format_instructions()).to_messages()

In [33]:
result = chat(model_request)

In [34]:
result.content

"Straight and slender, standing tall,\nA humble stick, beloved by all.\nIn forests deep or parks so green,\nIts presence there, a timeless scene.\n\nA wand of power in children's hands,\nUnleashing magic in enchanted lands.\nFrom knights and warriors to wizards' spell,\nA stick's transformation, stories tell.\n\nA faithful companion on nature's trail,\nGuiding our steps, without a fail.\nA makeshift bridge across the stream,\nA sturdy support for a cherished dream.\n\nIn winter's grasp, a tool to slide,\nOn frozen ponds, a joyous ride.\nFrom snowball fights to snowmen's charm,\nA stick's laughter, in frosty arms.\n\nA painter's brush, in an artist's style,\nCreating masterpieces that beguile.\nSwirling colors upon canvas grand,\nA stick's strokes, a masterpiece's hand.\n\nA game of fetch with a loyal hound,\nChasing a stick, both lost and found.\nA moment of joy in a wagging tail,\nA bond unspoken, beyond the trail.\n\nA crackling fire, on a starlit night,\nA stick's sacrifice, a warm 

In [35]:
output_parser.parse(result.content)

['Straight and slender',
 'standing tall,\nA humble stick',
 'beloved by all.\nIn forests deep or parks so green,\nIts presence there',
 "a timeless scene.\n\nA wand of power in children's hands,\nUnleashing magic in enchanted lands.\nFrom knights and warriors to wizards' spell,\nA stick's transformation",
 "stories tell.\n\nA faithful companion on nature's trail,\nGuiding our steps",
 "without a fail.\nA makeshift bridge across the stream,\nA sturdy support for a cherished dream.\n\nIn winter's grasp",
 'a tool to slide,\nOn frozen ponds',
 "a joyous ride.\nFrom snowball fights to snowmen's charm,\nA stick's laughter",
 "in frosty arms.\n\nA painter's brush",
 "in an artist's style,\nCreating masterpieces that beguile.\nSwirling colors upon canvas grand,\nA stick's strokes",
 "a masterpiece's hand.\n\nA game of fetch with a loyal hound,\nChasing a stick",
 'both lost and found.\nA moment of joy in a wagging tail,\nA bond unspoken',
 'beyond the trail.\n\nA crackling fire',
 "on a star

## Datetime Parser

In [36]:
from langchain.output_parsers import DatetimeOutputParser
output_parser = DatetimeOutputParser()

In [37]:
output_parser.get_format_instructions()

'Write a datetime string that matches the \n            following pattern: "%Y-%m-%dT%H:%M:%S.%fZ". Examples: 1758-09-03T11:44:41.010048Z, 0362-06-08T09:38:50.850969Z, 0825-05-02T01:03:38.607040Z'

In [38]:
template_text = "{request}\n{format_instructions}"

In [39]:
human_prompt = HumanMessagePromptTemplate.from_template(template_text)

In [45]:
system_text = "you always reply to questions only in datetime patterns"
system_prompt = SystemMessagePromptTemplate.from_template(system_text)

In [51]:
chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])

In [52]:
query = chat_prompt.format_prompt(request='what is date India got independence',
                                  format_instructions=output_parser.get_format_instructions()).to_messages()

In [53]:
result = chat(query)

In [54]:
result.content

'1947-08-15T00:00:00.000000Z'

In [55]:
output_parser.parse(result.content)

datetime.datetime(1947, 8, 15, 0, 0)

### 2nd Approach

In [56]:
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [57]:
query = chat_prompt.format_prompt(request='what is date India got independence',
                                  format_instructions=output_parser.get_format_instructions()).to_messages()

In [58]:
result = chat(query)

In [59]:
result.content

'India got independence on August 15, 1947. Therefore, the datetime string for this event would be "1947-08-15T00:00:00.000000Z".'

In [60]:
from langchain.output_parsers import OutputFixingParser

In [61]:
output_parser

DatetimeOutputParser(format='%Y-%m-%dT%H:%M:%S.%fZ')

In [62]:
misformatted = result.content

In [64]:
outputfix = OutputFixingParser.from_llm(parser=output_parser, llm=chat)

In [65]:
outputfix.parse(misformatted)

datetime.datetime(1947, 8, 15, 0, 0)

### pydantic parsing

In [67]:
from langchain.output_parsers import PydanticOutputParser

In [69]:
from pydantic import BaseModel, Field

In [77]:
# Schema that PydanticOutputParser validates the model's JSON reply against.
# Documented with `#` comments rather than a docstring so the class body, and
# therefore the schema shown in the format instructions, stays exactly as is.
# NOTE(review): the class name is misspelled ("Scientiest"); it is referenced
# when the parser is built, so a rename has to touch that cell as well.
class Scientiest(BaseModel):
    # The `description` text is emitted verbatim in the format instructions.
    name: str = Field(description='scientist')
    # Bare `list` yields `"items": {}` in the emitted schema, i.e. the element
    # type is unconstrained.
    discoveries: list = Field(description='python list of discoveries')

In [78]:
parser = PydanticOutputParser(pydantic_object=Scientiest)

In [79]:
print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"name": {"title": "Name", "description": "scientist", "type": "string"}, "discoveries": {"title": "Discoveries", "description": "python list of discoveries", "type": "array", "items": {}}}, "required": ["name", "discoveries"]}
```


In [80]:
human_prompt = HumanMessagePromptTemplate.from_template("{request}\n{format_instructions}")

In [81]:
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [85]:
query = chat_prompt.format_prompt(request='tell me about a famous scientist',
                                format_instructions=parser.get_format_instructions()).to_messages()

In [86]:
result = chat(query, temperature=0)

In [87]:
result.content

'{\n  "name": "Albert Einstein",\n  "discoveries": ["Theory of Relativity", "Photoelectric Effect", "Brownian Motion"]\n}'

In [88]:
parser.parse(result.content)

Scientiest(name='Albert Einstein', discoveries=['Theory of Relativity', 'Photoelectric Effect', 'Brownian Motion'])

In [89]:
type(parser.parse(result.content))

__main__.Scientiest

## Saving and loading prompts

In [4]:
template_text = "Tell me about a {planent}"

In [5]:
prompt=PromptTemplate.from_template(template_text)

In [6]:
prompt

PromptTemplate(input_variables=['planent'], output_parser=None, partial_variables={}, template='Tell me about a {planent}', template_format='f-string', validate_template=True)

In [7]:
prompt.save('../data/planet_prompt.json')

In [8]:
from langchain.prompts import load_prompt

In [9]:
loaded_prompt = load_prompt('../data/planet_prompt.json')

In [10]:
loaded_prompt

PromptTemplate(input_variables=['planent'], output_parser=None, partial_variables={}, template='Tell me about a {planent}', template_format='f-string', validate_template=True)

## Data Connections

### CSV loader

In [11]:
from langchain.document_loaders import CSVLoader

In [12]:
loader = CSVLoader('../data/penguins.csv')

In [13]:
data = loader.load()    

In [16]:
type(data[0])

langchain.schema.document.Document

In [17]:
data[0]

Document(page_content='species: Adelie\nisland: Torgersen\nbill_length_mm: 39.1\nbill_depth_mm: 18.7\nflipper_length_mm: 181\nbody_mass_g: 3750\nsex: MALE', metadata={'source': '../data/penguins.csv', 'row': 0})

In [20]:
print(data[0].page_content)

species: Adelie
island: Torgersen
bill_length_mm: 39.1
bill_depth_mm: 18.7
flipper_length_mm: 181
body_mass_g: 3750
sex: MALE


### HTML loader

In [24]:
from langchain.document_loaders import BSHTMLLoader

In [25]:
loader = BSHTMLLoader('../data/some_website.html')

In [26]:
data = loader.load()

In [31]:
data[0].page_content

'Heading 1'

### PDF loader

In [33]:
from langchain.document_loaders import PyPDFLoader

In [35]:
loader = PyPDFLoader('../data/SomeReport.pdf')

In [40]:
pages =  loader.load()

In [45]:
print(pages[0].page_content.replace('\n', ' '))

This is the first line PDF. This is the second line in the PDF. This is the third line in the PDF.


### Integrations

In [46]:
from langchain.document_loaders import HNLoader

In [51]:
loader = HNLoader('https://news.ycombinator.com/item?id=37417027')

In [52]:
data = loader.load()

In [55]:
data[0].page_content

"noelwelsh 4 hours ago  \n             | next [–] \n\nLooks really nice. A few things that I think might make the presentation a bit clearer:1. Using polar coordinates makes the maths a lot cleaner than using Cartesian coordinates. However you then either need to explain polar coordinates, or you assume people remember polar coordinates from high school which is often not the case in my experience.2. I think the positioning of the points is a little bit opaque. I was expecting to see (r * cos theta, r * sin theta) and it took me by surprise to see an addition in there. Either just noting that stupid computer graphics libraries don't put the origin at the center, or adding that translation later (which is a chance to talk about function composition) might be beneficial.(I've written my own take on this same topic, starting at http://www.creativescala.org/creative-scala/polygons/  It will take a little bit more than 5 minutes to get through it :-)  It's really fun and you can do a huge a

In [56]:
human_prompt = HumanMessagePromptTemplate.from_template("Give a short Summary of the following hackernews comment\n{comment}")

In [57]:
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [69]:
# Summarise every loaded Hacker News comment: build the prompt for one comment,
# send it to the chat model, echo the summary as it arrives, and keep all
# summaries in `collection` for later use.
# (The prompt is built inside the loop only; a separate pre-loop prompt for
# data[0] was overwritten before use and failed on an empty `data` list.)
collection = []
for comment in data:
    query = chat_prompt.format_prompt(comment=comment.page_content).to_messages()
    result = chat(query)
    print(result.content)
    collection.append(result.content)


In this comment, the user noelwelsh provides feedback on a presentation, suggesting ways to make it clearer. They recommend using polar coordinates instead of Cartesian coordinates for cleaner math, but also mention the need to explain polar coordinates for those who may not be familiar with them. They also suggest clarifying the positioning of points and noting that computer graphics libraries don't always put the origin at the center. The user also mentions that they have written their own take on the same topic and provide a link to it.
The commenter suggests that the addition being discussed in the previous comments is intended to center the figures in the canvas.
The commenter appreciates the clear examples provided and suggests drawing inspiration from Daniel Shifman's book "Nature of code," particularly chapters 2 and 3.
The commenter expresses gratitude for the reference to a book and mentions that they will check it out.
The commenter is confused about how the concept or idea 

In [66]:
query

[HumanMessage(content="Give a short Summary of the following hackernews comment\nnoelwelsh 4 hours ago  \n             | next [–] \n\nLooks really nice. A few things that I think might make the presentation a bit clearer:1. Using polar coordinates makes the maths a lot cleaner than using Cartesian coordinates. However you then either need to explain polar coordinates, or you assume people remember polar coordinates from high school which is often not the case in my experience.2. I think the positioning of the points is a little bit opaque. I was expecting to see (r * cos theta, r * sin theta) and it took me by surprise to see an addition in there. Either just noting that stupid computer graphics libraries don't put the origin at the center, or adding that translation later (which is a chance to talk about function composition) might be beneficial.(I've written my own take on this same topic, starting at http://www.creativescala.org/creative-scala/polygons/  It will take a little bit mo

In [67]:
result = chat(query)

In [68]:
print(result.content)

The commenter believes that the presentation could be improved by using polar coordinates instead of Cartesian coordinates, but acknowledges that not everyone may be familiar with polar coordinates. They also suggest making the positioning of the points clearer, either by explaining that computer graphics libraries don't put the origin at the center or by adding a translation later. The commenter also mentions that they have written their own take on the topic and provide a link to it.


## Document Transformers

### Text Splitters

In [2]:
from langchain.text_splitter import CharacterTextSplitter

In [3]:
# Read the whole speech transcript into memory; the `with` block closes the
# file handle as soon as the read completes.
with open("../data/FDR_State_of_Union_1944.txt") as speech_file:
    Speech_txt = speech_file.read()

In [4]:
len(Speech_txt)

21927

In [5]:
len(Speech_txt.split())

3750

In [6]:
text_splitter = CharacterTextSplitter(chunk_size=1000, separator='\n\n')

In [8]:
texts = text_splitter.create_documents([Speech_txt])

In [10]:
texts[0]

Document(page_content="This Nation in the past two years has become an active partner in the world's greatest war against human slavery.\n\nWe have joined with like-minded people in order to defend ourselves in a world that has been gravely threatened with gangster rule.\n\nBut I do not think that any of us Americans can be content with mere survival. Sacrifices that we and our allies are making impose upon us all a sacred obligation to see to it that out of this war we and our children will gain something better than mere survival.\n\nWe are united in determination that this war shall not be followed by another interim which leads to new disaster- that we shall not repeat the tragic errors of ostrich isolationism—that we shall not repeat the excesses of the wild twenties when this Nation went for a joy ride on a roller coaster which ended in a tragic crash.", metadata={})

In [11]:
!pip install tiktoken



In [23]:
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)

In [24]:
texts = text_splitter.split_text(Speech_txt)

In [25]:
len(texts)

15

### Embedding

In [31]:
from langchain.embeddings import OpenAIEmbeddings

In [32]:
embeddings = OpenAIEmbeddings()

In [33]:
texts[0]

'This Nation in the past two years has become an active partner in the world\'s greatest war against human slavery.\n\nWe have joined with like-minded people in order to defend ourselves in a world that has been gravely threatened with gangster rule.\n\nBut I do not think that any of us Americans can be content with mere survival. Sacrifices that we and our allies are making impose upon us all a sacred obligation to see to it that out of this war we and our children will gain something better than mere survival.\n\nWe are united in determination that this war shall not be followed by another interim which leads to new disaster- that we shall not repeat the tragic errors of ostrich isolationism—that we shall not repeat the excesses of the wild twenties when this Nation went for a joy ride on a roller coaster which ended in a tragic crash.\n\nWhen Mr. Hull went to Moscow in October, and when I went to Cairo and Teheran in November, we knew that we were in agreement with our allies in our

In [34]:
embedded_text = embeddings.embed_query(texts[0])

In [35]:
embedded_text

[-0.017535953909972393,
 -0.02783315314915183,
 0.015425763317139884,
 -0.012494200267857608,
 -0.006647769522661585,
 0.0014265492137379068,
 -0.012574334052471912,
 0.004146925914927136,
 -0.01009018579810599,
 -0.005325561145203056,
 0.014958315774561852,
 0.021008421635215643,
 0.00028130314769610036,
 0.003983319554421584,
 -0.008414052972436992,
 0.005095176630852251,
 0.04559615417918188,
 0.010597699767329908,
 0.0034173741767141454,
 -0.001223710425913807,
 0.0011894866511719443,
 -0.019659501530557746,
 0.01302842642994216,
 -0.011505883590947874,
 -0.010751289754004623,
 -0.012086853762232203,
 0.022597742162393915,
 -0.016293880714111964,
 0.004627729088274218,
 -0.027485905817833986,
 0.005235410521096648,
 0.001514195627971288,
 -0.007599359029863646,
 -0.019579366814620913,
 -0.031492596911194194,
 -0.009629415838081851,
 0.005218716099050651,
 -0.006651108313938531,
 -0.015185361963296977,
 -0.01985983645775477,
 -0.007192012058576974,
 0.004464122262107401,
 -0.0049916

In [36]:
from langchain.document_loaders import CSVLoader

In [37]:
loader = CSVLoader('../data/penguins.csv')

In [38]:
data = loader.load()

In [40]:
[text.page_content for text in data]

['species: Adelie\nisland: Torgersen\nbill_length_mm: 39.1\nbill_depth_mm: 18.7\nflipper_length_mm: 181\nbody_mass_g: 3750\nsex: MALE',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: 39.5\nbill_depth_mm: 17.4\nflipper_length_mm: 186\nbody_mass_g: 3800\nsex: FEMALE',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: 40.3\nbill_depth_mm: 18\nflipper_length_mm: 195\nbody_mass_g: 3250\nsex: FEMALE',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: \nbill_depth_mm: \nflipper_length_mm: \nbody_mass_g: \nsex: ',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: 36.7\nbill_depth_mm: 19.3\nflipper_length_mm: 193\nbody_mass_g: 3450\nsex: FEMALE',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: 39.3\nbill_depth_mm: 20.6\nflipper_length_mm: 190\nbody_mass_g: 3650\nsex: MALE',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: 38.9\nbill_depth_mm: 17.8\nflipper_length_mm: 181\nbody_mass_g: 3625\nsex: FEMALE',
 'species: Adelie\nisland: Torgersen\nbill_length_mm: 39.2\nb

In [41]:
embedded_docs = embeddings.embed_documents([text.page_content for text in data])

In [43]:
len(embedded_docs)

344

## Vector Store

In [59]:
import chromadb

In [50]:
from langchain.vectorstores import Chroma

In [52]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader

In [53]:
# Load the document --> split into chunks --> embed each chunk --> store in vector store

In [55]:
loader = TextLoader('../data/FDR_State_of_Union_1944.txt')
documents = loader.load()

In [57]:
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)
chunks = text_splitter.split_documents(documents)

In [58]:
embedding_function = OpenAIEmbeddings()

In [62]:
db = Chroma.from_documents(chunks, embedding_function, persist_directory="../data/speech_new_db")

In [63]:
db.persist()

In [64]:
db_new_connection = Chroma(persist_directory="../data/speech_new_db", embedding_function=embedding_function)

In [65]:
new_doc = "what did FDR say about the war?"

In [67]:
similar_docs = db_new_connection.similarity_search(new_doc)

In [68]:
similar_docs

[Document(page_content='Overconfidence and complacency are among our deadliest enemies. Last spring—after notable victories at Stalingrad and in Tunisia and against the U-boats on the high seas—overconfidence became so pronounced that war production fell off. In two months, June and July, 1943, more than a thousand airplanes that could have been made and should have been made were not made. Those who failed to make them were not on strike. They were merely saying, "The war\'s in the bag- so let\'s relax."\n\nThat attitude on the part of anyone—Government or management or labor—can lengthen this war. It can kill American boys.\n\nLet us remember the lessons of 1918. In the summer of that year the tide turned in favor of the allies. But this Government did not relax. In fact, our national effort was stepped up. In August, 1918, the draft age limits were broadened from 21-31 to 18-45. The President called for "force to the utmost," and his call was heeded. And in November, only three mont

In [78]:
loader = TextLoader('../data/Lincoln_State_of_Union_1862.txt')

In [79]:
documents = loader.load()

In [80]:
docs = text_splitter.split_documents(documents)

Created a chunk of size 608, which is longer than the specified 500
Created a chunk of size 539, which is longer than the specified 500
Created a chunk of size 686, which is longer than the specified 500


In [81]:
db_new_connection.add_documents(docs)

['1944a1c8-5096-11ee-a1bf-c85acf028b13',
 '1944a1c9-5096-11ee-9175-c85acf028b13',
 '1944a1ca-5096-11ee-a23e-c85acf028b13',
 '1944a1cb-5096-11ee-8265-c85acf028b13',
 '1944c83c-5096-11ee-a8b4-c85acf028b13',
 '1944c83d-5096-11ee-b8d1-c85acf028b13',
 '1944c83e-5096-11ee-9c9d-c85acf028b13',
 '1944c83f-5096-11ee-ab9a-c85acf028b13',
 '1944c840-5096-11ee-912c-c85acf028b13',
 '1944c841-5096-11ee-bb8f-c85acf028b13',
 '1944c842-5096-11ee-84da-c85acf028b13',
 '1944c843-5096-11ee-9ab6-c85acf028b13',
 '1944c844-5096-11ee-bc5a-c85acf028b13',
 '1944c845-5096-11ee-81de-c85acf028b13',
 '1944c846-5096-11ee-9eb6-c85acf028b13',
 '1944c847-5096-11ee-8614-c85acf028b13',
 '1944c848-5096-11ee-babc-c85acf028b13',
 '1944c849-5096-11ee-b74c-c85acf028b13',
 '1944c84a-5096-11ee-bcd2-c85acf028b13',
 '1944c84b-5096-11ee-a26b-c85acf028b13',
 '1944c84c-5096-11ee-b8bb-c85acf028b13',
 '1944c84d-5096-11ee-8c0f-c85acf028b13',
 '1944c84e-5096-11ee-9bfe-c85acf028b13',
 '1944c84f-5096-11ee-b669-c85acf028b13',
 '1944c850-5096-

In [84]:
similar_docs = db_new_connection.similarity_search('slavery')

In [85]:
similar_docs

[Document(page_content='As to the second article, I think it would be impracticable to return to bondage the class of persons therein contemplated. Some of them, doubtless, in the property sense belong to loyal owners, and hence provision is made in this article for compensating such. The third article relates to the future of the freed people. It does not oblige, but merely authorizes Congress to aid in colonizing such as may consent. This ought not to be regarded as objectionable on the one hand or on the other, insomuch as it comes to nothing unless by the mutual consent of the people to be deported and the American voters, through their representatives in Congress.\n\nI can not make it better known than it already is that I strongly favor colonization; and yet I wish to say there is an objection urged against free colored persons remaining in the country which is largely imaginary, if not sometimes malicious.\n\nIt is insisted that their presence would injure and displace white lab

In [87]:
query1= db_new_connection.similarity_search('cost of food law')

In [88]:
query1[0].metadata

{'source': '../data/FDR_State_of_Union_1944.txt'}

### Retrievers

In [90]:
retriever = db_new_connection.as_retriever()

In [92]:
results = retriever.get_relevant_documents('cost of food law')

In [93]:
results

[Document(page_content='That is the way to fight and win a war—all out—and not with half-an-eye on the battlefronts abroad and the other eye-and-a-half on personal, selfish, or political interests here at home.\n\nTherefore, in order to concentrate all our energies and resources on winning the war, and to maintain a fair and stable economy at home, I recommend that the Congress adopt:\n\n(1) A realistic tax law—which will tax all unreasonable profits, both individual and corporate, and reduce the ultimate cost of the war to our sons and daughters. The tax bill now under consideration by the Congress does not begin to meet this test.\n\n(2) A continuation of the law for the renegotiation of war contracts—which will prevent exorbitant profits and assure fair prices to the Government. For two long years I have pleaded with the Congress to take undue profits out of war.\n\n(3) A cost of food law—which will enable the Government (a) to place a reasonable floor under the prices the farmer ma

## MultiQuery Retriever

In [94]:
from langchain.document_loaders import WikipediaLoader

In [97]:
loader = WikipediaLoader('MKULTRA')

In [98]:
documents = loader.load()



  lis = BeautifulSoup(html).find_all('li')


In [99]:
len(documents)

9

In [100]:
from langchain.text_splitter import CharacterTextSplitter

In [101]:
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=500)
docs = text_splitter.split_documents(documents)

Created a chunk of size 516, which is longer than the specified 500


In [102]:
len(docs)

19

In [103]:
from langchain.embeddings import OpenAIEmbeddings

In [104]:
embedding_function = OpenAIEmbeddings()

In [105]:
from langchain.vectorstores import Chroma

In [107]:
# Embed the split documents and store them in a Chroma collection backed by
# the given directory, then flush the collection to disk.
# NOTE(review): the same persist_directory is reopened later in the notebook;
# presumably re-running this cell adds the same chunks to the existing store
# again — confirm, and guard the build if so.
db = Chroma.from_documents(docs, embedding_function, persist_directory="../data/mkultra_db")
db.persist()

In [108]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chat_models import ChatOpenAI

In [109]:
query = "what is mkultra?"

In [117]:
llm = ChatOpenAI(temperature=0)

In [118]:
retriever_from_llm = MultiQueryRetriever.from_llm(retriever=db.as_retriever(), llm=llm)

In [119]:
# Logging: raise the root log level to INFO so the alternative queries
# generated by the multi-query retriever appear in the cell output.

import logging

logging.basicConfig(level=logging.INFO)

# Trailing expression only: looks up the retriever's logger so the cell shows
# it. No level is set on this logger here; the INFO level in its repr comes
# from the basicConfig call above.
logging.getLogger('langchain.retrievers.multi_query')

<Logger langchain.retrievers.multi_query (INFO)>

In [123]:
# Retrieve documents only (no answer is generated): the LLM rewrites the question into several queries for the vector store
unique_docs = retriever_from_llm.get_relevant_documents(query=query)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide information on the purpose and objectives of the MKULTRA program?', '2. What are the key aspects and historical context of the MKULTRA project?', '3. Could you explain the significance and impact of MKULTRA in the field of covert operations and mind control experiments?']


In [124]:
unique_docs

[Document(page_content="Project MKUltra (or MK-Ultra) was an illegal human experimentation program designed and undertaken by the U.S. Central Intelligence Agency (CIA) and intended to develop procedures and identify drugs that could be used during interrogations to weaken people and force confessions through brainwashing and psychological torture. It began in 1953 and was halted in 1973. MKUltra used numerous methods to manipulate its subjects' mental states and brain functions, such as the covert administration of high doses of psychoactive drugs (especially LSD) and other chemicals without the subjects' consent, electroshocks, hypnosis, sensory deprivation, isolation, verbal and sexual abuse, and other forms of torture.MKUltra was preceded by Project ARTICHOKE. It was organized through the CIA's Office of Scientific Intelligence and coordinated with the United States Army Biological Warfare Laboratories. The program engaged in illegal activities, including the use of U.S. and Canadi

## Contextual Compression

In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import ChatOpenAI

In [2]:
embedding_function = OpenAIEmbeddings()

In [3]:
db = Chroma(persist_directory="../data/mkultra_db", embedding_function=embedding_function)

In [4]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


In [7]:
# LLM for Compression
llm = ChatOpenAI(temperature=0)
# LLM -> LLMChainExtractor
compressor = LLMChainExtractor.from_llm(llm=llm)
#ContextualCompression
compressor_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=db.as_retriever())

In [8]:
docs = db.similarity_search('when was it declassified?')

In [9]:
docs

[Document(page_content='=== Work with the Army & CIA ===\nOlson served as a captain in the U.S. Army Chemical Corps. In December 1942, he got a call from Ira Baldwin, his thesis adviser at UoW and the future mentor of Sidney Gottlieb, who would go on to be the CIA\'s leading chemist and director of MK-ULTRA. Ira had been called to leave his University post to direct a secret program regarding the development of biological weapons, and wanted Olson to join him as one of the first scientists at what would become Fort Detrick. The army transferred him to Edgewood Arsenal in Maryland. A few months later, the Chemical Corps took over Detrick and established its secret Biologicals Warfare Laboratories.\nAt Camp Detrick, Baldwin worked with industrial partners such as George W. Merck and the U.S. military to establish the top secret U.S. bioweapons program beginning in 1943, during World War II, a time when interest in applying modern technology to warfare was high. Olson also worked with ex-

In [14]:
compressed_docs = compressor_retriever.get_relevant_documents('when was this declassified?')



In [17]:
# Prints the 'summary' entry of the first hit's metadata, not its page_content.
# NOTE(review): the text printed here opens exactly like the uncompressed
# document returned by the multi-query retriever earlier, so this is presumably
# the article summary carried over from the loader rather than the compressed
# extract — confirm whether `compressed_docs[0].page_content` was intended.
print(compressed_docs[0].metadata['summary'])

Project MKUltra (or MK-Ultra) was an illegal human experimentation program designed and undertaken by the U.S. Central Intelligence Agency (CIA) and intended to develop procedures and identify drugs that could be used during interrogations to weaken people and force confessions through brainwashing and psychological torture. It began in 1953 and was halted in 1973. MKUltra used numerous methods to manipulate its subjects' mental states and brain functions, such as the covert administration of high doses of psychoactive drugs (especially LSD) and other chemicals without the subjects' consent, electroshocks, hypnosis, sensory deprivation, isolation, verbal and sexual abuse, and other forms of torture.MKUltra was preceded by Project ARTICHOKE. It was organized through the CIA's Office of Scientific Intelligence and coordinated with the United States Army Biological Warfare Laboratories. The program engaged in illegal activities, including the use of U.S. and Canadian citizens as unwitting

## Chains

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

In [2]:
humman_prompt = HumanMessagePromptTemplate.from_template("Make a funny joke about\n{topic}")

In [3]:
chat_prompt_template = ChatPromptTemplate.from_messages([humman_prompt])

In [4]:
chat = ChatOpenAI()

In [5]:
from langchain.chains import LLMChain

In [6]:
chain = LLMChain(llm=chat, prompt=chat_prompt_template)

In [7]:
result = chain.run(topic='dogs')

In [8]:
result

'Why did the dog bring a ladder to the bar?\n\nBecause he heard the drinks were on the house!'

## Simple Sequential Chain

In [13]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain, SimpleSequentialChain, SequentialChain
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)

# Blog post topic --> [[Outline --> Create blog post from outline]] --> Blog post text


In [3]:
template = "Give me a simple bullet line of the following blog post\n{topic}"
first_prompt = ChatPromptTemplate.from_template(template)
chain_one = LLMChain(llm=ChatOpenAI(), prompt=first_prompt)

In [4]:
# Second stage: its prompt has one placeholder, {outline}.
template2 = "Write a  blog post using the outline\n{outline}"
second_prompt = ChatPromptTemplate.from_template(template=template2)
chain_two = LLMChain(
    prompt=second_prompt,
    llm=ChatOpenAI(),
)

In [6]:
# Run chain_one then chain_two in that order, with verbose=True.
full_chain = SimpleSequentialChain(
    chains=[chain_one, chain_two],
    verbose=True,
)

In [8]:
# NOTE(review): the argument is handed to the chain as a plain string; no cell
# visible here downloads the page, so presumably the model only sees the URL
# text itself -- confirm that is intended.
result = full_chain.run(
    'https://www.bbc.com/news/world-us-canada-57982050'
)



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mThe blog post from BBC News titled "Covid-19: US vaccination rates rise as cases surge" discusses the recent increase in COVID-19 vaccination rates in the United States amidst a surge in cases due to the Delta variant.

According to the article, the US has seen a significant rise in COVID-19 vaccination rates in recent weeks, with an average of 650,000 doses being administered daily. This increase is attributed to various factors, including the spread of the highly contagious Delta variant, rising hospitalizations, and efforts by the government and private sector to encourage vaccination.

The blog post highlights the vaccination progress in different states, emphasizing that areas with lower vaccination rates are experiencing higher case numbers and hospitalizations. It also mentions the ongoing efforts by health officials and organizations to combat vaccine hesitancy and increase access to vaccines, particularly in 

In [10]:
type(result)

str

In [12]:
print(result)

Title: COVID-19 Vaccination Rates Rise in the US Amidst Surge in Cases: A Race Against the Delta Variant

Introduction:
The COVID-19 pandemic has been an ongoing battle for over a year and a half now. Just as we thought we were making progress, the Delta variant emerged, posing new challenges for countries across the globe. In the United States, vaccination rates have recently seen a significant rise as cases surge due to the highly contagious Delta variant. This blog post explores the factors contributing to this increase, the progress made in different states, and the ongoing efforts to combat vaccine hesitancy.

The Rise in Vaccination Rates:
According to a recent article by BBC News, the US has witnessed a notable increase in COVID-19 vaccination rates, with an average of 650,000 doses being administered daily. This surge in vaccinations can be attributed to several factors. Firstly, the spread of the Delta variant has led to a rise in hospitalizations, causing concern among the pu

# Sequential chain

In [14]:
# Input: employee performance review text
# Review the text and give a summary
# Summarize the weaknesses
# Weaknesses -> improvement plan

In [18]:
# Stage 1: reads {review}, stores its result under 'review_summary'.
template1 = 'Give a summary of the employee performance review\n{review}'
prompt1 = ChatPromptTemplate.from_template(template=template1)
chain1 = LLMChain(
    prompt=prompt1,
    llm=ChatOpenAI(),
    output_key='review_summary',
)

In [20]:
# Stage 2: reads {review_summary}, stores its result under 'weakness'.
template2 = 'Identify Key employee Weakness from the review summary\n{review_summary}'
prompt2 = ChatPromptTemplate.from_template(template=template2)
chain2 = LLMChain(
    prompt=prompt2,
    llm=ChatOpenAI(),
    output_key='weakness',
)

In [26]:
# Stage 3: reads {weakness}, stores its result under 'improvement_plan'.
template3 = 'Give a improvement plan for the employee weakness\n{weakness}'
prompt3 = ChatPromptTemplate.from_template(template=template3)
chain3 = LLMChain(
    prompt=prompt3,
    llm=ChatOpenAI(),
    output_key='improvement_plan',
)

In [31]:
# Three-stage pipeline. Each stage's output_key is the placeholder name the
# next stage's template reads; all three intermediate outputs are requested.
final_chain = SequentialChain(
    chains=[chain1, chain2, chain3],
    input_variables=['review'],
    output_variables=['review_summary', 'weakness', 'improvement_plan'],
    verbose=True,
)

In [32]:
# Sample performance review used as the {review} input of the sequential chain.
# The literal opens with exactly three quotes: a fourth quote would become the
# first character of the text sent to the model.
employee_review = """Employee Name: John Smith
Position: Senior Software Engineer
Date of Review: October 3, 2023

Reviewer: Jane Doe
Position: Senior HR Manager

Performance Evaluation:

John Smith has been with our company for the past year, and I have had the pleasure of working closely with him during this time. I would like to provide an honest assessment of his performance.

Technical Skills: John possesses exceptional technical skills in software development. His ability to write clean and efficient code has been instrumental in several successful projects. He consistently demonstrates a deep understanding of programming languages and development tools.

Problem-Solving: John is an outstanding problem solver. He has a knack for identifying and addressing complex issues promptly, which has saved the team valuable time and resources on multiple occasions.

Collaboration: John is a great team player. He is always willing to collaborate with his colleagues, offer assistance when needed, and share his expertise. His positive attitude and willingness to help others have fostered a collaborative and productive work environment.

Communication: John communicates effectively with both technical and non-technical team members. He can explain complex technical concepts in a way that is easy for others to understand, which has been invaluable in cross-functional projects.

Time Management: John consistently meets project deadlines and manages his time efficiently. His ability to prioritize tasks and stay organized contributes to the overall success of our projects.

Leadership Potential: John has shown potential for leadership roles in the future. He takes initiative and has a natural ability to inspire and motivate his team members.

Adaptability: John is adaptable and open to learning new technologies and methodologies. He readily embraces changes in the industry and incorporates them into his work.

Areas for Improvement:

While John's performance has been commendable, there are a few areas where he could continue to develop:

Mentorship: Encouraging John to take on more mentorship responsibilities could help him further develop his leadership skills and provide valuable guidance to junior team members.

Public Speaking: John could benefit from improving his public speaking skills, which would enhance his ability to present technical information to larger audiences.

Work-Life Balance: While dedication to work is admirable, it's important for John to maintain a healthy work-life balance to avoid burnout.

Overall, John Smith has been an asset to our team. His technical expertise, problem-solving abilities, and collaborative spirit have contributed significantly to our projects' success. I look forward to seeing his continued growth and contributions to the company."""

In [34]:
# Call the chain directly with the review text; the cells below treat the
# returned value as a dict (they call .keys() and index it by output key).
result2 = final_chain(employee_review)



[1m> Entering new SequentialChain chain...[0m

[1m> Finished chain.[0m


In [30]:
print(result2)

Improvement Plan:

1. Mentorship Responsibilities:
a. Assign the employee as a mentor to junior colleagues or new hires. This will provide them with the opportunity to develop their leadership skills and guide others.
b. Provide training or workshops on effective mentoring and leadership techniques. This will equip the employee with the necessary tools and knowledge to excel in their mentorship responsibilities.
c. Encourage the employee to actively seek out mentoring opportunities within the organization. This could include participating in cross-functional projects, joining committees, or volunteering for leadership positions in professional associations.

2. Public Speaking Skills:
a. Offer a public speaking course or workshop to help the employee improve their skills in presenting technical information to larger audiences. This could include tips on structuring presentations, using visual aids effectively, and engaging the audience.
b. Provide opportunities for the employee to prac

In [35]:
type(result2)

dict

In [36]:
result2.keys()

dict_keys(['review', 'review_summary', 'weakness', 'improvement_plan'])

In [38]:
print(result2['review_summary'])

John Smith, a Senior Software Engineer, has been with the company for a year and has shown exceptional technical skills in software development. He is able to write clean and efficient code and has a deep understanding of programming languages and development tools. John is also an outstanding problem solver and has saved the team valuable time and resources by addressing complex issues promptly. He is a great team player, always willing to collaborate and offer assistance to his colleagues. John communicates effectively with both technical and non-technical team members and is able to explain complex concepts in an understandable way. He consistently meets project deadlines and manages his time efficiently. John has shown potential for leadership roles in the future and is adaptable to learning new technologies and methodologies. 

Areas for improvement include encouraging John to take on more mentorship responsibilities to further develop his leadership skills, improving his public s

In [39]:
result2['weakness']

'The key employee weakness identified from the review summary is the need for John to take on more mentorship responsibilities to further develop his leadership skills. Additionally, he is encouraged to improve his public speaking skills to enhance his ability to present technical information to larger audiences. Lastly, the review suggests that John should maintain a healthy work-life balance to avoid burnout.'

In [40]:
result2['improvement_plan']

"To address these weaknesses, the following improvement plan is proposed for John:\n\n1. Mentorship Development:\n- Assign John as a mentor to junior employees or new hires, providing guidance and support.\n- Encourage John to actively participate in mentorship programs within the organization.\n- Provide training or workshops specifically tailored to mentorship and leadership development.\n- Schedule regular check-ins with John to discuss his mentorship experiences and provide feedback.\n\n2. Public Speaking Skills:\n- Offer communication and public speaking training courses to help John improve his presentation skills.\n- Provide opportunities for John to practice public speaking, such as leading team meetings or presenting in front of small groups.\n- Encourage John to join Toastmasters or similar organizations to further enhance his public speaking abilities.\n- Provide constructive feedback and support to help John overcome any anxieties or challenges he may face.\n\n3. Work-Life 

## LLMRouterChains

In [1]:
# Student asks a physics question
# How does a magnet work?
# Explain the concept of magnetism
# INPUT --> ROUTER --> LLM decides chain --> OUTPUT

In [2]:
# Prompt templates for the two routes; each ends with the {input} placeholder.
beginner_template = (
    "You are a physics teacher. "
    "A student asks you the following question and explain it to understand simple terms\n"
    "{input}"
)
expert_template = (
    "You are a physics Professor. "
    "A student asks you the following question and explain it to understand for advanced auidence level \n"
    "{input}"
)

In [4]:
# ROUTE PROMPT INFORMATION
# [{},....] --> NAME OF THE PROMPT, DESCRIPTION, PROMPT TEMPLATE

In [37]:
# One entry per route: its name, a description, and the prompt template to use.
prompt_infos = [
    dict(
        name='beginner',
        description='Answer beginner level physics question',
        prompt_template=beginner_template,
    ),
    dict(
        name='expert',
        description='Answer expert level physics question',
        prompt_template=expert_template,
    ),
]

In [38]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

In [39]:
# `llm` is not defined by any cell in this section, so on a fresh kernel the
# loop below would raise NameError. Create it here so the section survives
# Restart & Run All. It is reused by the default chain and the router chain.
llm = ChatOpenAI()

# Build one LLMChain per entry in prompt_infos, keyed by the entry's name.
destination_chains = {}
for p_info in prompt_infos:
    name = p_info["name"]
    prompt_template = p_info["prompt_template"]
    prompt = ChatPromptTemplate.from_template(template=prompt_template)
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[name] = chain

In [40]:
destination_chains

{'beginner': LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=ChatPromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], output_parser=None, partial_variables={}, template='You are a physics teacher. A student asks you the following question and explain it to understand simple terms\n{input}', template_format='f-string', validate_template=True), additional_kwargs={})]), llm=ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.7, model_kwargs={}, openai_api_key='sk-***REDACTED***', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktok

In [41]:
# Chain whose prompt is just the raw {input}; it is later passed to
# MultiPromptChain as `default_chain`.
default_prompt = ChatPromptTemplate.from_template(template="{input}")
default_chain = LLMChain(
    prompt=default_prompt,
    llm=llm,
)

In [42]:
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE

In [43]:
print(MULTI_PROMPT_ROUTER_TEMPLATE)

Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \ name of the prompt to use or "DEFAULT"
    "next_inputs": string \ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (must include ```json at the start of the respon

In [44]:
# One "name: description" string for every entry in prompt_infos.
destinations = [
    "{}: {}".format(entry["name"], entry["description"])
    for entry in prompt_infos
]

In [45]:
destinations

['beginner: Answer beginner level physics question',
 'expert: Answer expert level physics question']

In [52]:
# Build the "name: description" lines, then join them with newlines.
destinations = [
    "{}: {}".format(entry["name"], entry["description"])
    for entry in prompt_infos
]
destinations_str = "\n".join(destinations)

In [54]:
print(destinations_str)

beginner: Answer beginner level physics question
expert: Answer expert level physics question


In [48]:
from langchain.prompts import PromptTemplate
from langchain.chains.router.llm_router import LLMRouterChain,RouterOutputParser

In [49]:
# Fill the {destinations} slot of the stock router template with the
# newline-joined "name: description" list. The variable built earlier in this
# notebook is `destinations_str`; `destination_string` is never defined.
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(
    destinations=destinations_str
)

In [50]:
print(router_template)

Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{
    "destination": string \ name of the prompt to use or "DEFAULT"
    "next_inputs": string \ a potentially modified version of the original input
}}
```

REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input if you don't think any modifications are needed.

<< CANDIDATE PROMPTS >>
beginner: Answer beginner level physics question
expert: Answer expert level physics question

<< INP

In [56]:

# Prompt for the router: 'input' is the only declared variable, and a
# RouterOutputParser is attached as the prompt's output parser.
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=['input'],
    output_parser=RouterOutputParser(),
)

In [57]:
print(router_prompt)

input_variables=['input'] output_parser=RouterOutputParser(default_destination='DEFAULT', next_inputs_type=<class 'str'>, next_inputs_inner_key='input') partial_variables={} template='Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.\n\n<< FORMATTING >>\nReturn a markdown code snippet with a JSON object formatted to look like:\n```json\n{{\n    "destination": string \\ name of the prompt to use or "DEFAULT"\n    "next_inputs": string \\ a potentially modified version of the original input\n}}\n```\n\nREMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.\nREMEMBER: "next_inputs" can just

In [59]:
# Router chain built from the shared model and the router prompt.
router_chain = LLMRouterChain.from_llm(
    prompt=router_prompt,
    llm=llm,
)

In [60]:
from langchain.chains.router import MultiPromptChain

In [63]:
# Assemble the router, the per-route chains and the default chain.
# Note: this rebinds `chain`, which the loop that filled destination_chains
# also used as its loop-local name.
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True,
)

In [62]:
chain('how does a magnet work?')



"A magnet works by creating a magnetic field around itself. This magnetic field is produced by the movement of tiny particles called electrons within the magnet. \n\nEvery atom has electrons that orbit around its nucleus. In most materials, like wood or plastic, the electrons move randomly and cancel out each other's magnetic fields, so they don't produce a noticeable magnetic effect. \n\nHowever, in certain materials like iron, nickel, and cobalt, the electrons align themselves in the same direction, creating a strong magnetic field. These materials are called ferromagnetic materials.\n\nWhen two magnets come close to each other, their magnetic fields interact. There are two main things that can happen. If the magnets are positioned with their opposite poles facing each other (North to South or South to North), they will attract each other. This is because the magnetic fields combine and become stronger, pulling the magnets together.\n\nOn the other hand, if the magnets are positioned