In [30]:
# Standard-library modules used to locate the key module and export the key.
import os
import sys

# The key is kept in a `key_api` module that lives in this folder, so the
# folder must be on the import path before the import below runs.
sys.path.insert(0, 'D:/personal-git')
from key_api import apiKey

# Publish the key as an environment variable (presumably this is where the
# OpenAI-backed objects created in later cells look for it).
os.environ['OPENAI_API_KEY'] = apiKey

# Schema
---

## Text

In [9]:
# Plain-text example: a question held as an ordinary Python string.
# (Spelling of "capital" corrected so the sample prompt is well-formed.)
txt = "Hi, What is capital of Maharashtra?"

## Chat

In [10]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

# Chat model with the sampling temperature set to 0.7.
chat = ChatOpenAI(temperature=0.7)

# Conversation so far: a system instruction, then human / AI turns, ending on
# the human question the model is expected to answer.
travel_conversation = [
    SystemMessage(content="You are a nice AI bot that helps a user figure out where to travel in one short sentence"),
    HumanMessage(content="I like the beaches where should I go?"),
    AIMessage(content="You should go to Nice, France"),
    HumanMessage(content="What else should I do when I'm there?"),
]

# Kept as the final expression so the notebook displays the reply.
chat(travel_conversation)

AIMessage(content='You should explore the charming Old Town and indulge in delicious French cuisine.', additional_kwargs={}, example=False)

## Document

In [11]:
from langchain.schema import Document

# Arbitrary key/value metadata carried alongside the document text.
document_metadata = {
    "my_document_id": 234234,
    "my_document_source": "The LangChain Papers",
    "my_document_create_time": 1680013019,
}

# A Document pairs a piece of text with its metadata; left as the final
# expression so the notebook shows its repr.
Document(
    page_content="This is my document. It is full of text that I've gathered from other places",
    metadata=document_metadata,
)

Document(page_content="This is my document. It is full of text that I've gathered from other places", metadata={'my_document_id': 234234, 'my_document_source': 'The LangChain Papers', 'my_document_create_time': 1680013019})

# Models
---

## Language Model

In [12]:
from langchain.llms import OpenAI

# Text-in / text-out model wrapper configured with the "text-ada-001" model name.
llm = OpenAI(model_name="text-ada-001")

# Ask a single question; the returned string is displayed as the cell output.
question = "What day comes after Friday?"
llm(question)

'\n\nSaturday.'

## Chat Model

In [13]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

# Chat model with the sampling temperature set to 1.
chat = ChatOpenAI(temperature=1)

# One system instruction that sets the persona, followed by the user's request.
joke_conversation = [
    SystemMessage(content="You are an unhelpful AI bot that makes a joke at whatever the user says"),
    HumanMessage(content="I would like to go to New York, how should I do this?"),
]

# Kept as the final expression so the notebook displays the reply.
chat(joke_conversation)

AIMessage(content="Why don't you just teleport there? It's the fastest and most convenient way! Just make sure you have the right coordinates handy.", additional_kwargs={}, example=False)

## Text Embedding Model

In [14]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding client, the text to embed, and the resulting vector for that text.
embeddings = OpenAIEmbeddings()
text = "Hi! It's time for the beach"
text_embedding = embeddings.embed_query(text)

# Report the vector length and preview its first five components.
print(
    f"Your embedding is length {len(text_embedding)}",
    f"Here's a sample: {text_embedding[:5]}...",
    sep="\n",
)

Your embedding is length 1536
Here's a sample: [-0.00011466221621958539, -0.0031506523955613375, -0.0007831145194359124, -0.019504327327013016, -0.015125557780265808]...


# Prompts
---

In [15]:
from langchain.llms import OpenAI

# A hand-written prompt: a statement with a deliberate gap, followed by the
# question we want the model to answer about it.
prompt = """
Today is Monday, tomorrow is Wednesday.

What is wrong with that statement?
"""

# Text-in / text-out model wrapper configured with "text-davinci-003".
llm = OpenAI(model_name="text-davinci-003")

# Kept as the final expression so the notebook displays the completion.
llm(prompt)

'\nTuesday is missing.'

## Prompt Template

In [16]:
from langchain.llms import OpenAI
from langchain import PromptTemplate

llm = OpenAI(model_name="text-davinci-003")

# Template text with one placeholder, {location}, filled in at format time.
template = """
I really want to travel to {location}. What should I do there?

Respond in one short sentence
"""

# Wrap the raw text in a PromptTemplate that declares its single input.
prompt = PromptTemplate(template=template, input_variables=["location"])

# Substitute a concrete location to obtain the final prompt string.
final_prompt = prompt.format(location='Rome')

# Show the rendered prompt first, then the model's answer to it.
print(f"Final Prompt: {final_prompt}")
print("-----------")
print(f"LLM Output: {llm(final_prompt)}")

Final Prompt: 
I really want to travel to Rome. What should I do there?

Respond in one short sentence

-----------
LLM Output: Visit the Colosseum, the Roman Forum, and the Trevi Fountain.


## Example Selectors

In [17]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(model_name="text-davinci-003")

# Template used to render ONE example (an input/output pair) inside the
# few-shot prompt.
example_prompt = PromptTemplate(
    template="Example Input: {input}\nExample Output: {output}",
    input_variables=["input", "output"],
)

# Pool of examples: each pairs a noun ("input") with the place it is usually
# found ("output").
examples = [
    {"input": noun, "output": usual_location}
    for noun, usual_location in [
        ("pirate", "ship"),
        ("pilot", "plane"),
        ("driver", "car"),
        ("tree", "ground"),
        ("bird", "nest"),
    ]
]

In [18]:
# Build a selector that picks the examples closest in meaning to the input.
# Positional arguments, in order:
#   examples           - the pool of examples available for selection
#   OpenAIEmbeddings() - produces the embeddings used to measure similarity
#   FAISS              - vector store class that holds the embeddings and
#                        runs the similarity search
# k=2 is the number of examples to return per query.
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    FAISS,
    k=2,
)

In [19]:
# Few-shot prompt assembled as: prefix, the selected examples (each rendered
# with example_prompt), then the suffix that carries the user's noun.
similar_prompt = FewShotPromptTemplate(
    # The only value the caller supplies when formatting.
    input_variables=["noun"],
    # Text placed above the examples.
    prefix="Give the location an item is usually found in",
    # Chooses which examples to include, and how each one is rendered.
    example_selector=example_selector,
    example_prompt=example_prompt,
    # Text placed below the examples.
    suffix="Input: {noun}\nOutput:",
)

In [20]:
my_noun = "student"

# Render the few-shot prompt for this noun and show exactly what would be sent.
rendered_prompt = similar_prompt.format(noun=my_noun)
print(rendered_prompt)

Give the location an item is usually found in

Example Input: driver
Example Output: car

Example Input: pilot
Example Output: plane

Input: student
Output:


In [22]:
# Send the rendered few-shot prompt to the LLM; the completion is shown as the cell output.
llm(similar_prompt.format(noun=my_noun))

' classroom'

# Output Parser
---

In [5]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
from langchain.llms import OpenAI

# LLM wrapper (model name "text-davinci-003") used by the output-parser cells below.
llm = OpenAI(model_name="text-davinci-003")

In [6]:
# Describe the fields the reply must contain; each schema contributes a name
# and a description to the generated format instructions.
response_schemas = [
    ResponseSchema(
        name="bad_string",
        description="This a poorly formatted user input string",
    ),
    ResponseSchema(
        name="good_string",
        description="This is your response, a reformatted response",
    ),
]

# Parser built from those schemas; it is later used to parse the LLM's reply.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Instructions text derived from the schemas, to be embedded in the prompt.
format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```


In [7]:
# Prompt text with two placeholders: {format_instructions} (fixed up front)
# and {user_input} (supplied per call).
template = """
You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

{format_instructions}

% USER INPUT:
{user_input}

YOUR RESPONSE:
"""

# format_instructions is bound once as a partial variable, so only user_input
# has to be passed when formatting.
prompt = PromptTemplate(
    template=template,
    partial_variables={"format_instructions": format_instructions},
    input_variables=["user_input"],
)

# Render the prompt for a deliberately misspelled input and display it.
promptValue = prompt.format(user_input="welcom to califonya!")
print(promptValue)


You will be given a poorly formatted string from a user.
Reformat it and make sure all the words are spelled correctly

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"bad_string": string  // This a poorly formatted user input string
	"good_string": string  // This is your response, a reformatted response
}
```

% USER INPUT:
welcom to califonya!

YOUR RESPONSE:



In [8]:
# Send the rendered prompt to the LLM and keep the raw string reply.
llm_output = llm(promptValue)
# Bare expression: display the raw reply as the cell output.
llm_output

'```json\n{\n\t"bad_string": "welcom to califonya!",\n\t"good_string": "Welcome to California!"\n}\n```'

In [9]:
# Parse the raw reply with the structured parser; the parsed result is shown as the cell output.
output_parser.parse(llm_output)

{'bad_string': 'welcom to califonya!', 'good_string': 'Welcome to California!'}

# Indexes
---

## Document Loaders

In [38]:
!pip install unstructured



In [39]:
from langchain.document_loaders import DirectoryLoader

# Loader over one directory, restricted by the glob pattern to dummy-data.txt.
loader = DirectoryLoader(
    'd:/personal-git/Langchain-Learning',
    glob='dummy-data.txt',
)

# Load the matching file(s) as a list of documents.
docs = loader.load()

In [40]:
# Report how many documents were loaded.
print(f"Found {len(docs)} comments")

# Preview: the first 150 characters of each of the first two documents,
# concatenated with no separator.
sample_text = ''.join(doc.page_content[:150] for doc in docs[:2])
print(f"Here's a sample:\n\n{sample_text}")

Found 1 comments
Here's a sample:

Java is a high-level, class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible. It is


## Text Splitters

In [41]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the whole sample file into one string.
with open('d:/personal-git/Langchain-Learning/dummy-data.txt') as f:
    pg_work = f.read()

# The text is wrapped in a one-element list, so this count is always 1.
document_count = len([pg_work])
print(f"You have {document_count} document")

You have 1 document


In [42]:
# Splitter configured with chunk_size=150 and chunk_overlap=20
# (presumably measured in characters - TODO confirm against the splitter's
# length function).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=20)

# Split the single raw string into a list of chunk documents.
texts = text_splitter.create_documents([pg_work])

In [43]:
# Report the number of chunks produced by the splitter.
print(f"You have {len(texts)} documents \n")
print("PREVIEW::")

# Show the first two chunks; the extra "\n" argument leaves a blank line
# between them.
first_chunk = texts[0]
print(first_chunk.page_content, "\n")
second_chunk = texts[1]
print(second_chunk.page_content)

You have 301 documents 

PREVIEW::
Java is a high-level, class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible. It is 

as possible. It is a general-purpose programming language intended to let programmers write once, run anywhere (WORA),[17] meaning that compiled Java


## Retriever

In [44]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Load the sample text file through a TextLoader.
loader = TextLoader('d:/personal-git/Langchain-Learning/dummy-data.txt')
# NOTE(review): `documents` is not referenced by the retriever cells visible below,
# which index `texts` instead - confirm whether that is intended.
documents = loader.load()

In [45]:
# Get the embedding engine ready
embeddings = OpenAIEmbeddings()
# Embed the chunks and index them in a FAISS vector store.
# `texts` is not created in this section: it is the chunk list produced by
# text_splitter.create_documents(...) in the Text Splitters section.
db = FAISS.from_documents(texts, embeddings)
# Expose the vector store through the retriever interface
retriever = db.as_retriever()

In [46]:
# Query the retriever; note this rebinds `docs`, previously the DirectoryLoader result.
docs = retriever.get_relevant_documents("What are generics")

In [47]:
# First 200 characters of each of the top two retrieved chunks, separated by
# a blank line.
top_snippets = [doc.page_content[:200] for doc in docs[:2]]
print("\n\n".join(top_snippets))

Generics
Main article: Generics in Java

Generics
Scripting/Compiler
Functional programming (Lambda, Streaming)


## Vector Store

In [48]:
# Embed the text of every chunk in `texts`, producing one embedding per chunk.
embedding_list = embeddings.embed_documents([text.page_content for text in texts])

In [49]:
# Report how many embeddings were produced.
print(f"You have {len(embedding_list)} embeddings")

# Preview the first three components of the first embedding.
first_embedding = embedding_list[0]
print(f"Here's a sample of one: {first_embedding[:3]}")

You have 301 embeddings
Here's a sample of one: [0.003983437510569549, -0.008915782819376916, -0.01288274569942308]


# Memory
---

In [50]:
from langchain.memory import ChatMessageHistory
from langchain.chat_models import ChatOpenAI

# Chat model with the sampling temperature set to 0.
chat = ChatOpenAI(temperature=0)

# Message history seeded with one AI greeting followed by one user question.
history = ChatMessageHistory()
history.add_ai_message("hi!")
history.add_user_message("what is the capital of India?")

In [51]:
# Display the messages recorded so far.
history.messages

[AIMessage(content='hi!', additional_kwargs={}, example=False),
 HumanMessage(content='what is the capital of India?', additional_kwargs={}, example=False)]

In [52]:
# Pass the whole recorded history to the chat model to get the next reply.
ai_response = chat(history.messages)
# Bare expression: display the reply message as the cell output.
ai_response

AIMessage(content='The capital of India is New Delhi.', additional_kwargs={}, example=False)

In [54]:
# Record the model's reply text in the history.
# NOTE(review): this call adds a message each time it runs, so re-running the
# cell would presumably record the same reply again.
history.add_ai_message(ai_response.content)
# Display the updated message list.
history.messages

[AIMessage(content='hi!', additional_kwargs={}, example=False),
 HumanMessage(content='what is the capital of India?', additional_kwargs={}, example=False),
 AIMessage(content='The capital of India is New Delhi.', additional_kwargs={}, example=False)]

# Chains
---

In [55]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SimpleSequentialChain

# LLM shared by the chain examples below, with the sampling temperature set to 1.
llm = OpenAI(temperature=1)

## Simple Sequential Chain

In [56]:
# First step: given a location, name a classic dish from that area.
template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(template=template, input_variables=["user_location"])

# Chain that pairs the location prompt with the shared LLM.
location_chain = LLMChain(prompt=prompt_template, llm=llm)

In [57]:
# Second step: given a meal, produce a short recipe for it.
# `template` and `prompt_template` are rebound here; location_chain was
# already constructed with the earlier prompt object.
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(template=template, input_variables=["user_meal"])

# Chain that pairs the meal prompt with the shared LLM.
meal_chain = LLMChain(prompt=prompt_template, llm=llm)

In [58]:
# Run location_chain then meal_chain in order; verbose=True prints each step's output as it runs.
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)

In [59]:
# Run the two-step chain for "Mumbai"; the chain's final output is stored in `review`.
review = overall_chain.run("Mumbai")



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mOne classic dish from Mumbai is pav bhaji. It is a spicy vegetable curry, served with a soft round bread roll called pav.[0m
[33;1m[1;3m
Pav Bhaji Recipe

Ingredients:
- 2 tablespoons vegetable oil
- 1 teaspoon cumin seeds
- 1 teaspoon mustard seeds
- 2 onions, chopped
- 3 cloves garlic, finely chopped
- 1 teaspoon ground ginger
- 1 teaspoon ground coriander
- 1 teaspoon garam masala
- 1 small green chilli, chopped
- 1 large potato, peeled and cubed
- 2 cups diced tomatoes 
- 2 cups cooked mixed vegetables, such as cauliflower, carrots, green beans, potatoes, etc.
- 2 tablespoons tomato paste
- 1 teaspoon chilli powder
- 2 tablespoons butter
- Fresh coriander leaves, finely chopped

Method : 
1. Heat the oil in a large pan. Add the cumin and mustard seeds, and fry until they start to pop.
2. Add the onions and garlic, and fry until golden.
3. Add the ginger, coriander, garam masala and chilli, and fry for a minute.

## Summarization Chain

In [61]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the sample text file.
loader = TextLoader('d:/personal-git/Langchain-Learning/dummy-data.txt')
documents = loader.load()

# Splitter with larger chunks than the one used earlier (chunk_size=700,
# chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=50,
)

# Split the loaded documents; this rebinds `texts` (and `text_splitter`)
# from the earlier Text Splitters section.
texts = text_splitter.split_documents(documents)

In [64]:
# Number of chunks produced by the split above.
print(len(texts))

73


In [71]:
# Build a summarisation chain of type "map_reduce" on the current `llm`; verbose=True prints the prompts it sends.
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)
# Summarise only the first three chunks; the summary string is shown as the cell output.
chain.run(texts[0:3])



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"Java is a high-level, class-based, object-oriented programming language that is designed to have as few implementation dependencies as possible. It is a general-purpose programming language intended to let programmers write once, run anywhere (WORA),[17] meaning that compiled Java code can run on all platforms that support Java without the need to recompile.[18] Java applications are typically compiled to bytecode that can run on any Java virtual machine (JVM) regardless of the underlying computer architecture. The syntax of Java is similar to C and C++, but has fewer low-level facilities than either of them. The Java runtime provides dynamic capabilities (such as reflection and runtime code"


CONCISE SUMMARY:[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"capabili

' Java is a popular, high-level programming language that is object-oriented and class-based, with syntax similarities to C and C++. It has dynamic capabilities, including reflection and runtime code modification, making it a favored choice for web applications. The Java platform and its underlying technologies were originally released under private licenses and later released under GPL-2.0-only. The OpenJDK JVM is free and open-source, and is the default for Linux.'