# **LangChain Decoded**

## Getting Started

In [None]:
# Install the LangChain package
!pip install langchain

In [None]:
# Install the OpenAI package
!pip install openai

In [None]:
# Read the OpenAI API key from the OPENAI_API_KEY environment variable;
# when that variable is not set, fall back to the literal 'sk-XXX'.
import os

openai_api_key = os.getenv('OPENAI_API_KEY', 'sk-XXX')

## Part 2: Embeddings

In [None]:
# Embed a single text input with OpenAI text embeddings
from langchain.embeddings.openai import OpenAIEmbeddings

text = "This is a sample query."

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
query_result = embeddings.embed_query(text)

# Show the value returned by embed_query, then its length on the next line
print(query_result, len(query_result), sep="\n")

In [None]:
# Embed several text/document inputs with a single embed_documents call
from langchain.embeddings.openai import OpenAIEmbeddings

text = [
    "This is a sample query.",
    "This is another sample query.",
    "This is yet another sample query.",
]

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
doc_result = embeddings.embed_documents(text)

# Show the value returned by embed_documents, then its length on the next line
print(doc_result, len(doc_result), sep="\n")

In [None]:
# Test the pipeline with FakeEmbeddings (constructed with size=1481)
from langchain.embeddings import FakeEmbeddings

text = "This is a sample query."

embeddings = FakeEmbeddings(size=1481)
query_result = embeddings.embed_query(text)

# Show the value returned by embed_query, then its length on the next line
print(query_result, len(query_result), sep="\n")

In [None]:
# Demonstration: a request whose context length is > 8191 throws an error
from langchain.embeddings.openai import OpenAIEmbeddings

# Build the over-long input: 'Hello ' repeated 10000 times
long_text = 10000 * 'Hello '

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
query_result = embeddings.embed_query(long_text)
print(query_result)

In [None]:
!pip install tiktoken

In [None]:
# Limit the input text to a maximum number of tokens with tiktoken, then embed it
import tiktoken
from langchain.embeddings.openai import OpenAIEmbeddings

encoding_name = 'cl100k_base'
max_tokens = 8191

# Build the over-long input: 'Hello ' repeated 10000 times
long_text = 10000 * 'Hello '

# Encode the text into tokens and keep at most the first max_tokens of them
encoding = tiktoken.get_encoding(encoding_name)
tokens = encoding.encode(long_text)
tokens = tokens[:max_tokens]

# Decode the kept tokens back into a string, which is what gets embedded
truncated_text = encoding.decode(tokens)

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
query_result = embeddings.embed_query(truncated_text)

# Show the value returned by embed_query, then its length on the next line
print(query_result, len(query_result), sep="\n")