In [10]:
import openai
from dotenv import load_dotenv

In [12]:
# Load the .env file from local
env_path = r'E:\YTReusable\.env'
load_dotenv(env_path)

True

In [14]:
#pip install -U langchain langchain-openai

# 1- chains/Runnable

In [17]:
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini")

In [31]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")

chain = prompt | model | StrOutputParser()

In [33]:
chain.invoke({"topic": "cricket"})

'Why did the cricket team bring a ladder to the game?  \n\nBecause they heard the match was going to be a high score!'

In [35]:
from langchain_core.runnables import RunnableLambda

runnable = RunnableLambda(lambda x: str(x))
runnable.batch([7, 8, 9])

# Async variant:
# await runnable.abatch([7, 8, 9])

['7', '8', '9']

# 2 - Document loader

In [38]:
file_path = (
    "C://Users//amanr//OneDrive//Desktop//Cricket.pdf"
)

In [40]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path)
pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [42]:
print(f"{pages[0].metadata}\n")
print(pages[0].page_content)

{'source': 'C://Users//amanr//OneDrive//Desktop//Cricket.pdf', 'page': 0}

10
ricket grew out of the many stick-and-
ball games played in England 500 years
ago. The word ‘bat’ is an old English word that
simply means stick or club. By the seventeenth
century, cricket had evolved enough to be
recognisable as a distinct game. Till the middle of
the eighteenth century, bats were roughly the same
shape as hockey sticks, curving outwards at the
bottom. There was a simple reason for this: the ball
was bowled underarm, along the ground and the
curve at the end of the bat gave the batsman the
best chance of making contact.
One of the peculiarities of cricket is that a
Test match can go on for five days and still end
C
Before you read
Sport is an integral part of a healthy life. It is one way in
which we amuse ourselves, compete with each other and
stay fit. Among the various sports such as hockey, football
and tennis, cricket appears to be the most appealing
national entertainment today. How m

# 3 - text splitter

In [45]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load example document
with open("Aman.txt") as f:
    Aman = f.read()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10,
    chunk_overlap=2,
    length_function=len,
    is_separator_regex=False,
)
texts = text_splitter.create_documents([Aman])
print(texts[0])
print(texts[1])

page_content='Hello'
page_content='This is'


# 4 - store in chroma DB

In [48]:
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Load the document, split it into chunks, embed each chunk and load it into the vector store.
raw_documents = TextLoader('Aman.txt').load()
text_splitter = CharacterTextSplitter(chunk_size=10, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)

In [50]:
from langchain_chroma import Chroma

db = Chroma.from_documents(documents, OpenAIEmbeddings())

In [52]:
query = "Welcome"
docs = db.similarity_search(query)
print(docs[0].page_content)

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Hello
This is Aman
Welcome to UFDS


# 5 - Memory and states

In [55]:
from langchain_core.messages import HumanMessage

model.invoke([HumanMessage(content="Hi! I'm Bob")])

AIMessage(content='Hi Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 11, 'total_tokens': 22, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_bd83329f63', 'finish_reason': 'stop', 'logprobs': None}, id='run-f64ce809-37e5-483b-8903-19a235ed2c19-0', usage_metadata={'input_tokens': 11, 'output_tokens': 11, 'total_tokens': 22, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [57]:
model.invoke([HumanMessage(content="What's my name?")])

AIMessage(content="I'm sorry, but I don't know your name. If you'd like to share it, feel free!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 11, 'total_tokens': 32, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-2c220115-824b-42d7-af65-f8c82557068c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 21, 'total_tokens': 32, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [59]:
from langchain_core.messages import AIMessage

model.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="Hello Bob! How can I assist you today?"),
        HumanMessage(content="What's my name?"),
    ]
)

AIMessage(content='Your name is Bob! How can I help you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 33, 'total_tokens': 46, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-3689bf1c-9c58-4563-a6a1-0efef3b141b6-0', usage_metadata={'input_tokens': 33, 'output_tokens': 13, 'total_tokens': 46, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [61]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define a new graph
workflow = StateGraph(state_schema=MessagesState)


# Define the function that calls the model
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}


# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [65]:
config = {"configurable": {"thread_id": "abc123"}}

In [67]:
query = "Hi! I'm Bob."

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)
output["messages"][-1].pretty_print()  # output contains all messages in state


Hi Bob! How can I assist you today?


In [69]:
query = "I am a data scientist"
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)

In [71]:
query = "What do I do?"
input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)

In [73]:
output["messages"][-1].pretty_print()


As a data scientist, your responsibilities might include a variety of tasks, such as:

1. **Data Collection and Cleaning**: Gathering data from various sources, ensuring its accuracy and completeness, and preprocessing it to make it usable for analysis.

2. **Exploratory Data Analysis (EDA)**: Analyzing and visualizing data to uncover patterns, trends, and insights that can inform decision-making.

3. **Statistical Analysis**: Applying statistical methods to analyze data and test hypotheses.

4. **Machine Learning**: Developing, training, and deploying machine learning models to make predictions or automate decisions based on data.

5. **Communication**: Presenting your findings to stakeholders through data visualizations, reports, or presentations to help inform business strategies.

6. **Collaboration**: Working with other teams (like engineering, product management, and business) to understand their data needs and deliver impactful solutions.

7. **Continuous Learning**: Staying up