In [2]:
import os
from typing import List, Optional

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama

from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

from langchain_community.document_loaders import PyPDFLoader
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

from duckduckgo_search import DDGS
from langchain.tools import tool

In [3]:
# Alternative backend (disabled): local Llama 3.1 through Ollama with an empty `format`.
# llm = ChatOllama(model="llama3.1:latest",
#                  temperature=0.3,
#                  format="")

# Active backend: every chain below is built on this `llm` object.
# NOTE(review): the response metadata recorded further down reports
# 'llama3.1:latest', so some saved outputs appear to come from the alternative
# above -- confirm which configuration each section was actually run with.
llm = ChatOllama(model="llama3-groq-tool-use:8b",
                 temperature=0.3,
                 format="json")

# Alternative backend (disabled): hosted Groq model; its API key is read from
# the GROQ_API_KEY environment variable.
# llm = ChatGroq(model="llama3-groq-70b-8192-tool-use-preview", # "llama3-70b-8192"
#                temperature=0.3,
#                api_key=os.getenv("GROQ_API_KEY"))

## LCEL Demo

In [4]:
# Smallest possible LCEL chain: a one-variable prompt piped straight into the model.
template = """Give me small report about {topic}"""
prompt_template = PromptTemplate.from_template(template)
llm_chain = prompt_template | llm

# Fill the {topic} placeholder and run the chain once.
response = llm_chain.invoke(
    {"topic": "Artificial Intelligence"}
)

In [5]:
response

AIMessage(content="Here's a brief report on Artificial Intelligence:\n\n**Overview**\n\nArtificial Intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and problem-solving. AI has been around for decades, but recent advances in machine learning algorithms and data storage have made it increasingly powerful.\n\n**Key Features**\n\n* **Machine Learning**: AI systems use complex algorithms to learn from data and improve their performance over time.\n* **Natural Language Processing**: AI can understand and generate human language, enabling applications like chatbots and voice assistants.\n* **Computer Vision**: AI can interpret and understand visual information from images and videos.\n* **Robotics**: AI is used in robotics to control and navigate robots that interact with physical environments.\n\n**Applications**\n\n* **Virtual Assistants**: AI-powe

In [11]:
"Give me small report about {topic}".format(topic="Artificial Intelligence")  

'Give me small report about Artificial Intelligence'

In [6]:
# Print the response type first, then each part of the message behind a divider line.
divider = "\n==============\n"

print(type(response))
for field in ("response_metadata", "usage_metadata", "content"):
    print(divider)
    print(getattr(response, field))

<class 'langchain_core.messages.ai.AIMessage'>


{'model': 'llama3.1:latest', 'created_at': '2024-07-25T17:46:02.016745Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 6836633875, 'load_duration': 28566333, 'prompt_eval_count': 17, 'prompt_eval_duration': 219291000, 'eval_count': 386, 'eval_duration': 6587842000}


{'input_tokens': 17, 'output_tokens': 386, 'total_tokens': 403}


Here's a brief report on Artificial Intelligence:

**Overview**

Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and problem-solving. AI has been around for decades, but recent advances in machine learning algorithms and data storage have made it increasingly powerful.

**Key Features**

* **Machine Learning**: AI systems use complex algorithms to learn from data and improve their performance over 

In [7]:
# Rebuild the earlier exchange as explicit chat messages so it can be replayed as history.
previous_ai_message = response.content
messages = [
    HumanMessage("Give me small report about Artificial Intelligence"),
    AIMessage(previous_ai_message),
]

system_template = """Answer the user prompt below using the chat history provided below."""

# Message order: system instruction, then the injected history, then the new user turn.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_template),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{user_prompt}"),
])

prompt_template.pretty_print()


Answer the user prompt below using the chat history provided below.


[33;1m[1;3m{chat_history}[0m


[33;1m[1;3m{user_prompt}[0m


In [8]:
# Render the prompt without calling the model so the final message list can be inspected.
prompt_value = prompt_template.invoke(
    {
        "chat_history": messages,
        "user_prompt": "what was your first sentence in your previous response.",
    }
)

# One print call; sep="\n" reproduces the line breaks of three separate prints.
print(type(prompt_value), "\n==============\n", prompt_value.messages, sep="\n")

<class 'langchain_core.prompt_values.ChatPromptValue'>


[SystemMessage(content='Answer the user prompt below using the chat history provided below.'), HumanMessage(content='Give me small report about Artificial Intelligence'), AIMessage(content="Here's a brief report on Artificial Intelligence:\n\n**Overview**\n\nArtificial Intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and problem-solving. AI has been around for decades, but recent advances in machine learning algorithms and data storage have made it increasingly powerful.\n\n**Key Features**\n\n* **Machine Learning**: AI systems use complex algorithms to learn from data and improve their performance over time.\n* **Natural Language Processing**: AI can understand and generate human language, enabling applications like chatbots and voice assistants.\n* **Computer Vision**: AI can interpret

In [9]:
# Same chat prompt, now piped into the model so it answers from the supplied history.
llm_chain = prompt_template | llm

response = llm_chain.invoke(
    {
        "chat_history": messages,
        "user_prompt": "what was your first sentence in your previous response.",
    }
)

print(response.content)

My first sentence in my previous response was:

"Here's a brief report on Artificial Intelligence:"


## RunnableLambda & RunnableParallel & RunnablePassThrough

In [25]:
def change_response(res):
    """Wrap the text of a model response between ++START++ and ++END++ marker lines.

    Args:
        res: Object exposing the response text as a string in ``res.content``.

    Returns:
        A string made of the start marker line, the content, and the end marker line.
    """
    pieces = ("++START++\n", res.content, "\n++END++")
    return "".join(pieces)

# Wrap the plain function so it can be composed with `|` like the other chain steps.
change_response_func = RunnableLambda(change_response)

template = """Give me small report about {topic}"""
prompt_template = PromptTemplate.from_template(template)

# prompt -> model -> post-processing step that adds the START/END markers.
llm_chain = prompt_template | llm | change_response_func

response = llm_chain.invoke(
    {"topic": "Artificial Intelligence"}
)
print(response)

++START++
Here's a brief report on Artificial Intelligence:

**Introduction**

Artificial Intelligence (AI) refers to the development of computer systems that can perform tasks that typically require human intelligence, such as learning, problem-solving, and decision-making. AI has been rapidly evolving over the past few decades and has become an essential part of our daily lives.

**History**

The concept of AI dates back to ancient civilizations, but the modern era of AI began in the 1950s with the development of the first computer programs that could simulate human thought processes. The Dartmouth Summer Research Project on Artificial Intelligence in 1956 is considered a significant milestone in the history of AI.

**Key Aspects**

There are several key aspects of AI:

1. **Machine Learning**: This involves training algorithms to learn from data and improve their performance over time.
2. **Natural Language Processing (NLP)**: This enables computers to understand, interpret, and gen

In [31]:
def get_animal(a):
    """Return the fixed animal word; the chain input ``a`` is ignored."""
    animal = "lion"
    return animal

def get_random_word(b):
    """Return the fixed extra word; the chain input ``b`` is ignored."""
    word = "meat"
    return word

change_response_func = RunnableLambda(change_response)

template = """Use given words in one sentence: {animal} and {random_word}. Also consider the word provided by user: {user_prompt}"""
prompt_template = PromptTemplate.from_template(template)

# Three branches keyed by the prompt's variable names: two helper functions
# and a passthrough that forwards the user's word.
retrieval = RunnableParallel(
    animal=RunnableLambda(get_animal),
    random_word=RunnableLambda(get_random_word),
    user_prompt=RunnablePassthrough(),
)

llm_chain = retrieval | prompt_template | llm

response = llm_chain.invoke("blood")
print(response.content)

Here's a sentence using all three words:

The lion, famished from its long hunt, devoured its prey with great relish, savoring the taste of fresh meat and the rich, crimson blood that still pulsed within its lifeless body.


## RAG Pipeline

In [6]:
# Location of the PDF used by the RAG and summarization sections.
# Set the RAG_PDF_PATH environment variable to run the notebook on another
# machine; the original author's path stays as the fallback, so existing runs
# behave exactly as before.
pdf_path = os.getenv(
    "RAG_PDF_PATH",
    "/Users/toygunkarabas/Development/AI-Tutorials/test_cases/Categorical-and-numerical-attribute data clustering based on a unified similarity metric without knowing cluster number.pdf",
)

document_loader = PyPDFLoader(pdf_path)
docs = document_loader.load()  # each Document's metadata records the source path and page number
docs

[Document(metadata={'source': '/Users/toygunkarabas/Development/AI-Tutorials/test_cases/Categorical-and-numerical-attribute data clustering based on a unified similarity metric without knowing cluster number.pdf', 'page': 0}, page_content='Categorical-and-numerical-attribute data clustering based on a uniﬁed\nsimilarity metric without knowing cluster number\nYiu-ming Cheunga,b,n, Hong Jiaa\naDepartment of Computer Science and Institute of Computational and Theoretical Studies, Hong Kong Baptist University, Hong Kong, China\nbUnited International College, Beijing Normal University-Hong Kong Baptist University, Zhuhai, China\narticle info\nArticle history:\nReceived 19 March 2012Received in revised form29 December 2012Accepted 23 January 2013\nAvailable online 31 January 2013\nKeywords:\nClustering\nSimilarity metricCategorical attributeNumerical attributeNumber of clustersabstract\nMost of the existing clustering approaches are applicable to purely numerical or categorical data only,\nb

In [4]:
# Embed the loaded documents with OpenAI embeddings (key read from OPENAI_API_KEY)
# and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

In [6]:
# Expose the store as a retriever that returns the two closest matches per query.
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=2),
)

retriever.invoke("mixed data clustering algorithm")

[Document(metadata={'page': 4, 'source': '/Users/toygunkarabas/Development/AI-Tutorials/test_cases/Categorical-and-numerical-attribute data clustering based on a unified similarity metric without knowing cluster number.pdf'}, page_content='4. Iterative clustering algorithm\nIn this section, we will present an iterative clustering algo-\nrithm based on the proposed object-cluster similarity metric to\nconduct clustering analysis.\nThis paper concentrates on hard partition only, i.e., qijAf0,1g,\nalthough it can be easily extended to the soft partition in terms of\nposterior probability. Under the circumstances, given a set of N\nobjects, the optimal Qn¼fqn\nijgin Eq. (1)can be given by\nqn\nij¼1i f sðxi,CjÞZsðxi,CrÞ81rrrk,\n0 otherwise ,(\nð22Þ\nwhere i¼1,2,... ,Nand j¼1,2,... ,k. That is, each object xiwill be\nassigned to the cluster that has the largest object-cluster similarity\nwith it among the kclusters. Therefore, an iterative algorithm can\nbe conducted as Algorithm 1 to implem

In [21]:
template = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""


def format_docs(documents):
    """Join the text of the retrieved documents into one context string.

    Without this step the retriever's raw list of Document objects is formatted
    straight into the prompt, so the model receives their Python repr (metadata
    and escaped newlines) instead of clean page text.

    Args:
        documents: Iterable of objects exposing their text as ``page_content``.

    Returns:
        The page texts separated by blank lines.
    """
    return "\n\n".join(document.page_content for document in documents)


prompt_template = ChatPromptTemplate.from_messages([("human", template)])
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

# The question is forwarded unchanged; the context is retrieved and then
# flattened to plain text before it reaches the prompt.
rag_chain = (
    {"context": retriever | RunnableLambda(format_docs), "question": RunnablePassthrough()}
    | prompt_template
    | llm
)

In [22]:
response = rag_chain.invoke("tell me about mixed data clustering algorithm")

print(response.content)

Based on the provided context, here is an overview of the Mixed Data Clustering Algorithm:

**Overview:**
The Mixed Data Clustering Algorithm (OCIL) is a parameter-free algorithm that can handle mixed data sets containing both categorical and numerical attributes. It uses a novel similarity metric called object-cluster similarity to measure the similarity between objects in different clusters.

**Key Features:**

1. **Parameter-Free:** The OCIL algorithm does not require any parameters to be set, making it easy to use.
2. **Mixed Data Handling:** The algorithm can handle data sets with both categorical and numerical attributes.
3. **Object-Cluster Similarity Metric:** A novel similarity metric that measures the similarity between objects in different clusters.

**Comparison with Other Algorithms:**
The OCIL algorithm was compared with k-means, k-prototype, and k-modes algorithms on mixed data sets. The results showed that OCIL outperformed these algorithms in terms of clustering accura

In [23]:
retrieved_docs = retriever.invoke("tell me about mixed data clustering algorithm")
print(retrieved_docs[0].page_content)

suffered the most penalization and gained the least cluster mem-
bers. This penalization has been transmitted and strengthened
during the following iterations and after the fourth epoch, no data
points were assigned to the second cluster due to its low weight. If
the iteration continues, one more epoch later we can get l2¼0,
which indicates that this cluster has been totally eliminated from
the hypothetic cluster model.
6. Experiments
This section investigates the effectiveness of the proposed
approaches for data clustering. We applied them to various data
sets obtained from UCI Machine Learning Data Repository (URL:
http://archive.ics.uci.edu/ml/ ) and compared their performance
with the existing counterparts. In the experiments, the clustering
accuracy [37] was estimated by
ACC ¼PN
i¼1dðci,map ðliÞÞ
N,
where Nis the number of instances in the data set, cistands for
the provided label, map ðliÞis a mapping function which maps the
obtained cluster label lito the equivalent label from t

# Summarization Pipeline

In [12]:
# Map step: summarize one chunk of text on its own.
# The instruction sentence previously read "should at least 10 sentences";
# the missing verb is restored so the length constraint is unambiguous.
map_template = """
You are an expert at summarizing given context. The summary should be at least 10 sentences and at most 25 sentences. Here is the context:

{context}
"""

map_system_template = PromptTemplate.from_template(map_template)
map_chain = map_system_template | llm


# Reduce step: condense already-produced summaries, steered by a user prompt.
reduce_template = """
You are an expert at summarizing given context considering user prompt. 

Here is the user prompt:
{user_prompt}

Here is the context:
{context}
"""

reduce_system_template = PromptTemplate.from_template(reduce_template) # ChatPromptTemplate.from_messages([("system", reduce_template)])
reduce_chain = reduce_system_template | llm

In [13]:
# Map step: run map_chain once per loaded document and collect the summary
# texts in the same order as `docs`.
page_summaries = []

for doc in docs:
    # NOTE: this rebinds the notebook-level `response` on every iteration.
    response = map_chain.invoke({"context": doc.page_content})
    page_summaries.append(response.content)
    
page_summaries

['Here is a summary of the given context in 17 sentences:\n\nA clustering approach for mixed data composed of numerical and categorical attributes has been proposed, which eliminates the gap between similarity metrics for categorical and numerical data. The approach presents a general clustering framework based on object-cluster similarity and develops a unified similarity metric that can be applied to data with categorical, numerical, and mixed attributes. An iterative clustering algorithm is developed within this framework, whose performance is experimentally demonstrated on different benchmark data sets.\n\nThe algorithm addresses the challenging problem of selecting the number of clusters, which is crucial in clustering analysis but often not available from a practical viewpoint. The proposed approach presents a penalized competitive learning algorithm that can determine the number of clusters automatically by gradually eliminating redundant clusters.\n\nExperimental results show t

In [15]:
# Join the per-page summaries with a real line break. The previous separator
# "/n " inserted the literal characters "/n" between summaries instead of a newline.
page_summaries_str = "\n".join(page_summaries)
print(page_summaries_str)

Here is a summary of the given context in 17 sentences:

A clustering approach for mixed data composed of numerical and categorical attributes has been proposed, which eliminates the gap between similarity metrics for categorical and numerical data. The approach presents a general clustering framework based on object-cluster similarity and develops a unified similarity metric that can be applied to data with categorical, numerical, and mixed attributes. An iterative clustering algorithm is developed within this framework, whose performance is experimentally demonstrated on different benchmark data sets.

The algorithm addresses the challenging problem of selecting the number of clusters, which is crucial in clustering analysis but often not available from a practical viewpoint. The proposed approach presents a penalized competitive learning algorithm that can determine the number of clusters automatically by gradually eliminating redundant clusters.

Experimental results show the effic

In [18]:
# Reduce step: condense the joined page summaries into one answer.
# The user prompt previously misspelled "summary" as "summmary".
final_response = reduce_chain.invoke(
    {
        "user_prompt": "Give me a comprehensive summary with all key points.",
        "context": page_summaries_str,
    }
)
print(final_response.content)

It seems like you've provided a summary of various papers related to data clustering and machine learning. Here's a breakdown of the 20 sentences:

**Papers and Research**

* The first paper [27] discusses the impact of dissimilarity measures on the k-modes clustering algorithm.
* Paper [28] by Akaike presents a new approach to statistical model identification.
* Schwarz's work in [29] focuses on estimating the dimension of a model.
* Cheung's research in [30] involves a competitive and cooperative learning approach to robust data clustering.

**Survey Papers**

* Filippone et al.'s survey in [31] covers kernel and spectral methods for clustering.
* Jain's paper in [32] provides an overview of 50 years of progress in data clustering beyond k-means.

**New Approaches**

* Basak and Krishnapuram's work in [33] proposes an interpretable hierarchical clustering method using an unsupervised decision tree.
* Lozano et al.'s research in [34] compares four initialization methods for the k-mean

# Tool/Function Calling

In [4]:
@tool
def search_web(query: str, max_results: Optional[int] = 5) -> List[dict]:
    """
    Search for text on duckduckgo.com.

    Args:
        query (str): The query to search for.
        max_results Optional[int]: The maximum number of search results to retrieve (default 5).
    Returns:
        List of search results, one dict per hit with 'title', 'href' and 'body' keys.
    """
    with DDGS() as ddgs:
        # list() materializes the hits whether ddgs.text() yields lazily or
        # already returns a list; the query is passed through as-is.
        results = list(ddgs.text(query, region='wt-wt', safesearch='off', max_results=max_results))
    return results


llm_with_tools = llm.bind_tools(tools=[search_web])

In [5]:
# Prompt for the tool-enabled model ("helpfull" typo in the instruction fixed).
template = "You are a helpful assistant. Here is the user prompt: {user_prompt}"

prompt_template = PromptTemplate.from_template(template)

# The model may answer directly or respond with a tool call for search_web.
chain = prompt_template | llm_with_tools

In [6]:
response = chain.invoke({"user_prompt": "what is the latest openai model"})

In [7]:
response

AIMessage(content='', response_metadata={'model': 'llama3-groq-tool-use:8b', 'created_at': '2024-07-26T09:34:33.659459Z', 'message': {'role': 'assistant', 'content': '', 'tool_calls': [{'function': {'name': 'search_web', 'arguments': {'query': 'latest OpenAI model'}}}]}, 'done_reason': 'stop', 'done': True, 'total_duration': 670534500, 'load_duration': 26419250, 'prompt_eval_count': 221, 'prompt_eval_duration': 218982000, 'eval_count': 26, 'eval_duration': 424005000}, id='run-db2565e2-832e-4c57-b5ad-98134c3d45d9-0', tool_calls=[{'name': 'search_web', 'args': {'query': 'latest OpenAI model'}, 'id': 'cd6ac7a1-b7c7-4001-bb04-393d75ce83da', 'type': 'tool_call'}], usage_metadata={'input_tokens': 221, 'output_tokens': 26, 'total_tokens': 247})

In [8]:
response.tool_calls

[{'name': 'search_web',
  'args': {'query': 'latest OpenAI model'},
  'id': 'cd6ac7a1-b7c7-4001-bb04-393d75ce83da',
  'type': 'tool_call'}]

In [9]:
response.tool_calls[0]["args"]

{'query': 'latest OpenAI model'}

In [10]:
def check_response(response):
    """Run the web search the model asked for, or fall back to its text answer.

    Args:
        response: Model output exposing ``tool_calls`` (a list of dicts that
            each carry an "args" entry) and ``content``.

    Returns:
        The result of ``search_web.invoke`` for the first tool call, or
        ``response.content`` when no tool call was made or running it failed.
    """
    try:
        tool_calls = response.tool_calls
        if tool_calls:
            # Only the first requested call is executed; its args go to search_web unchanged.
            return search_web.invoke(tool_calls[0]["args"])
    except Exception:
        # Deliberate best-effort fallback to the model's text. Catching Exception
        # rather than using a bare `except:` lets KeyboardInterrupt and SystemExit
        # propagate, so an interrupted cell actually stops.
        return response.content
    return response.content


# Prompt for the tool-enabled model ("helpfull" typo in the instruction fixed).
template = "You are a helpful assistant. Here is the user prompt: {user_prompt}"

prompt_template = PromptTemplate.from_template(template)

# prompt -> tool-enabled model -> check_response, which runs the requested
# search or falls back to the model's text.
chain_with_runtool = prompt_template | llm_with_tools | RunnableLambda(check_response)

In [11]:
final_response = chain_with_runtool.invoke({"user_prompt": "get latest news about latest openai model"})

In [12]:
final_response

[{'title': 'OpenAI unveils newest AI model, GPT-4o | CNN Business',
  'href': 'https://www.cnn.com/2024/05/13/tech/openai-altman-new-ai-model-gpt-4o/index.html',
  'body': 'OpenAI on Monday announced its latest artificial intelligence large language model that it says will make ChatGPT smarter and easier to use. The new model, called GPT-4o, is an update from the ...'},
 {'title': 'OpenAI Says It Has Begun Training a New Flagship A.I. Model',
  'href': 'https://www.nytimes.com/2024/05/28/technology/openai-gpt4-new-model.html',
  'body': 'May 28, 2024. OpenAI said on Tuesday that it had begun training a new flagship artificial intelligence model that would succeed the GPT-4 technology that drives its popular online chatbot, ChatGPT ...'},
 {'title': 'Exclusive: OpenAI working on new reasoning technology under code name ...',
  'href': 'https://www.reuters.com/technology/artificial-intelligence/openai-working-new-reasoning-technology-under-code-name-strawberry-2024-07-12/',
  'body': 'Ju