<a href="https://colab.research.google.com/github/imaabay/CA2_Repository/blob/main/method1/gpt_experiments/ST01D_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Experimenting with Tree of Thoughts (ToT) using LangGraph

In [None]:
!pip install langchain langchain-community langchain-openai langgraph

Collecting langchain-community
  Downloading langchain_community-0.3.8-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.2.9-py3-none-any.whl.metadata (2.6 kB)
Collecting langgraph
  Downloading langgraph-0.2.53-py3-none-any.whl.metadata (15 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain
  Downloading langchain-0.3.8-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.15 (from langchain)
  Downloading langchain_core-0.3.21-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  

In [None]:
from langchain_openai import OpenAI, ChatOpenAI
from langchain import PromptTemplate
import os
from google.colab import userdata

# Export the API keys (fetched through google.colab's userdata) together with
# the tracing flag and endpoint as environment variables, in one call.
os.environ.update({
    "OPENAI_API_KEY": userdata.get('OPENAI_API_KEY'),
    "LANGCHAIN_TRACING_V2": "true",
    "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    "LANGCHAIN_API_KEY": userdata.get("LANGCHAIN_API_KEY"),
})

In [None]:
from langsmith import Client

# LangSmith client built with no explicit arguments (presumably configured from
# the LANGCHAIN_* environment variables set above — confirm). It is not
# referenced again in the cells shown in this notebook.
client = Client()

## Set up evaluation metrics

In [None]:
from langchain_openai import ChatOpenAI # Try ChatAnthropic as well
from langchain_core.prompts.prompt import PromptTemplate
from langsmith.evaluation import LangChainStringEvaluator

# Grading prompt for the "context_qa" evaluator. It must keep exactly the
# placeholders {query}, {context} and {result}.
# The closing instruction asks for an explicit CORRECT / INCORRECT verdict:
# the QA evaluator derives its score from that verdict, and the previous
# prompt never asked for one, so no score could be parsed from free-form text.
_PROMPT_TEMPLATE = """
  You are an expert tasked with evaluating the explainability of large language model-generated answers for medical diagnoses.
  Your role is to assess whether the given answers provide sufficient explanation and clarity for a user to understand the medical diagnosis.
  You are assessing the following question:
  {query}
  Here is the real answer:
  {context}
  You are assessing the following predicted answer:
  {result}
  Grade the predicted answer as CORRECT if it is consistent with the real answer and explains the diagnosis clearly enough for a user to understand it; otherwise grade it INCORRECT.
  Give a brief justification, then end with a final line of the form `GRADE: CORRECT` or `GRADE: INCORRECT`.
"""

PROMPT = PromptTemplate(
    input_variables = ["query", "context", "result"],
    template = _PROMPT_TEMPLATE
)

# Judge model used by the context_qa evaluator.
eval_llm = ChatOpenAI(temperature=0, model="gpt-4-turbo")

# Criteria scored by the "labeled_criteria" evaluators, in reporting order.
# NOTE(review): these evaluators are not given `eval_llm`, so they run on
# whatever default model the library picks — confirm that this is intended.
_CRITERIA = (
    "conciseness",
    "coherence",
    "detail",
    "helpfulness",
    "depth",
    "insensitivity",
    "harmfulness",
)

evaluators = [
  LangChainStringEvaluator("context_qa", config={"llm": eval_llm, "prompt": PROMPT}),
  *(
      LangChainStringEvaluator("labeled_criteria", config={"criteria": criterion})
      for criterion in _CRITERIA
  ),
]

## Construct graph
Ref: https://cobusgreyling.medium.com/langchain-langsmith-llm-guided-tree-of-thought-47a2cd5bcfca


In [None]:
import operator
from typing import Annotated, TypedDict
from pydantic import BaseModel
from langchain_openai import AzureChatOpenAI
from langgraph.constants import Send
from langgraph.graph import END, StateGraph, START

In [None]:
# Chat model used by all four graph node functions below (temperature=0).
model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Define prompts for each step of the process
# Each template is a plain string filled in with str.format() by the matching
# node function, so the placeholder names must match the keyword arguments
# passed there.

# Step 1 — generate differential diagnoses. Placeholders: {input}, {factors}.
step1_prompt = """
You are an AI chatbot designed to assist doctors in diagnosing patients. Your role is to provide accurate diagnoses based on the {input}. Answer by exploring differential diagnosis.
Consider a variety of factors such as {factors}
"""

# Step 2 — evaluate a generated solution. Placeholder: {solutions}.
step2_prompt = """
For each diagnosis, evaluate their potential. Consider their pros and cons, explainability of the output, clarity, relevance, logical flow.
Assign a probability of success and a confidence level to each option based on these factors.
Solutions:
{solutions}
"""

# Step 3 — elaborate on an evaluation. Placeholder: {review}.
step3_prompt = """
For each diagnosis, deepen the thought process. Aim to provide explanatory details on the thought process, rather than just state conclusions,
including briefly noting why some thoughts were deemed less ideal.
Evaluation:
{review}
"""

# Step 4 — rank the diagnoses. Placeholder: {deepen_thought_process}.
step4_prompt = """
Based on the evaluations, rank the diagnosis in order of promise. Provide a justification for each ranking and offer any final thoughts or considerations for each solution
Detailed analysis:
{deepen_thought_process}
"""

In [None]:
# Define data structure of AI outputs
# Each model below is passed to model.with_structured_output(...) by one node.
# Structured output of step 1: the generated solutions as a list of strings.
class Solutions(BaseModel):
  solutions: list[str]

# Structured output of step 2 (evaluate_solution): one review text.
class Review(BaseModel):
  review: str

# Structured output of step 3 (deepen_thought): one detailed analysis text.
class DeepThought(BaseModel):
  deep_thought: str

# Structured output of step 4 (rank_solutions): the ranking as a single string.
class RankedSolutions(BaseModel):
  ranked_solutions: str

# Define the overall state of the process
# Fields annotated with operator.add are lists written by several branches —
# presumably LangGraph merges those writes by list concatenation (confirm).
class OverallState(TypedDict):
  input: str  # problem statement inserted into the step 1 prompt
  factors: str  # factors text inserted into the step 1 prompt
  solutions: Annotated[list[str], operator.add]  # written by generate_solutions
  reviews: Annotated[list[str], operator.add]  # one entry added per evaluate_solution call
  deep_thoughts: Annotated[list[str], operator.add]  # one entry added per deepen_thought call
  ranked_solutions: str  # final ranking written by rank_solutions

# Define the state for individual solution processing
# This is the payload carried by each Send(...) branch. "solution" holds one
# generated solution when sent to evaluate_solutions, and one review text when
# sent to deepen_thought (see continue_to_deep_thought).
class SolutionState(TypedDict):
  solution: str

# Graph component functions
def generate_solutions(state: OverallState):
  """Step 1 node: ask the model for the initial list of solutions.

  Fills step1_prompt with state["input"] and state["factors"], requests a
  Solutions-structured reply, and returns its list under the "solutions" key.
  """
  filled_prompt = step1_prompt.format(input=state["input"], factors=state["factors"])
  structured_model = model.with_structured_output(Solutions)
  return {"solutions": structured_model.invoke(filled_prompt).solutions}

def evaluate_solution(state: SolutionState):
  """Step 2 node: review a single solution.

  Fills step2_prompt with state["solution"], requests a Review-structured
  reply, and returns its text as a one-element list under "reviews".
  """
  filled_prompt = step2_prompt.format(solutions=state["solution"])
  reviewer = model.with_structured_output(Review)
  verdict = reviewer.invoke(filled_prompt)
  return {"reviews": [verdict.review]}

def deepen_thought(state: SolutionState):
  """Step 3 node: produce a deeper analysis for a single item.

  state["solution"] is inserted into the {review} slot of step3_prompt (the
  fan-out in continue_to_deep_thought sends review texts under that key).
  Returns the DeepThought text as a one-element list under "deep_thoughts".
  """
  filled_prompt = step3_prompt.format(review=state["solution"])
  analyst = model.with_structured_output(DeepThought)
  analysis = analyst.invoke(filled_prompt)
  return {"deep_thoughts": [analysis.deep_thought]}

def rank_solutions(state: OverallState):
  """Step 4 node: rank the solutions from the collected deep analyses.

  Joins state["deep_thoughts"] with blank lines, fills step4_prompt with the
  result, requests a RankedSolutions-structured reply, and returns its text
  under "ranked_solutions".
  """
  combined_analysis = "\n\n".join(state["deep_thoughts"])
  ranking_prompt = step4_prompt.format(deepen_thought_process=combined_analysis)
  ranker = model.with_structured_output(RankedSolutions)
  return {"ranked_solutions": ranker.invoke(ranking_prompt).ranked_solutions}

In [None]:
# Define the mapping logic for parallel processing

def continue_to_evaluation(state: OverallState):
  """Fan out: build one Send to the "evaluate_solutions" node per solution."""
  branches = []
  for candidate in state["solutions"]:
    # Each parallel branch receives its own {"solution": ...} payload
    branches.append(Send("evaluate_solutions", {"solution": candidate}))
  return branches

def continue_to_deep_thought(state: OverallState):
  """Fan out: build one Send to the "deepen_thought" node per review."""
  branches = []
  for review_text in state["reviews"]:
    # The review travels under the "solution" key of the branch payload
    branches.append(Send("deepen_thought", {"solution": review_text}))
  return branches

In [None]:
# Build the workflow graph over the shared OverallState
graph = StateGraph(OverallState)

# Register every processing step as a named node
for node_name, node_fn in (
    ("generate_solutions", generate_solutions),
    ("evaluate_solutions", evaluate_solution),
    ("deepen_thought", deepen_thought),
    ("rank_solutions", rank_solutions),
):
  graph.add_node(node_name, node_fn)

# Wire the steps:
# START -> generate -> (fan-out) evaluate -> (fan-out) deepen -> rank -> END
graph.add_edge(START, "generate_solutions")
graph.add_conditional_edges(
    "generate_solutions", continue_to_evaluation, ["evaluate_solutions"]
)
graph.add_conditional_edges(
    "evaluate_solutions", continue_to_deep_thought, ["deepen_thought"]
)
graph.add_edge("deepen_thought", "rank_solutions")
graph.add_edge("rank_solutions", END)

# Turn the graph definition into a runnable application
app = graph.compile()

In [None]:
# Execute the graph

def execute_graph(question, factors="age, gender"):
  """Run the compiled graph for one question and return its last streamed chunk.

  The previous version returned from inside the loop, so only the first chunk
  yielded by app.stream was ever returned and the later steps of the graph
  never reached the caller. The stream is now consumed to the end.

  Args:
    question: Text stored under the "input" key of the initial state.
    factors: Text stored under the "factors" key of the initial state.
      Defaults to the value that used to be hard-coded.

  Returns:
    The last chunk yielded by app.stream (presumably the update written by the
    rank_solutions node — confirm against the stream mode in use), or None if
    the stream yields nothing.
  """
  last_chunk = None
  for chunk in app.stream({"input": question, "factors": factors}):
    last_chunk = chunk
  return last_chunk

In [None]:
def langsmith_app(inputs):
  """Adapter for the evaluation run: execute the graph on inputs["question"].

  Returns a dict holding whatever execute_graph returned under "output".
  """
  return {"output": execute_graph(inputs["question"])}

In [None]:
from langsmith import evaluate

# Run langsmith_app (the LangGraph version defined just above) against the data
# named "XAI_EVAL" and score each output with the `evaluators` list. Results
# are recorded under an experiment name starting with the given prefix.
experiment_results = evaluate(
    langsmith_app, # AI System
    data = "XAI_EVAL",
    evaluators=evaluators,
    experiment_prefix="tot-template-evaluation-gpt-4o-mini"
)

View the evaluation results for experiment: 'tot-template-evaluation-gpt-4o-mini-2acbf5f5' at:
https://smith.langchain.com/o/683c5cb9-3b64-5127-a5a8-405b032642f2/datasets/b7b8dceb-8703-42f2-92ba-cfa8aad7542b/compare?selectedSessions=c8b01721-b855-4d58-8be9-04c744b06af5




0it [00:00, ?it/s]

## Using LangChain

Ref: https://medium.com/@astropomeai/implementing-the-tree-of-thoughts-in-langchains-chain-f2ebc5864fac

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [None]:
def _build_step_chain(template, input_variable, output_key):
  """Create one step of the Tree-of-Thoughts pipeline as an LLMChain.

  Args:
    template: Prompt text containing a single `{input_variable}` placeholder.
    input_variable: Name of that placeholder (the key this step reads).
    output_key: Key under which this step's answer is stored.

  Returns:
    An LLMChain backed by its own gpt-4o-mini chat model (temperature=0).
  """
  return LLMChain(
      llm=ChatOpenAI(temperature=0, model_name="gpt-4o-mini"),
      prompt=PromptTemplate(input_variables=[input_variable], template=template),
      output_key=output_key
  )


# One template per step. Each step's placeholder is the output_key of the step
# before it, which is how the steps are linked when they run in sequence.
_STEP1_TEMPLATE = """
Step 1:
You are an AI chatbot designed to assist doctors in diagnosing patients. Your role is to provide accurate diagnoses based on the {input}. Answer by exploring differential diagnosis.
A:
"""

_STEP2_TEMPLATE = """
Step 2:
For each diagnosis, evaluate their potential. Consider their pros and cons, explainability of the output, clarity, relevance, logical flow.
Assign a probability of success and a confidence level to each option based on these factors.

{solutions}
A:
"""

_STEP3_TEMPLATE = """
Step 3:
For each diagnosis, deepen the thought process. Aim to provide explanatory details on the thought process, rather than just state conclusions,
including briefly noting why some thoughts were deemed less ideal.
{review}
A:
"""

_STEP4_TEMPLATE = """
Step 4:
Based on the evaluations, rank the diagnosis in order of promise. Provide a justification for each ranking and offer any final thoughts or considerations for each solution
{deepen_thought_process}
A:
"""

# input -> solutions -> review -> deepen_thought_process -> ranked_diagnosis
chain1 = _build_step_chain(_STEP1_TEMPLATE, "input", "solutions")
chain2 = _build_step_chain(_STEP2_TEMPLATE, "solutions", "review")
chain3 = _build_step_chain(_STEP3_TEMPLATE, "review", "deepen_thought_process")
chain4 = _build_step_chain(_STEP4_TEMPLATE, "deepen_thought_process", "ranked_diagnosis")

  chain1 = LLMChain(


In [None]:
from langchain.chains import SequentialChain

# Chain the four steps in order. Each step's output_key is the input variable
# of the next one (solutions -> review -> deepen_thought_process), and only
# "ranked_diagnosis" from the last step is declared as an output variable.
overall_chain = SequentialChain(
    chains=[chain1, chain2, chain3, chain4],
    input_variables=["input"],
    output_variables=["ranked_diagnosis"],
    verbose=True  # prints the "Entering new SequentialChain chain..." trace on every run
)

In [None]:
def run_tot_chain(question):
  """Run the four-step sequential chain on one question.

  Uses `overall_chain.invoke(...)` instead of calling the chain object
  directly; the direct call is deprecated and emits a warning on every run.

  Args:
    question: Text passed to the chain under the "input" key.

  Returns:
    Whatever overall_chain.invoke returns for {"input": question}.
  """
  return overall_chain.invoke({"input": question})

In [None]:
def langsmith_app(inputs):
  """Adapter for the evaluation run: run the ToT chain on inputs["question"].

  Returns a dict holding whatever run_tot_chain returned under "output".
  """
  return {"output": run_tot_chain(inputs["question"])}

In [None]:
from langsmith import evaluate

# Run langsmith_app (the SequentialChain version defined just above) against
# the data named "XAI_EVAL" and score each output with the `evaluators` list.
# The "_v2" prefix keeps this experiment apart from the LangGraph run.
experiment_results = evaluate(
    langsmith_app, # AI System
    data = "XAI_EVAL",
    evaluators=evaluators,
    experiment_prefix="tot-template-evaluation-gpt-4o-mini_v2"
)

View the evaluation results for experiment: 'tot-template-evaluation-gpt-4o-mini_v2-31030c76' at:
https://smith.langchain.com/o/683c5cb9-3b64-5127-a5a8-405b032642f2/datasets/b7b8dceb-8703-42f2-92ba-cfa8aad7542b/compare?selectedSessions=b360c270-e8a6-4a24-8607-bdde1ea8d45f




0it [00:00, ?it/s]



[1m> Entering new SequentialChain chain...[0m

[1m> Entering new SequentialChain chain...[0m


[1m> Entering new SequentialChain chain...[0m


[1m> Entering new SequentialChain chain...[0m



[1m> Entering new SequentialChain chain...[0m


  output = overall_chain({"input": question })
  output = overall_chain({"input": question })



[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m
