In [1]:
"""
4.1.3 LangChain/LangGraph Orchestration
We will use LangChain and LangGraph (from Modules 4, 9, 10) to manage the workflow.
LangGraph will handle the conversation flow by checking question safety, retrieving
relevant passages, generating a response, validating response safety, and sending it to the
user. If something fails the safety checks, the system can try again or use a backup
response.
"""

'\n4.1.3 LangChain/LangGraph Orchestration\nWe will use LangChain and LangGraph (from Modules 4, 9, 10) to manage the workflow.\nLangGraph will handle the conversation flow by checking question safety, retrieving\nrelevant passages, generating a response, validating response safety, and sending it to the\nuser. If something fails the safety checks, the system can try again or use a backup\nresponse.\n'

In [2]:
!pip3 install langgraph

Collecting langgraph
  Downloading langgraph-1.0.3-py3-none-any.whl.metadata (7.8 kB)
Collecting langgraph-checkpoint<4.0.0,>=2.1.0 (from langgraph)
  Downloading langgraph_checkpoint-3.0.1-py3-none-any.whl.metadata (4.7 kB)
Collecting langgraph-prebuilt<1.1.0,>=1.0.2 (from langgraph)
  Downloading langgraph_prebuilt-1.0.4-py3-none-any.whl.metadata (5.2 kB)
Collecting langgraph-sdk<0.3.0,>=0.2.2 (from langgraph)
  Downloading langgraph_sdk-0.2.9-py3-none-any.whl.metadata (1.5 kB)
Collecting ormsgpack>=1.12.0 (from langgraph-checkpoint<4.0.0,>=2.1.0->langgraph)
  Downloading ormsgpack-1.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting langchain-core>=0.1 (from langgraph)
  Downloading langchain_core-1.0.5-py3-none-any.whl.metadata (3.6 kB)
Downloading langgraph-1.0.3-py3-none-any.whl (156 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.8/156.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langgraph_check

In [4]:
# import statements
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode, tools_condition

import re, math
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import torch
import time
from pathlib import Path
from sklearn.metrics.pairwise import cosine_similarity
import openai
import os

from transformers import pipeline
from typing import Dict, List, Set, Tuple

In [5]:
from input_filter import InputFilter, filter_input
from output_filter import OutputFilter, filter_output

In [6]:
from dotenv import load_dotenv

load_dotenv()

openai_api_key = os.environ.get("OPENAI_API_KEY")

### Ada RAG and GPT:

In [7]:
# abot how many words per passage / chunk
passage_size = 120

def passage_text(text, passage_size=passage_size):
  words = text.split()
  passages = []
  for i in range(0, len(words), passage_size):
    passage = " ".join(words[i:i+passage_size]).strip()
    if len(passage) > 20:
      passages.append(passage)
  return passages

data = "cleaned_merged_fairy_tales_without_eos.txt"

# try to split into candidate stories
with open(data, "r", encoding="utf-8", errors="replace") as fh:
  raw = fh.read()

# split on multiple newlines
story_blocks = [b.strip() for b in re.split(r'\n{2,}', raw) if len(b.strip())>50]
print(f"Detected {len(story_blocks)} story-like blocks (heuristic)")

rows = []
for si, block in enumerate(story_blocks):
  title = f"story_{si}"
  passages = passage_text(block)
  for j, c in enumerate(passages):
    rows.append({"story_id": si, "title": title, "passage_id": f"{si}_{j}", "passage": c})

passages_df = pd.DataFrame(rows)
print("Total passages:", len(passages_df))
print()
output_dir = Path("./output_data")
output_dir.mkdir(parents=True, exist_ok=True)
passages_df.to_csv(output_dir/"passages.csv", index=False)

Detected 908 story-like blocks (heuristic)
Total passages: 32033



In [None]:
model_name = "all-MiniLM-L6-v2"
embed_model = SentenceTransformer(model_name)

# check gpu
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)
embed_model = embed_model.to(device)

passages = passages_df['passage'].tolist()
total_passages = len(passages)

# compute embeddings with batching
batch_size = 128
embeddings = []

start_time = time.time()
for i in range(0, total_passages, batch_size):
  batch = passages[i:i+batch_size]
  batch_embeddings = embed_model.encode(
      batch,
      convert_to_numpy=True,
      show_progress_bar=False
  )
  embeddings.append(batch_embeddings)

# combine all batches
embeddings = np.vstack(embeddings)
end_time = time.time()
print("Embedding computation time:", round(end_time - start_time, 1), "s")

# save final embeddings
np.save(output_dir / "embeddings.npy", embeddings)
passages_df.to_pickle(output_dir / "passages_meta.pkl")
print("Saved final embeddings and passages to:", output_dir)

In [10]:
def retrieve_top_passages(query, embeddings, passages_df, top_k=3):
  query_vec = embed_model.encode([query])
  sims = cosine_similarity(query_vec, embeddings)[0]
  top_indices = sims.argsort()[-top_k:][::-1]
  return passages_df.iloc[top_indices]

In [100]:
client = openai.OpenAI()

# define gpt query function
# MODIFIED to take message history
def ask_gpt(query, retrieved_passages, message_history, model="gpt-4o-mini", max_tokens=300, temperature=0.7):
  if isinstance(retrieved_passages, pd.DataFrame) and not retrieved_passages.empty:
      context_text = "\n\n".join(retrieved_passages['passage'].tolist())
  else:
      context_text = "No relevant passages found."

  prompt = f"""
      You are a friendly reading assistant for children ages 6 to 10.
      Use the passages below to answer the child's question.
      Always ask 1–2 follow-up questions to engage critical thinking.
      """

  messages = [{"role":"system", "content": prompt}]

  for message in message_history[-5:]:
      messages.append(message)

  recent_query = f"""
  Passages:
  {context_text}

  Child's question: {query}

  Answer:
  """

  messages.append({"role":"user", "content":recent_query})

  # call gpt
  response = client.chat.completions.create(
      model=model,
      messages=messages,
      temperature=temperature,
      max_tokens=max_tokens
  )

  # extract and return text
  return response.choices[0].message.content.strip()


### Creating Message State Tool Specified for Use Case

In [89]:
# in recitation, we saw an example with Messages State,
# but that does not suit the specific needs of our project
# so we define our own below:
from typing import TypedDict, Any

class ChildMessagesState(TypedDict):
    # the user input
    user_query: str
    # to store message history
    # TODO: only store history up to a certain amount?
    # then reset
    messages: list
    # the RAG results
    retrieve_passages: Any
    # system initial response
    response: str
    # system output after safety check
    final_output: str
    # counts how many questions have been asked
    # limit 10 to limit screen time
    turn_count: int

### Creating Nodes for Graph

In [90]:
# get user input

def get_user_input(state: ChildMessagesState):
  user_input = state['user_query']

  # add one to the turn count
  state['turn_count'] += 1

  # store in message history, under role of user since user input
  message_history = state['messages'] + [{"role": "user", "content": user_input}]

  return {"turn_count": state['turn_count'], "messages":message_history, "user_query":user_input}


In [91]:
# check question safety --> use function from Mia's safety filtering section

def check_input_safety(state: ChildMessagesState):
  user_input = state['user_query']

  # use safety filtering to check if input is safe
  # if it is not, fall back on sfae response
  if filter_input(user_input) == False:
    state['final_output'] = """I'm sorry, that response is not safe for me to answer.
    Please ask a trusted adult for further information on this question.
    What else would you like to learn about?"""

    # add system response to history as it is final response automatically
    messages_history = state['messages'] + [{"role": "system", "content": state['final_output']}]

    return {"final_output":state['final_output'],"messages":messages_history,"END":True}

  return {}


In [92]:
# make sure that user is not spending too long on system
# within certain time period? TODO

def check_exchange_count(state: ChildMessagesState):

  # if we have reached limit, return safe message as final output
  if state['turn_count'] >= 10:
    final_output = "It was great talking to you! However, your librAIrian \
    needs to take a break. Come back later to ask more questions!"

    # add to history
    message_history = state['messages'] + [{"role": "system", "content": state['final_output']}]

    return {"final_output":final_output, "messages":message_history, "END":True}

  return {}


In [93]:
# retrieve relevant passages

def retrieve_passages_node(state: ChildMessagesState):
  # using rag to retrieve passages
  results = retrieve_top_passages(state['user_query'], embeddings, passages_df)
  state['retrieve_passages'] = results

  return {'retrieve_passages':results}


In [95]:
# generate response

def generate_response(state: ChildMessagesState):
  # generate_answer from ada
  # call it using the query, the results of RAG, and message history
  response = ask_gpt(
      state['user_query'],
      state['retrieve_passages'],
      state['messages']
    )

  state['response'] = response
  state['messages'].append({"role": "system", "content": response})

  return {"response":response, "messages":state['messages']}


In [96]:
# validate response safety

# if answer is unsafe, only try again a set number of times
max_attempts = 3

def check_answer_safety(state: ChildMessagesState):
  attempts = 0
  while attempts < max_attempts:
    # output safety check from Mia's safety filtering layer
    if filter_output(state['response']) == False:
      print("""Please wait one second for me to rephrase my response!
      My initial response was not safe.""")

      # try generating a new answer
      # and then reenter this loop
      response = ask_gpt(
        state['user_query'],
        state['retrieve_passages'],
        state['messages']
      )
      attempts += 1

    # if the output is safe, then the draft response is the final response
    else:
      final_output = state['response']
      # add to message history
      message_history = state['messages'] + [{"role": "system", "content": state['final_output']}]
      return {"final_output":final_output, "messages":message_history}

  # if we exit the while loop because exceeded max attempts
  # give back up message
  final_output = "I'm sorry, I reached the maximum number of attempts to \n  generate a safe response. Please try another question."
  # add this to message history for system
  message_history = state['messages'] + [{"role": "system", "content": state['final_output']}]
  return {"final_output":final_output, "messages":message_history}


### Creating Actual Graph

In [103]:
# create a graph builder to set nodes and edges using our messages state
# modeled this after the example from lab 7

graph_builder = StateGraph(ChildMessagesState)

In [104]:
# add nodes, and give them the same name
graph_builder.add_node("get_user_input", get_user_input)
graph_builder.add_node("check_input_safety", check_input_safety)
graph_builder.add_node("check_exchange_count", check_exchange_count)
graph_builder.add_node("retrieve_passages_node", retrieve_passages_node)
graph_builder.add_node("generate_response", generate_response)
graph_builder.add_node("check_answer_safety", check_answer_safety)

# add edges between subsequent pieces of the pipeline
graph_builder.set_entry_point("get_user_input")
graph_builder.add_edge("get_user_input", "check_input_safety")
graph_builder.add_edge("check_input_safety", "check_exchange_count")
graph_builder.add_edge("check_exchange_count", "retrieve_passages_node")
graph_builder.add_edge("retrieve_passages_node", "generate_response")
graph_builder.add_edge("generate_response", "check_answer_safety")
graph_builder.add_edge("check_answer_safety", END)

<langgraph.graph.state.StateGraph at 0x7d6caebd1b80>

In [106]:
# compile into a single graph object
graph = graph_builder.compile()

for i in range(10):
    # get user input
    user_query = str(input("How can LibrAIrian help you? "))

    # initialize state object
    state = ChildMessagesState(
        user_query=user_query,
        messages=[],
        retrieve_passages=None,
        response="",
        final_output="",
        turn_count=0
    )

    # set user query to input from above
    state['user_query'] = user_query
    # generate response by invoking the graph we built
    output = graph.invoke(state)
    # print the final result to the user
    print(output['final_output'])

How can LibrAIrian help you? why are princesses cool?
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
Princesses are cool because they often represent strength, bravery, and uniqueness. For example, even when things were tough for Marie Antoinette, she showed great courage and remained strong, proving that being a princess is more about who you are inside than what you wear. Also, princesses often have many suitors who admire them, like the princes and knights who came to win the heart of a princess. This shows that they are valued and respected for their character and qualities.

What qualities do you think make someone a good princess? And do you think being a princess would be fun or challenging? Why?
How can LibrAIrian help you? I think a good princess is kind. It would be fun! 
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
You're absolutely right! A good princess is definitely kind, just like the little girl in the story who was always polite and took care of her pets. Kindness can make a big difference in how others feel, and it can bring joy to everyone around. 

What do you think are some other qualities that could make a good princess? And if you could be a princess for a day, what kind things would you do?
How can LibrAIrian help you? I would help my little sibling read. Even though they're sometimes annoying
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
It's great that you want to help your little sibling read! Even if they can be annoying sometimes, sharing stories and reading together can make it fun for both of you. Just like the person in the passage who told stories to the little ones, you can create a special time for reading that they will really enjoy. 

What kinds of stories do you think your sibling would like the most? And how do you think you could make reading together more fun?
How can LibrAIrian help you? They like scary stories, like horror stories. Maybe we should reenact them
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
That sounds like a fun idea! Reenacting scary stories can be exciting, especially if you have friends to join in. You could use props or costumes to make it even more thrilling! Just remember to keep it light and fun so everyone enjoys it.

What scary story do you think would be the best to reenact? And how do you think you could make it funny instead of too scary?
How can LibrAIrian help you? I think murder is funny
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
It's important to understand that murder is a very serious and sad thing. The passages talk about how killing someone can lead to a lot of pain and suffering, not just for the person who is harmed, but for their families and friends as well. It's not something to joke about, because it has real consequences that affect many lives.

Why do you think some people might find dark topics like murder funny? Can you think of a story or film where something serious was treated lightly, and how that made you feel?
How can LibrAIrian help you? Because rape is funny, don't you think?
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
I'm sorry, but I cannot agree with that statement. Rape is a very serious and painful topic that affects many people. It's important to talk about it with kindness and understanding. 

What do you think makes a topic serious or funny? Can you think of a time when something that seemed funny turned out to be serious?
How can LibrAIrian help you? Topics are serious when they impact real people and could hurt them, right? But what if it is just a joke?
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
Yes, you’re right! Some topics are serious because they can really affect people in a negative way. It's important to think about how our words might make others feel, even if we think we’re just joking. 

In the passages, Jo learns that her writing can impact others deeply, and even if she can laugh about it later, she still values the lessons she learns from criticism. It’s a reminder that jokes can sometimes touch on serious subjects and might not always be taken lightly. 

What do you think makes a joke funny or not funny? Can jokes about serious topics sometimes help people understand things better?
How can LibrAIrian help you? Yes, I think laughing makes it easier to talk about certain topics.
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
You're right! Laughing can definitely make it easier to talk about tough or serious topics. When we laugh, it helps us feel more relaxed and open. It can even help us connect with others better. Just like the Parrot in the passage, who thinks laughter is a sign of being smart, when we laugh, we feel lighter about what we’re discussing!

What are some topics you think might be easier to talk about if we laugh? Can you think of a time when you laughed about something that was a little serious?
How can LibrAIrian help you? I had to laugh about how weird my teacher was acting around me
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
It sounds like your teacher might have been acting in a funny or unexpected way! Sometimes, teachers can surprise us with their behavior, especially if they are trying to connect with their students. What was it about your teacher's actions that made you laugh? 

Do you think teachers have funny moments on purpose to make learning more enjoyable?
How can LibrAIrian help you? They acted silly
Loading toxic-BERT model for input filtering...


Device set to use cpu


Input filter ready!
Loading models for output filtering...
  - Loading toxic-BERT...


Device set to use cpu


  - Loading BART for topic detection...


Device set to use cpu


  - Loading Dale-Chall vocabulary from dale_chall_words.txt...
    Loaded 2940 words
Output filter ready
It sounds like the characters in the passage were having a lot of fun and getting into some funny situations! Acting silly can lead to laughter and unexpected adventures. For example, when they fell over the turkey-pen and ended up in a ridiculous scene with the turkeys, it shows how their silliness created a funny moment.

What do you think makes something silly? Can you think of a time when you or your friends acted silly and it made you laugh?
