<a href="https://colab.research.google.com/github/Wenjia000/nutrition-learning-agent/blob/main/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

!pip install -q langgraph langchain-openai scikit-learn

import pandas as pd
import random
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from langgraph.graph import StateGraph
from typing import TypedDict, Optional, List
from langchain_openai import ChatOpenAI
import os
from google.colab import drive, userdata

# load API key
# Copies the OPENAI_API_KEY value returned by Colab's `userdata.get` into the
# process environment so it can be read back below.
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")

# LLM setup
# Single shared chat model; llm_node and hint_node call `llm.invoke(...)`.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.7,
    api_key=os.environ["OPENAI_API_KEY"]
)

# load data
# Mount Google Drive, then read the food table from it. The question
# generators below read the "Description" column and nutrient columns such as
# "Protein" and "TotalFat" from `df`.
drive.mount('/content/drive')
df = pd.read_csv('/content/drive/MyDrive/USDA.csv')


# make food name shorter
def simplify_name(desc: str) -> str:
    """Shorten a comma-separated food description to at most two parts.

    The description is split on commas and only the first two pieces are
    considered. Each piece is stripped and lower-cased; pieces that end up
    empty are dropped so the result never carries a leading or trailing
    space (e.g. ``"BUTTER,"`` gives ``"butter"``, not ``"butter "``).

    Args:
        desc: Food description such as ``"BUTTER,WITH SALT"``.

    Returns:
        The shortened lower-case name, e.g. ``"butter with salt"``.
    """
    leading_parts = (part.strip().lower() for part in desc.split(",")[:2])
    # Joining only the non-empty pieces keeps the name comparable with user
    # input, which is stripped before grading.
    return " ".join(part for part in leading_parts if part)


# question generators (easy, medium, hard)

# Nutrient columns an easy question may compare between two foods.
NUTRIENTS_EASY = ["Protein", "Calories", "TotalFat", "Carbohydrate", "Sugar", "Calcium"]

# Maps a goal phrase to the nutrient column used to judge it. Each key is a
# verb phrase because generate_medium_question inserts it after
# "If you're trying to ", so it must read naturally in that position.
GOALS_MEDIUM = {
    "build muscle": "Protein",
    "improve bone health": "Calcium",
    "eat a low-fat diet": "TotalFat",
    "eat a low-sugar diet": "Sugar",
    "support heart health": "Sodium",
}

def generate_easy_question(df):
    """Build an easy "which has more <nutrient>" question about two foods.

    Args:
        df: Food table with a ``Description`` column and the nutrient
            columns listed in ``NUTRIENTS_EASY``.

    Returns:
        dict with keys ``question``, ``answer`` (simplified name of the food
        with the larger value) and ``difficulty`` (always ``"easy"``).
    """
    nutrient = random.choice(NUTRIENTS_EASY)
    # Rows with a missing value cannot be ranked: every comparison against
    # NaN is False, which would silently make the second food the answer.
    usable = df.dropna(subset=["Description", nutrient])

    # Re-draw a bounded number of times until the two foods differ in both
    # name and value; otherwise the question has no single right answer.
    # If no such pair turns up, the last draw is used as-is.
    max_draws = 20
    for _ in range(max_draws):
        sampled = usable.sample(2)
        f1, f2 = sampled.iloc[0], sampled.iloc[1]
        name1 = simplify_name(f1["Description"])
        name2 = simplify_name(f2["Description"])
        if name1 != name2 and f1[nutrient] != f2[nutrient]:
            break

    question = f"Which has more {nutrient.lower()}: {name1} or {name2}?"
    answer = name1 if f1[nutrient] > f2[nutrient] else name2
    return {"question": question, "answer": answer, "difficulty": "easy"}


def generate_medium_question(df):
    """Build a goal-based "which is better" question about two foods.

    A goal and its nutrient column are picked from ``GOALS_MEDIUM``. The
    answer is the food with the smaller value when less of the nutrient is
    better, otherwise the food with the larger value.

    Args:
        df: Food table with a ``Description`` column and the nutrient
            columns named in ``GOALS_MEDIUM``.

    Returns:
        dict with keys ``question``, ``answer`` (simplified food name) and
        ``difficulty`` (always ``"medium"``).
    """
    goal, nutrient = random.choice(list(GOALS_MEDIUM.items()))

    # Decide the direction from the nutrient as well as the goal text: the
    # heart-health goal maps to Sodium, where less is better, but its phrase
    # does not contain "low" and was previously scored as more-is-better.
    lower_is_better = "low" in goal or nutrient in {"TotalFat", "Sugar", "Sodium"}

    # Rows with a missing value cannot be ranked (comparisons with NaN are
    # always False), so leave them out of the draw.
    usable = df.dropna(subset=["Description", nutrient])

    # Re-draw a bounded number of times until the two foods differ in both
    # name and value, so the question has exactly one right answer. If no
    # such pair turns up, the last draw is used as-is.
    max_draws = 20
    for _ in range(max_draws):
        sampled = usable.sample(2)
        f1, f2 = sampled.iloc[0], sampled.iloc[1]
        name1 = simplify_name(f1["Description"])
        name2 = simplify_name(f2["Description"])
        v1, v2 = f1[nutrient], f2[nutrient]
        if name1 != name2 and v1 != v2:
            break

    question = f"If you're trying to {goal}, which is better: {name1} or {name2}?"
    if lower_is_better:
        answer = name1 if v1 < v2 else name2
    else:
        answer = name1 if v1 > v2 else name2
    return {"question": question, "answer": answer, "difficulty": "medium"}


def generate_hard_question(df):
    """Build a three-way "high protein, low fat" question.

    Each food is scored as ``Protein - TotalFat``; the answer is the food
    with the highest score.

    Args:
        df: Food table with ``Description``, ``Protein`` and ``TotalFat``
            columns.

    Returns:
        dict with keys ``question``, ``answer`` (simplified food name) and
        ``difficulty`` (always ``"hard"``).
    """
    def score(food):
        return food["Protein"] - food["TotalFat"]

    # A missing Protein or TotalFat value gives a NaN score, and max() over
    # NaN values does not reliably pick the best food.
    usable = df.dropna(subset=["Description", "Protein", "TotalFat"])

    # Re-draw a bounded number of times until the three names are distinct
    # and one food is strictly best. Scores are kept in a list aligned with
    # the names, so foods sharing a simplified name no longer overwrite each
    # other. If no clean draw turns up, the last draw is used as-is.
    max_draws = 20
    for _ in range(max_draws):
        sampled = usable.sample(3)
        foods = [sampled.iloc[i] for i in range(3)]
        names = [simplify_name(food["Description"]) for food in foods]
        scores = [score(food) for food in foods]
        best = max(scores)
        if len(set(names)) == 3 and scores.count(best) == 1:
            break

    name1, name2, name3 = names
    question = f"You want high protein but low fat. Which is best: {name1}, {name2}, or {name3}?"
    # index() returns the first food holding the best score.
    answer = names[scores.index(best)]
    return {"question": question, "answer": answer, "difficulty": "hard"}


# logistic regression classifier
# Trained on simulated learner statistics; the seed makes the simulated data
# reproducible.
np.random.seed(0)

# 500 simulated learners: a uniform accuracy in [0, 1) and, per difficulty,
# a random count of correct answers between 0 and 4.
data = {
    "accuracy": np.random.rand(500),
    "easy_correct": np.random.randint(0, 5, 500),
    "medium_correct": np.random.randint(0, 5, 500),
    "hard_correct": np.random.randint(0, 5, 500),
}
df_sim = pd.DataFrame(data)

# The target level is derived from accuracy alone:
# [0, 0.4] -> 0, (0.4, 0.7] -> 1, (0.7, 1.0] -> 2.
# include_lowest=True closes the first bin on the left; without it an
# accuracy of exactly 0.0 falls outside every bin, becomes NaN, and the
# integer cast below raises.
df_sim["level"] = pd.cut(
    df_sim["accuracy"],
    bins=[0, 0.4, 0.7, 1.0],
    labels=[0, 1, 2],
    include_lowest=True,
).astype(int)

# Feature columns; classify_level builds its input frame with these same
# column names.
X = df_sim[["accuracy", "easy_correct", "medium_correct", "hard_correct"]]
y = df_sim["level"]

# 80/20 split. Only the training part is used below; the held-out part is
# kept available for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

clf = LogisticRegression(max_iter=500)
clf.fit(X_train, y_train)


# agent state
class AgentState(TypedDict):
    """State dictionary passed between the graph nodes of a tutoring session."""

    # Text of the most recently generated question (set by ask_question).
    last_question: Optional[str]
    # Expected answer for that question: a simplified, lower-case food name.
    last_answer: Optional[str]
    # Difficulty label of that question: "easy", "medium" or "hard".
    last_difficulty: Optional[str]
    # Latest learner input, stripped and lower-cased by collect_answer.
    user_answer: Optional[str]
    # Running log of session messages appended by the nodes.
    history: List[str]
    # One entry per graded answer: 1 for correct, 0 for incorrect.
    correct_list: List[int]
    # Fraction correct over the last five graded answers; grade_answer only
    # updates it once at least five answers have been graded.
    accuracy: float
    # Number of correct answers per difficulty.
    easy_correct: int
    medium_correct: int
    hard_correct: int
    # Level used to pick the next question: 0 -> easy, 1 -> medium,
    # anything else -> hard. None until greeting defaults it to 0.
    predicted_level: Optional[int]
    # Set to False by continue_or_end after ten graded answers.
    continue_flag: bool
    # "A" or "B"; selects the prompt variant in llm_node and hint_node.
    ab_version: str


# nodes

# randomly assign A/B version
def assign_ab_version(state: AgentState):
    """Pick "A" or "B" at random, store it in the state and log the choice.

    Returns the same state object that was passed in.
    """
    version = random.choice(["A", "B"])
    state["ab_version"] = version
    state["history"].append("AB version = " + version)
    return state

# greeting node
def greeting(state: AgentState):
    """Log the welcome message and default an unset level to 0.

    Returns the same state object that was passed in.
    """
    state["history"].append("Hello! Let's start your nutrition learning session.")
    if state["predicted_level"] is not None:
        # A level was already supplied; keep it.
        return state
    state["predicted_level"] = 0
    return state

# choose difficulty and generate question
def ask_question(state: AgentState):
    """Generate a question for the current level and store it in the state.

    Level 0 uses the easy generator, level 1 the medium one and any other
    value the hard one. The question, its answer and its difficulty are
    written to the state and the question is appended to the history.
    """
    generators = {
        0: generate_easy_question,
        1: generate_medium_question,
    }
    make_question = generators.get(state["predicted_level"], generate_hard_question)
    qa = make_question(df)

    question_text = qa["question"]
    state["last_question"] = question_text
    state["last_answer"] = qa["answer"]
    state["last_difficulty"] = qa["difficulty"]
    state["history"].append("QUESTION: " + question_text)
    return state

# tutor node using A/B difference
def llm_node(state: AgentState):
    """Ask the LLM for a tutor message about the current question and show it.

    Version "A" requests plain encouragement; any other version requests a
    step-by-step friendly explanation. The reply is appended to the history
    with a "TUTOR: " prefix and printed for the learner.

    Returns the same state object that was passed in.
    """
    q = state["last_question"]
    if state["ab_version"] == "A":
        prompt = f"Encourage the student to answer: {q}"
    else:
        prompt = f"Give a step-by-step friendly explanation and encourage answering:\n{q}"
    reply = llm.invoke(prompt).content
    state["history"].append("TUTOR: " + reply)
    # Show the message to the learner, as hint_node does for hints. Before
    # this, the reply was only stored in the history and never displayed, so
    # the A/B prompt difference had no visible effect.
    print("\n[TUTOR] " + reply + "\n")
    return state


def collect_answer(state: AgentState):
    """Prompt for the learner's answer and store it stripped and lower-cased.

    The prompt repeats the current question and mentions the 'hint' and
    'stop' commands. Returns the same state object that was passed in.
    """
    prompt_text = f"Your answer:\n{state['last_question']}\n(type 'hint' or 'stop')\n> "
    raw_reply = input(prompt_text)
    state["user_answer"] = raw_reply.strip().lower()
    return state

# route: hint / grade / end
def route_user_action(state: AgentState):
    """Return the routing label for the latest learner input.

    "hint" maps to "hint", "stop" maps to "end", and anything else is sent
    to "grade".
    """
    command_routes = {"hint": "hint", "stop": "end"}
    return command_routes.get(state["user_answer"], "grade")

# hint node with A/B variation
def hint_node(state: AgentState):
    """Ask the LLM for a hint on the current question, log it and print it.

    Version "A" requests a short hint; any other version requests a detailed
    reasoning-style hint that mentions the question's difficulty. Returns
    the same state object that was passed in.
    """
    question = state["last_question"]
    difficulty = state["last_difficulty"]
    wants_short_hint = state["ab_version"] == "A"

    prompt = (
        f"Give a short hint:\n{question}"
        if wants_short_hint
        else f"Give a detailed reasoning-style hint for this {difficulty} question:\n{question}"
    )

    hint_text = llm.invoke(prompt).content
    state["history"].append("HINT: " + hint_text)
    print("\n[HINT] " + hint_text + "\n")
    return state

# grade answer
def grade_answer(state: AgentState):
    """Grade the latest answer and update the running statistics.

    Does nothing when no answer has been recorded. Otherwise it appends 1 or
    0 to ``correct_list``, bumps the per-difficulty counter on a correct
    answer, recomputes ``accuracy`` over the last five results once at least
    five exist, and prints the outcome.

    Returns the same state object that was passed in.
    """
    given = state["user_answer"]
    expected = state["last_answer"]
    difficulty = state["last_difficulty"]

    if given is None:
        return state

    got_it = int(given == expected)
    results = state["correct_list"]
    results.append(got_it)

    # Counter keys follow the "<difficulty>_correct" pattern.
    if got_it and difficulty in ("easy", "medium", "hard"):
        state[f"{difficulty}_correct"] += 1

    # Rolling accuracy over the five most recent graded answers.
    if len(results) >= 5:
        state["accuracy"] = sum(results[-5:]) / 5

    verdict = "correct" if got_it else "incorrect"
    print(f"\nRESULT: {verdict}\n")
    return state

# classifier update
def classify_level(state: AgentState):
    """Predict the learner's level with ``clf`` and store it in the state.

    A one-row frame is built from the accuracy and the three per-difficulty
    counters; the prediction is stored as an int and printed. Returns the
    same state object that was passed in.
    """
    feature_names = ("accuracy", "easy_correct", "medium_correct", "hard_correct")
    features = pd.DataFrame([{name: state[name] for name in feature_names}])
    predicted = int(clf.predict(features)[0])
    state["predicted_level"] = predicted
    print("LEVEL =", predicted)
    return state

# continue rule
def continue_or_end(state: AgentState):
    """Clear ``continue_flag`` once ten answers have been graded.

    The flag is left untouched below that count. Returns the same state
    object that was passed in.
    """
    graded_count = len(state["correct_list"])
    if graded_count < 10:
        return state
    state["continue_flag"] = False
    return state


# build graph
# Flow:
# assign_ab
#   → greeting
#       → ask_question
#           → llm_talk
#               → collect_answer
#                   → hint → collect_answer
#                   → grade → continue_check → classify → ask_question
#                   → end

graph = StateGraph(AgentState)

# Register every node under the name used by the edges below.
graph.add_node("assign_ab", assign_ab_version)
graph.add_node("greeting", greeting)
graph.add_node("ask_question", ask_question)
graph.add_node("llm_talk", llm_node)
graph.add_node("collect_answer", collect_answer)
graph.add_node("hint", hint_node)
graph.add_node("grade", grade_answer)
graph.add_node("classify", classify_level)
graph.add_node("continue_check", continue_or_end)

# Linear start-up path: assign_ab -> greeting -> ask_question -> llm_talk
# -> collect_answer.
graph.set_entry_point("assign_ab")
graph.add_edge("assign_ab", "greeting")
graph.add_edge("greeting", "ask_question")
graph.add_edge("ask_question", "llm_talk")
graph.add_edge("llm_talk", "collect_answer")

# After each input: show a hint, grade the answer, or stop the session.
graph.add_conditional_edges(
    "collect_answer",
    route_user_action,
    {
        "hint": "hint",
        "grade": "grade",
        "end": "__end__",
    }
)

# A hint returns to the prompt; a graded answer goes to the stop check.
graph.add_edge("hint", "collect_answer")
graph.add_edge("grade", "continue_check")

# Keep looping while continue_flag is still set.
graph.add_conditional_edges(
    "continue_check",
    lambda s: "continue" if s["continue_flag"] else "end",
    {
        "continue": "classify",
        "end": "__end__",
    }
)

graph.add_edge("classify", "ask_question")

app = graph.compile()

initial_state: AgentState = {
    "last_question": None,
    "last_answer": None,
    "last_difficulty": None,
    "user_answer": None,
    "history": [],
    "correct_list": [],
    "accuracy": 0.0,
    "easy_correct": 0,
    "medium_correct": 0,
    "hard_correct": 0,
    "predicted_level": None,
    "continue_flag": True,
    "ab_version": "A",
}

# Each graded question passes through ask_question, llm_talk, collect_answer,
# grade, continue_check and classify, so a full ten-question session takes
# about 60 node steps, plus two more for every hint. LangGraph's documented
# default recursion limit of 25 steps would abort the run with a
# GraphRecursionError after roughly four questions, so raise it explicitly.
MAX_GRAPH_STEPS = 200

final_state = app.invoke(
    initial_state,
    config={"recursion_limit": MAX_GRAPH_STEPS},
)

error: invalid key: inooe000@gmail.com
error: key does not contain a section: Wenjia000
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/.git/
error: open("drive/MyDrive/To-do list.gsheet"): Operation not supported
error: unable to index file 'drive/MyDrive/To-do list.gsheet'
fatal: adding files failed
error: switch `m' requires a value
error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/Wenjia000/nutrition-learning-agent.git'
[mDrive already mounted at /content/

KeyboardInterrupt: Interrupted by user