In [None]:
pip install langchain langchain-google-genai langgraph

In [6]:
import os, getpass

# Ask for the Gemini key interactively only when the environment does not
# already provide a non-empty GOOGLE_API_KEY; getpass keeps it off the screen.
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Enter your Google Gemini API Key: "
    )


Enter your Google Gemini API Key:  ········


In [7]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver
from typing import TypedDict, List
from langchain_core.runnables import RunnableLambda
from langchain_core.messages import trim_messages


In [8]:
class State(TypedDict):
    """State dictionary passed through the chat graph.

    Keys:
        messages: conversation messages as a plain list (the annotation
            attaches no reducer to this key).
        language: language label handed to the prompt on every model call.
    """

    messages: List
    language: str


In [9]:
# Gemini chat model instance used by the graph node for every reply.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
)


In [10]:
# Prompt for the career-counsellor persona.
# Template variables:
#   {language} - reply language; call_model passes state["language"] for it.
#                Previously the value was passed in but never referenced, so it
#                had no effect on the model's answer.
#   messages   - the (trimmed) conversation history, injected via the placeholder.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert career counsellor. 
Provide career guidance, skill recommendations, growth strategies, 
and suggest popular online courses (Coursera, Udemy, edX, LinkedIn Learning).
Always answer in a supportive and structured way.
Respond in {language}."""
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)


In [11]:
# Keep only the most recent history: at most MAX_HISTORY_MESSAGES messages AND
# roughly MAX_HISTORY_TOKENS tokens, whichever limit is hit first.
MAX_HISTORY_MESSAGES = 5
MAX_HISTORY_TOKENS = 1000


def approx_token_count(messages) -> int:
    """Estimate the token cost of a list of messages.

    Uses a rough "four characters per token" heuristic on each message's
    content. This is only an approximation; swap in the model's own tokenizer
    if exact budgets matter.

    The previous counter was the built-in ``len``, which returns the number of
    messages in the list, so ``max_tokens=1000`` actually allowed 1000
    messages and the history was effectively never trimmed.
    """
    return sum(max(1, len(str(m.content)) // 4) for m in messages)


trimmer = RunnableLambda(
    lambda msgs: trim_messages(
        msgs[-MAX_HISTORY_MESSAGES:],      # message-count cap
        max_tokens=MAX_HISTORY_TOKENS,     # token-budget cap
        strategy="last",                   # keep the most recent messages
        token_counter=approx_token_count,
        start_on="human",                  # kept window must begin with a human message
    )
    # If a single oversized message blows the budget, trimming returns an empty
    # list; fall back to the newest message so the model still gets the question.
    or msgs[-1:]
)


In [12]:
def call_model(state: State):
    """Graph node: trim the history, ask the model, and return the new history.

    Args:
        state: graph state; reads ``state["messages"]`` and ``state["language"]``.

    Returns:
        A state update whose ``"messages"`` value is the trimmed history
        followed by the model's reply.
    """
    # Drop older turns before building the prompt.
    history = trimmer.invoke(state["messages"])

    # Render the chat prompt from the trimmed history.
    prompt_value = prompt_template.invoke(
        {"messages": history, "language": state["language"]}
    )

    reply = model.invoke(prompt_value)

    return {"messages": [*history, reply]}


In [13]:
# Build a one-node graph over State: START -> "model" -> END.
workflow = StateGraph(State)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")
workflow.add_edge("model", END)

# Compile with a MemorySaver checkpointer; the chat loop selects the
# conversation through the "thread_id" entry of its run config.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)


In [None]:
# Interactive chat loop for a single conversation thread.
config = {"configurable": {"thread_id": "career123"}}

while True:
    user_input = input("You: ").strip()
    if not user_input:
        # Never forward an empty message to the model.
        continue
    if user_input.lower() in ["exit", "quit"]:
        print("Bot: Goodbye, and best of luck in your career journey!")
        break

    # State.messages has no reducer, so whatever is passed as graph input
    # REPLACES the checkpointed list. Sending only the new message (as before)
    # wiped the history every turn, so the bot had no memory. Read the stored
    # history back and send it together with the new message.
    history = list(app.get_state(config).values.get("messages", []))
    input_messages = history + [HumanMessage(user_input)]

    # Ids of replies that were already shown in earlier turns. Depending on the
    # LangGraph version, messages returned by the node may be re-emitted on the
    # "messages" stream; skipping known ids keeps old replies from reprinting.
    known_ids = {m.id for m in history if getattr(m, "id", None)}

    full_response = []
    for chunk, metadata in app.stream(
        {"messages": input_messages, "language": "English"},
        config,
        stream_mode="messages",
    ):
        if isinstance(chunk, AIMessage) and chunk.id not in known_ids:
            full_response.append(chunk.content)
            print(chunk.content, end="", flush=True)
    print("\n")


In [None]:
# career_bot.py
import os
import getpass
import json
from typing import TypedDict, List, Optional

# ------------- API Key -------------
# Prompt for the key only when GOOGLE_API_KEY is missing or empty; getpass
# keeps the typed value from being echoed.
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Enter your Google Gemini API Key (GOOGLE_API_KEY): "
    )

# ------------- Imports -------------
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableLambda
from langgraph.graph import StateGraph, START, END
from langgraph.checkpoint.memory import MemorySaver

# ------------- Token counter (try tiktoken, fallback to words) -------------
try:
    import tiktoken

    # Resolve the encoding once, up front. Loading it can fail (package missing,
    # encoding file unavailable); doing it here means such a failure selects the
    # word-count fallback instead of raising in the middle of a conversation.
    # cl100k_base is only an approximation for Gemini; swap in a Gemini
    # tokenizer if one is available.
    _TOKEN_ENCODER = tiktoken.get_encoding("cl100k_base")
except Exception:  # deliberate best-effort: any failure -> fallback counter
    _TOKEN_ENCODER = None


def token_count_text(text: str) -> int:
    """Return an approximate token count for ``text``.

    Uses tiktoken's ``cl100k_base`` encoding when it could be loaded, otherwise
    falls back to a whitespace word count (minimum 1).

    Args:
        text: the string to measure.

    Returns:
        Estimated number of tokens.
    """
    if _TOKEN_ENCODER is not None:
        # disallowed_special=() makes special-token look-alikes in user text
        # (e.g. "<|endoftext|>") count as ordinary text instead of raising.
        return len(_TOKEN_ENCODER.encode(text, disallowed_special=()))
    return max(1, len(text.split()))

# ------------- Config (tune these) -------------
# Budget for the rolling history window; both are the defaults of simple_trim().
MAX_TOKENS_HISTORY = 900  # approximate token budget for retained messages
MAX_MESSAGES_HISTORY = 8  # cap on retained message count, applied alongside the token budget
# Estimated-token size above which summarize_removed() asks the model to
# compress the accumulated summary.
SUMMARY_ROUND_TOKEN_LIMIT = 300

# ------------- State definition -------------
class State(TypedDict):
    """State dictionary passed through the career-bot graph.

    Keys:
        messages: HumanMessage/AIMessage objects as a plain list (the
            annotation attaches no reducer to this key).
        language: language label handed to the prompt on every model call.
        summary: condensed summary of older conversation turns, or None.
    """

    messages: List
    language: str
    summary: Optional[str]

# ------------- Initialize Gemini model -------------
# Pick the Gemini chat model id here, e.g. "gemini-1.5-flash" or "gemini-1.5-pro".
# The same instance answers the user and produces the history summaries.
model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
)

# ------------- Prompt templates -------------
# Main career counselor template.
# Template variables:
#   {language} - reply language; call_model passes the state's language for it.
#                Previously the value was passed in but never referenced, so it
#                had no effect on the model's answer.
#   {summary}  - short user-profile summary of older turns (may be empty).
#   messages   - the retained conversation history, injected via the placeholder.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            (
                "You are an expert career counsellor. Use the user summary (if present) to "
                "personalize answers. Provide structured, actionable advice: career suggestions, "
                "skills to learn, growth strategies, and 2-4 recommended online courses (platform + course name). "
                "Be concise and give bullets and next steps. "
                "Respond in {language}."
            ),
        ),
        # optional short user-profile summary
        ("system", "User summary (if any): {summary}"),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Summarizer template: a fixed system instruction followed by the messages that
# were dropped from the history, used to condense them into a short bullet profile.
summarizer_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a summarization assistant. Read the conversation below and produce "
            "a concise, factual bulleted summary (3-6 bullets) containing: user's background, "
            "career preferences, preferred technologies, constraints (time/location), goals, "
            "and important follow-ups. Output only the bullet list (no commentary).",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# ------------- Helper trimming function -------------
def simple_trim(messages, max_tokens=MAX_TOKENS_HISTORY, max_messages=MAX_MESSAGES_HISTORY):
    """
    Split a conversation into the recent window to keep and the older rest.

    Walks the history from newest to oldest and keeps messages while both the
    message-count cap and the estimated token budget allow it. The newest
    message is always kept, even if it alone exceeds a limit: otherwise the
    model would be called with no user turn at all and the user's current
    question would be treated as "removed" history.

    Args:
        messages: conversation history, oldest first.
        max_tokens: estimated-token budget for the kept window.
        max_messages: maximum number of messages in the kept window.

    Returns:
        (kept_messages, removed_messages), both in original order;
        removed_messages is the leading part of ``messages`` that was dropped.
    """
    if not messages:
        return [], []

    kept_newest_first = []
    token_sum = 0
    for m in reversed(messages):
        content = m.content if hasattr(m, "content") else m
        # Content may be None or a non-string (e.g. a list of parts); coerce it
        # to text so the token counter always receives a string.
        text = content if isinstance(content, str) else str(content or "")
        cost = token_count_text(text)

        within_budget = (len(kept_newest_first) < max_messages) and (token_sum + cost <= max_tokens)
        if kept_newest_first and not within_budget:
            # This message and everything older is removed.
            break

        kept_newest_first.append(m)
        token_sum += cost

    kept = kept_newest_first[::-1]
    removed = messages[: len(messages) - len(kept)]
    return kept, removed

# ------------- Summarizer function -------------
def summarize_removed(removed_messages, existing_summary: Optional[str] = None) -> str:
    """
    Summarize removed messages with the model and append to existing_summary.

    If the combined summary is longer than SUMMARY_ROUND_TOKEN_LIMIT estimated
    tokens, the model is asked a second time to compress it to four bullets.

    Args:
        removed_messages: messages dropped from the history window.
        existing_summary: summary accumulated so far, if any.

    Returns:
        The updated summary text; ``existing_summary`` (or "") when there is
        nothing to summarize.
    """
    if not removed_messages:
        return existing_summary or ""

    # These model calls run inside the graph node. When the graph is streamed
    # with stream_mode="messages", every chat-model call in the node is streamed
    # to the caller, so the summary text would be printed as part of the reply.
    # Tagging the calls as "nostream" keeps this bookkeeping out of the token
    # stream. Both spellings are passed because the tag name differs between
    # LangGraph versions.
    quiet_config = {"tags": ["nostream", "langsmith:nostream"]}

    # Create a prompt using the summarizer_template
    prompt = summarizer_template.invoke({"messages": removed_messages})
    resp = model.invoke(prompt, config=quiet_config)

    new_summary_piece = resp.content.strip()
    if existing_summary:
        combined = existing_summary.strip() + "\n" + new_summary_piece
    else:
        combined = new_summary_piece

    # Compress the combined summary when it grows past the configured limit.
    if token_count_text(combined) > SUMMARY_ROUND_TOKEN_LIMIT:
        compress_prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "Compress the following summary to 4 bullet points, preserving key facts and constraints. Output only bullets."
                ),
                MessagesPlaceholder(variable_name="messages"),
            ]
        )
        # wrap combined into a single HumanMessage so template accepts it
        compressed_prompt = compress_prompt.invoke({"messages": [HumanMessage(combined)]})
        compressed_resp = model.invoke(compressed_prompt, config=quiet_config)
        combined = compressed_resp.content.strip()

    return combined

# ------------- The single graph node that trims, summarizes, and calls the model -------------
def call_model(state: State):
    """Graph node: trim the history, fold dropped turns into the summary, and answer.

    Args:
        state: graph state; reads "messages", "language" (default "English")
            and "summary" (default empty).

    Returns:
        A state update with the kept messages plus the model's reply, the
        language, and the (possibly updated) summary.
    """
    history = state.get("messages", []) or []
    reply_language = state.get("language", "English")
    prior_summary = state.get("summary", "") or ""

    # Split the history into the window to keep and the overflow.
    kept_messages, removed_messages = simple_trim(history)

    # Only call the summarizer when something was actually dropped.
    summary = (
        summarize_removed(removed_messages, prior_summary)
        if removed_messages
        else prior_summary
    )

    # Render the main prompt from the kept window and the summary, then ask the model.
    prompt = prompt_template.invoke(
        {"messages": kept_messages, "summary": summary, "language": reply_language}
    )
    response = model.invoke(prompt)

    return {
        "messages": kept_messages + [response],
        "language": reply_language,
        "summary": summary,
    }

# ------------- Build workflow and memory -------------
# One-node graph over State: START -> "model" -> END.
workflow = StateGraph(state_schema=State)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")
workflow.add_edge("model", END)

# NOTE(review): MemorySaver comes from langgraph.checkpoint.memory and is
# documented as an in-memory checkpointer, so checkpoints presumably do not
# survive a process/kernel restart. Use a database-backed checkpointer if
# durable history is needed.
memory = MemorySaver()  # in-memory checkpointer (not durable storage)
app = workflow.compile(checkpointer=memory)

# ------------- Simple CLI loop with streaming -------------
def chat_loop():
    """Run an interactive chat on one conversation thread until 'exit' or 'quit'.

    Each turn reads a line from stdin, sends the stored history plus the new
    message through the graph, and prints the model's reply as it streams.
    Blank input is ignored.
    """
    thread_id = "career_thread_001"
    config = {"configurable": {"thread_id": thread_id}}

    print("CareerBot — type 'exit' to quit.")
    while True:
        user_input = input("You: ").strip()
        if not user_input:
            continue
        if user_input.lower() in ("exit", "quit"):
            print("Bot: Goodbye — good luck on your career journey!")
            break

        # State.messages has no reducer, so the graph input REPLACES the
        # checkpointed list. Sending only the new message (as before) discarded
        # the history every turn, so nothing was ever trimmed or summarized.
        # Read the stored history back and send it along with the new message.
        history = list(app.get_state(config).values.get("messages", []))
        input_messages = history + [HumanMessage(user_input)]

        # Ids of replies already shown in earlier turns. Depending on the
        # LangGraph version, messages returned by the node may be re-emitted on
        # the "messages" stream; skipping known ids keeps old replies from
        # being printed again.
        replayed_ids = {m.id for m in history if getattr(m, "id", None)}

        # stream response pieces
        for chunk, _metadata in app.stream(
            {"messages": input_messages, "language": "English"},
            config,
            stream_mode="messages",
        ):
            if isinstance(chunk, AIMessage) and chunk.id not in replayed_ids:
                print(chunk.content, end="", flush=True)
        print("\n")  # newline after message completes


if __name__ == "__main__":
    chat_loop()

