In [None]:
!pip install langchain langchain-community transformers accelerate bitsandbytes sentencepiece torch -q

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.agents import AgentType, initialize_agent
from langchain.memory import ConversationBufferMemory
from langchain.tools import Tool
import torch


In [None]:
!pip uninstall -y flash-attn -q
!pip install ninja packaging -q
!pip install flash-attn --no-build-isolation -q

In [None]:
import os
from getpass import getpass

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# SECURITY: access tokens must never be stored in the notebook. Read the token
# from the HF_TOKEN environment variable and fall back to an interactive
# prompt. Any token that was previously committed in this cell should be
# treated as compromised and revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Quantization options belong in a BitsAndBytesConfig passed through
# `quantization_config`; passing them as loose `from_pretrained` keyword
# arguments is the deprecated path.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Load model in 4-bit quantization
    llm_int8_enable_fp32_cpu_offload=True,  # Allow offloaded modules to stay on CPU in fp32
)

# Load model with CPU offloading enabled
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # Precision for the non-quantized parts
    device_map="auto",  # Let accelerate place layers across GPU/CPU
    quantization_config=quantization_config,
    token=hf_token,
)

print("✅ Model loaded with CPU offloading for limited GPU memory!")


In [None]:
# Build the Hugging Face text-generation pipeline and expose it to LangChain
# as `llm`.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Only one length limit is given. The previous `max_length=500` conflicted
    # with `max_new_tokens`; Transformers warns and lets `max_new_tokens` win,
    # so dropping `max_length` keeps the effective limit and removes the warning.
    max_new_tokens=1024,
    # `temperature` / `top_p` only take effect when sampling is enabled, so
    # request it explicitly instead of relying on the model's default config.
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)
llm = HuggingFacePipeline(pipeline=text_pipeline)


In [None]:
def therapist_ask(input_text=""):
    """Generate the therapist's next question for the patient's last message.

    The patient's message is embedded in a role-play prompt and sent to the
    module-level `llm`. Because the model is doing plain text completion it
    tends to keep writing further "Patient:" / "Therapist:" lines; only the
    therapist's first turn is returned so the patient side actually gets to
    answer.

    Args:
        input_text: The patient's most recent statement.

    Returns:
        The therapist's reply with surrounding whitespace removed. If cutting
        at the next "Patient:" marker would leave nothing, the whole stripped
        completion is returned instead.
    """
    prompt = f"""
    You are a therapist having a conversation with a patient.

    - **Ask questions to assess the patient's mental health.**
    - **Do NOT answer your own question.**
    - Keep responses **short, natural, and empathetic**.
    - Ensure each response **leads to another patient answer**.

    Example conversation:
    Patient: "I feel exhausted all the time."
    Therapist: "That sounds really tough. Have you been sleeping okay?"

    Patient: {input_text}
    Therapist:
    """

    response = llm.invoke(prompt)
    print(f"\n💡 DEBUG: Therapist response -> {response}")  # ✅ Debugging line

    # Some pipeline wrappers return prompt + completion; drop the echoed prompt
    # so its own "Patient:" lines are not mistaken for the end of the reply.
    if response.startswith(prompt):
        response = response[len(prompt):]

    # Keep only the therapist's turn: everything before the model starts
    # writing the patient's next line.
    first_turn = response.partition("Patient:")[0].strip()
    return first_turn or response.strip()

# LangChain tool that exposes `therapist_ask` to an agent. `return_direct=True`
# asks the agent to return the tool's output as-is.
therapist_tool = Tool(
    func=therapist_ask,
    name="Therapist",
    description="Asks natural, empathetic questions to assess depression.",
    return_direct=True,
)


In [None]:
def patient_reply(input_text=""):
    """Generate the patient's answer to the therapist's last message.

    The therapist's message is embedded in a role-play prompt and sent to the
    module-level `llm`. Because the model is doing plain text completion it
    tends to keep writing further "Therapist:" / "Patient:" lines; only the
    patient's first turn is returned so the therapist side gets to respond.

    Args:
        input_text: The therapist's most recent question or statement.

    Returns:
        The patient's reply with surrounding whitespace removed. If cutting at
        the next "Therapist:" marker would leave nothing, the whole stripped
        completion is returned instead.
    """
    prompt = f"""
    You are a patient speaking to a therapist about your mental health.
    
    - Respond naturally to the therapist's question.
    - **Do NOT skip a response**—always say something.
    - Keep responses **short but meaningful** (1-2 sentences).
    - Show emotions where appropriate.
    - Never stay silent.

    Example conversation:
    Therapist: "How have you been feeling lately?"
    Patient: "I’ve been feeling really down and unmotivated. Even simple tasks feel exhausting."

    Therapist: {input_text}
    Patient:
    """

    response = llm.invoke(prompt)
    print(f"\n💡 DEBUG: Patient response -> {response}")  # ✅ Debugging line

    # Some pipeline wrappers return prompt + completion; drop the echoed prompt
    # so its own "Therapist:" lines are not mistaken for the end of the reply.
    if response.startswith(prompt):
        response = response[len(prompt):]

    # Keep only the patient's turn: everything before the model starts
    # writing the therapist's next line.
    first_turn = response.partition("Therapist:")[0].strip()
    return first_turn or response.strip()

# LangChain tool that exposes `patient_reply` to an agent. `return_direct=True`
# asks the agent to return the tool's output as-is.
patient_tool = Tool(
    func=patient_reply,
    name="Patient",
    description="Responds to therapist questions with emotions and details.",
    return_direct=True,
)


In [None]:
# Conversation buffer stored under the "chat_history" key, configured to
# return its history as message objects rather than one string.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)


In [None]:
from langchain.agents import AgentType

# Build the two single-tool agents with identical settings; only the tool
# differs. The therapist agent is constructed first, then the patient agent.
# NOTE(review): both agents receive the same `memory` object — confirm that a
# shared history between the two roles is intended.
therapist_agent, patient_agent = (
    initialize_agent(
        tools=[role_tool],
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        memory=memory,
        handle_parsing_errors=True,
    )
    for role_tool in (therapist_tool, patient_tool)
)


In [None]:
# Presumably the intended number of conversation rounds.
# NOTE(review): no cell visible in this notebook reads `num_rounds`; the
# conversation loop uses its own hard-coded count — confirm which is intended.
num_rounds = 1

In [None]:
# Simulated therapist/patient dialogue: the two agents take turns, each one
# receiving the other's latest utterance as its input.
patient_input = "I've been feeling really down lately."

for _ in range(5):  # Number of conversation turns
    print("\n🧑‍⚕️ Therapist is thinking...")

    # Therapist asks a question. `invoke` returns a dict (inputs plus an
    # "output" entry), so take the text out of it rather than passing the
    # whole dict along.
    therapist_result = therapist_agent.invoke({"input": patient_input})
    therapist_response = therapist_result["output"]
    print(f"\n🧑‍⚕️ Therapist: {therapist_response}")

    print("\n😞 Patient is responding...")

    # Patient responds to the therapist's question (text only, not the dict).
    patient_result = patient_agent.invoke({"input": therapist_response})
    patient_response = patient_result["output"]
    print(f"\n😞 Patient: {patient_response}")

    # Pass patient's response back to therapist
    patient_input = patient_response
