# Emotion-Aware Note Understanding & Similarity — Colab Starter

Step-by-step notebook covering: data prep → embeddings → similarity → emotion classifier → RAG → eval.


In [None]:
# %% [markdown]
# Setup
%%bash
pip -q install --upgrade pip
pip -q install transformers sentence-transformers datasets scikit-learn pandas numpy langchain chromadb rouge-score


In [None]:
# Imports & seeds
import os
import random

import numpy as np
import torch

# A single seed shared by every random number generator seeded below.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
print('Torch:', torch.__version__)


## 1) Data prep (synthetic demo)

In [None]:
# Synthetic demo corpus: every record pairs a note with its placeholder tag,
# so the two parallel lists below always have the same length and order.
_demo_pairs = [
    ("I can't sleep, mind racing before exams.", "anxiety"),
    ("Felt better after a walk with a friend.", "positive"),
    ("I am overwhelmed at work and keep procrastinating.", "stress"),
    ("Woke up anxious, heart pounding, worried about bills.", "anxiety"),
]
demo_notes = [note for note, _tag in _demo_pairs]
demo_labels = [tag for _note, tag in _demo_pairs]  # placeholder
len(demo_notes)


## 2) Embeddings & similarity (baseline)

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
# Sentence encoder used for both the corpus and incoming queries.
EMBED_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
embedder = SentenceTransformer(EMBED_MODEL_NAME)

# Corpus matrix: one embedding per entry of `demo_notes`, encoded with
# `normalize_embeddings=True`.
E = embedder.encode(demo_notes, normalize_embeddings=True)
def top_k_similar(query, k=3):
    """Return the `k` demo notes that score highest against `query`.

    The query is encoded with the module-level `embedder` (with
    `normalize_embeddings=True`) and scored against every row of the
    precomputed matrix `E` with a dot product.

    Args:
        query: Text to compare against the notes.
        k: Maximum number of results. Values <= 0 yield an empty list;
            values larger than the corpus return every note.

    Returns:
        A list of `(index, score, note)` tuples in descending score order,
        where `index` is the position of the note in `demo_notes`.
    """
    if k <= 0:
        # A negative k would otherwise make `[:k]` mean "all but the last |k|".
        return []
    query_vec = embedder.encode([query], normalize_embeddings=True)[0]
    sims = E @ query_vec
    # Negate so that argsort's ascending order ranks the best match first.
    idx = np.argsort(-sims)[:k]
    return [(int(i), float(sims[i]), demo_notes[i]) for i in idx]
# Demo query: show the three best-matching demo notes as the cell output.
top_k_similar("racing thoughts before test", k=3)


## 3) Emotion classifier (pretrained stub)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Checkpoint identifier handed to both `from_pretrained` loaders below.
emo_name = 'joeddav/distilbert-base-uncased-go-emotions-student'
tok = AutoTokenizer.from_pretrained(emo_name)
mdl = AutoModelForSequenceClassification.from_pretrained(emo_name)

# Class names that `predict_emotions` zips against the model's scores.
# NOTE(review): the order is assumed to follow the checkpoint's class indices;
# confirm against `mdl.config.id2label`.
labels = [
    'admiration', 'amusement', 'anger', 'annoyance',
    'approval', 'caring', 'confusion', 'curiosity',
    'desire', 'disappointment', 'disapproval', 'disgust',
    'embarrassment', 'excitement', 'fear', 'gratitude',
    'grief', 'joy', 'love', 'nervousness',
    'optimism', 'pride', 'realization', 'relief',
    'remorse', 'sadness', 'surprise', 'neutral',
]
def predict_emotions(text, top_n=5):
    """Score `text` with the emotion model and return its most probable labels.

    Uses the module-level tokenizer `tok`, model `mdl` and class-name list
    `labels`.

    Args:
        text: A single input string; it is tokenised with truncation enabled.
        top_n: Number of `(label, probability)` pairs to return. Defaults to
            5; values <= 0 yield an empty list.

    Returns:
        A list of `(label, probability)` tuples sorted by descending
        probability, where probabilities are a softmax over the logits.

    Raises:
        ValueError: If the number of scores differs from `len(labels)`,
            which would otherwise pair scores with the wrong names or
            silently drop some of them.
    """
    x = tok(text, return_tensors='pt', truncation=True)
    with torch.no_grad():
        # Drop the leading batch dimension of the single-text input.
        out = mdl(**x).logits.squeeze(0)
    probs = torch.softmax(out, dim=-1).tolist()
    if len(probs) != len(labels):
        raise ValueError(
            f"model produced {len(probs)} scores but {len(labels)} labels are defined"
        )
    ranked = sorted(zip(labels, probs), key=lambda t: t[1], reverse=True)
    return ranked[:max(top_n, 0)]
# Demo: show the highest-scoring emotions for a sample note as the cell output.
predict_emotions("I can't sleep and my mind is racing")


## 4) Tiny RAG over coping tips (toy)

In [None]:
# Toy knowledge base: topic -> coping tips.
kb = {
    'anxiety': ["Box Breathing 4-4-4-4", "Worry Time Technique"],
    'stress': ["10-minute walk + hydration", "Pomodoro with gentle breaks"],
    'sleep': ["No screens 60m before bed", "Dim lights, same wake time"],
}

# Bridges classifier labels to `kb` topics. None of the names in the emotion
# `labels` list is itself a `kb` key, so without this mapping no classifier
# output could ever match a topic.
# Placeholder heuristic: revise together with the curated knowledge base.
# No emotion label maps to 'sleep' yet.
emotion_to_topic = {
    'nervousness': 'anxiety',
    'fear': 'anxiety',
    'annoyance': 'stress',
    'anger': 'stress',
    'disappointment': 'stress',
}


def suggest_coping_from_emotions(top, max_tips=3):
    """Collect coping tips for a ranked list of emotion predictions.

    Args:
        top: Iterable of `(label, probability)` pairs, best first. A label
            may be a `kb` topic directly or an emotion name that
            `emotion_to_topic` maps to a topic; other labels are ignored.
            The probability is not used.
        max_tips: Maximum number of tips to return. Defaults to 3; values
            <= 0 yield an empty list.

    Returns:
        Up to `max_tips` distinct tips, in the order their labels appear
        in `top`.
    """
    recs = []
    for label, _prob in top:
        topic = label if label in kb else emotion_to_topic.get(label)
        recs.extend(kb.get(topic, []))
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(recs))[:max(max_tips, 0)]
# Demo: classify a sample note, then look up coping tips for its top emotions.
top = predict_emotions("I can't sleep, mind racing before exams")
suggest_coping_from_emotions(top)


## 5) Eval stubs (Precision@K, macro-F1)

In [None]:
def precision_at_k(relevances, k=3):
    """Compute Precision@K for a ranked list of relevance judgements.

    Args:
        relevances: Sequence of relevance values in rank order (for example
            1 for relevant, 0 for not relevant).
        k: Cut-off rank. If fewer than `k` judgements are given, the
            missing ranks count as non-relevant because the divisor stays `k`.

    Returns:
        The sum of the first `k` relevance values divided by `k`, or 0.0
        when `k` is not positive.
    """
    if k <= 0:
        # A negative k would slice from the end and could give a score above 1.
        return 0.0
    return sum(relevances[:k]) / k
# TODO: build a labeled small set for retrieval and emotions, then compute metrics.
# Sanity check: two of the first three judgements are relevant.
precision_at_k([1,0,1,0], k=3)


## Next steps
- Replace toy KB with curated snippets.
- Build a small labeled eval set for retrieval + emotion.
- (Optional) LoRA fine-tuning on domain text in a second notebook.
