<a href="https://colab.research.google.com/github/isikaykarakus/Foreo_AI_Internship/blob/main/Foreow1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install sentence-transformers faiss-cpu transformers pandas

import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline

# **Tiny demo dataset (10 rows)**

In [None]:
# Hand-written demo corpus. Each record is given positionally as
# (phrase, meaning, usage, source_url) and expanded into a dict per row.
data = [
    dict(zip(("phrase", "meaning", "usage", "source_url"), record))
    for record in (
        (
            "spill the tea",
            "share the gossip or secret",
            "She spilled the tea about the new launch.",
            "https://example.com/urban/spill_the_tea",
        ),
        (
            "low-key",
            "subtly; not openly; a little bit",
            "I’m low-key excited about this collab.",
            "https://example.com/urban/low_key",
        ),
        (
            "ghosting",
            "suddenly cutting off all communication",
            "He stopped replying—total ghosting.",
            "https://example.com/urban/ghosting",
        ),
        (
            "stan",
            "an overzealous or obsessive fan",
            "I stan that skincare brand.",
            "https://example.com/urban/stan",
        ),
        (
            "cap",
            "a lie; not true",
            "They said the product is magic—sounds like cap.",
            "https://example.com/urban/cap",
        ),
        (
            "flex",
            "show off; boast",
            "She flexed her skincare routine on IG.",
            "https://example.com/urban/flex",
        ),
        (
            "ratio",
            "a reply gets more likes than the original post (implies disagreement)",
            "Their tweet got ratioed instantly.",
            "https://example.com/urban/ratio",
        ),
        (
            "simp",
            "someone who is overly attentive for affection/attention",
            "Don’t simp for clout.",
            "https://example.com/urban/simp",
        ),
        (
            "yeet",
            "to throw with force; also an exclamation of excitement",
            "Yeet that empty bottle in the bin!",
            "https://example.com/urban/yeet",
        ),
        (
            "mid",
            "average; not great",
            "The results were mid tbh.",
            "https://example.com/urban/mid",
        ),
    )
]

# One DataFrame row per record; the bare name renders the table as cell output.
df = pd.DataFrame(data)
df


In [None]:
# Light normalisation of the lookup key: lowercase, then trim whitespace.
df["phrase_clean"] = df["phrase"].str.lower().str.strip()

# One text per row, "phrase — meaning — usage", so each embedding covers the
# definition and an example sentence as well as the phrase itself.
corpus_texts = (
    df["phrase_clean"]
    .str.cat([df["meaning"], df["usage"]], sep=" — ")
    .tolist()
)

emb_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = emb_model.encode(
    corpus_texts,
    normalize_embeddings=True,  # unit-length vectors: inner product == cosine
)

# Inner-product FAISS index sized to the embedding dimension, filled with the
# whole corpus.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)


In [None]:
# Small text-to-text model used to turn a prompt into the final explanation;
# every generation is capped at 120 new tokens.
gen = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
    max_new_tokens=120,
)

def search(query, k=3):
    """Return the corpus rows most similar to `query`, best match first.

    The query is embedded with the same model and the same normalisation as
    the corpus, then looked up in the module-level FAISS `index`.

    Args:
        query: Free-text phrase to look up.
        k: Maximum number of rows to return. Values larger than the number of
            indexed rows are clamped to that number; values below 1 give an
            empty result instead of an error from the index.

    Returns:
        A copy of the matching rows of `df` with an extra float `score`
        column holding the similarity reported by the index.
    """
    # FAISS pads missing results with id -1, and `iloc[-1]` would silently
    # resolve that to the *last* corpus row. Never request more rows than the
    # index holds.
    k = min(int(k), index.ntotal)
    if k < 1:
        hits = df.iloc[[]].copy()
        hits["score"] = []
        return hits

    q_emb = emb_model.encode([query], normalize_embeddings=True)
    scores, ids = index.search(q_emb, k)

    # Defensive: drop any remaining padding slots before positional lookup.
    found = ids[0] >= 0
    hits = df.iloc[ids[0][found]].copy()
    hits["score"] = [float(s) for s in scores[0][found]]
    return hits

def explain(query, k=3):
    """Retrieve context for a slang phrase and generate a plain-English gloss.

    Args:
        query: Slang phrase to explain.
        k: Number of corpus entries to retrieve as context for the prompt.

    Returns:
        A `(table, text)` tuple: the retrieved rows restricted to the
        phrase / meaning / usage / source_url / score columns, and the
        `generated_text` of the generator's first output.
    """
    retrieved = search(query, k)

    # One bullet per retrieved entry: "- phrase: meaning (e.g., usage)".
    bullets = []
    for phrase, meaning, usage in zip(
        retrieved["phrase"], retrieved["meaning"], retrieved["usage"]
    ):
        bullets.append(f"- {phrase}: {meaning} (e.g., {usage})")
    context = "\n".join(bullets)

    prompt = (
        f"Explain the slang '{query}' in clear, simple English. "
        f"Use the examples below for context. Add a short cultural note if helpful.\n\n{context}\n\nAnswer:"
    )

    generations = gen(prompt)
    shown_columns = ["phrase", "meaning", "usage", "source_url", "score"]
    return retrieved[shown_columns], generations[0]["generated_text"]

# Demo: explain a phrase that appears verbatim in the corpus, then show the
# retrieved rows followed by the generated text.
table, answer = explain(query="spill the tea", k=3)
display(table)  # retrieved rows with their similarity scores
print("\n--- EXPLANATION ---\n", answer)


In [None]:
# Second demo query: a hyphenated phrase, retrieving the top three matches.
table, answer = explain("low-key", 3)
display(table)  # retrieved rows with their similarity scores
print("\n--- EXPLANATION ---\n", answer)
