In [13]:
import chromadb
from sentence_transformers import SentenceTransformer

In [14]:
# Open the Chroma store persisted under ../data/chroma_db. The path is relative,
# so it resolves against the kernel's current working directory.
client = chromadb.PersistentClient(path="../data/chroma_db")
# Fetch the "mscac_projects" collection by name.
# NOTE(review): presumably created by an ingestion step not shown here — confirm
# that step has been run before executing this cell.
collection = client.get_collection("mscac_projects")
# Sentence-embedding model that match_student uses to encode the query text.
# NOTE(review): presumably this must be the same model that was used to embed
# the documents stored in the collection — TODO confirm.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

In [15]:
def _build_email(student_input, alumnus_name, project_title):
    """Render the outreach email addressed to the alumnus of the top match."""
    return f"""
Hi {alumnus_name},

{student_input['student_type']} {student_input['student_name']} just requested for advice on: {student_input['headline']}

A bit about them:
{student_input['background']}

They are seeking guidance on:
{student_input['request']}

Would you be open to connecting and sharing your experience on "{project_title}"?

{student_input['closing']}

Thanks,
MScAC Alumni Relations
"""


def match_student(student_input, n_results=3):
    """Find the projects closest to a student's request and draft an intro email.

    The student's headline, background and request are joined into one query
    string, embedded with the module-level ``embed_model`` and looked up in the
    module-level ``collection``.

    Args:
        student_input: Mapping that must provide the keys ``headline``,
            ``background``, ``request``, ``student_type``, ``student_name``
            and ``closing``.
        n_results: Maximum number of matches requested from the collection.

    Returns:
        A ``(matches, email)`` tuple.

        ``matches`` is a list of dicts with the keys ``title`` (the stored
        document, used as the project title), ``meta`` (the stored metadata,
        or ``{}`` when the record has none) and ``distance``, in the order the
        collection returned them.

        ``email`` is the drafted message addressed to the alumnus of the first
        match, or ``""`` when the query produced no matches.

    Raises:
        KeyError: If ``student_input`` lacks one of the required keys.
    """
    query = f"{student_input['headline']}. {student_input['background']}. {student_input['request']}"
    q_emb = embed_model.encode([query])
    res = collection.query(
        query_embeddings=q_emb,
        n_results=n_results,
        include=["metadatas", "documents", "distances"]
    )

    # Results are grouped per query; exactly one query was sent, so only the
    # first inner list of each field is relevant.
    documents = res["documents"][0]
    metadatas = res["metadatas"][0]
    distances = res["distances"][0]

    matches = [
        {
            "title": doc,
            # A record stored without metadata comes back as None; normalise it
            # so callers can always use ``meta.get(...)``.
            "meta": meta if meta is not None else {},
            "distance": dist,
        }
        for doc, meta, dist in zip(documents, metadatas, distances)
    ]

    # An empty collection (or no hits) previously crashed with IndexError when
    # picking the top match; report "nothing found" instead.
    if not matches:
        return matches, ""

    # The first hit is treated as the best match and is the one contacted.
    top_match = matches[0]
    alumnus_name = top_match["meta"].get("student", "the alumnus")
    email = _build_email(student_input, alumnus_name, top_match["title"])

    return matches, email


In [18]:
# Demo: run the matcher for one sample student and print what it produced.
if __name__ == "__main__":
    student_input = {
        "student_name": "Alex Chen",
        "student_type": "1st Year MScAC Student",
        "headline": "Interested in AI for GPU compilers",
        "background": "Focusing on parallel computing and compiler optimizations.",
        "request": "Looking for guidance on machine learning techniques in compiler design.",
        "closing": "Thank you for considering this!"
    }

    matches, email = match_student(student_input)

    # One summary line per matched project, in the order returned.
    print("=== MATCHES ===")
    for match in matches:
        details = match["meta"]
        student = details.get('student', 'Unknown')
        cohort = details.get('cohort', '?')
        partner = details.get('partner', 'Unknown')
        print(f"- {student} (Cohort {cohort}), Project: {match['title']}, Partner: {partner}")

    # The drafted message for the top match.
    print("\n=== Auto-generated Email ===")
    print(email)

=== MATCHES ===
- Zichuan Guan (Cohort 2022–23), Project: Machine learning for pattern-matching optimization in GPU compilers, Partner: AMD
- Eric Zheng (Cohort 2024–25), Project: AI Compiler Methods to Improve GPU Runtime Performance, Partner: AMD
- Govind Sudarshan (Cohort 2023–24), Project: Predicting Register Requirements for Program Optimization, Partner: AMD

=== Auto-generated Email ===

Hi Zichuan Guan,

1st Year MScAC Student Alex Chen just requested for advice on: Interested in AI for GPU compilers

A bit about them:
Focusing on parallel computing and compiler optimizations.

They are seeking guidance on:
Looking for guidance on machine learning techniques in compiler design.

Would you be open to connecting and sharing your experience on "Machine learning for pattern-matching optimization in GPU compilers"?

Thank you for considering this!

Thanks,
MScAC Alumni Relations

