# In [None]:

import pandas as pd
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model used by hybrid_match for the similarity scores.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Sample patient records: one row per patient, with a free-text description
# that is later compared against trial criteria.
patients = pd.DataFrame(
    [
        (
            "P001",
            55,
            "Female",
            "55-year-old woman with high blood pressure and no history of stroke or kidney issues",
        ),
        (
            "P002",
            60,
            "Male",
            "60-year-old man with Type 2 Diabetes, not on insulin, normal liver function",
        ),
        (
            "P003",
            50,
            "Female",
            "50-year-old woman with chronic kidney disease and hypertension",
        ),
    ],
    columns=["patient_id", "age", "gender", "description"],
)


# Sample clinical trials: structured eligibility bounds (age range, gender)
# plus free-text inclusion and exclusion criteria.
trials = pd.DataFrame(
    [
        (
            "T001",
            "Hypertension Study",
            40,
            65,
            "Any",
            "Patients aged 40-65 with diagnosed hypertension",
            "History of stroke or kidney disease",
        ),
        (
            "T002",
            "Diabetes Type 2 Trial",
            30,
            70,
            "Male",
            "Type 2 Diabetes, aged 30-70, not on insulin",
            "Liver dysfunction",
        ),
    ],
    columns=[
        "trial_id",
        "name",
        "min_age",
        "max_age",
        "gender",
        "inclusion_criteria",
        "exclusion_criteria",
    ],
)

def hybrid_match(patients, trials, semantic_threshold=0.3):
    """Match patients to trials using rule filters, then semantic similarity.

    Every patient/trial pair is first screened on age range and gender. Pairs
    that pass are scored with the cosine similarity between the embedding of
    the patient's description and the embedding of the trial's inclusion and
    exclusion text (joined with ". "), both produced by the module-level
    ``model``. Pairs scoring at least ``semantic_threshold`` are reported.
    Progress for every pair is printed to stdout.

    Args:
        patients: DataFrame with columns ``patient_id``, ``age``, ``gender``
            and ``description``.
        trials: DataFrame with columns ``trial_id``, ``name``, ``min_age``,
            ``max_age``, ``gender``, ``inclusion_criteria`` and
            ``exclusion_criteria``. A trial gender of ``"Any"`` (compared
            case-insensitively) accepts every patient.
        semantic_threshold: Minimum cosine similarity (inclusive) for a pair
            to be reported.

    Returns:
        DataFrame with columns ``patient_id``, ``trial_id``, ``trial_name``
        and ``similarity_score`` (rounded to 4 decimals), sorted by patient id
        ascending and score descending. When nothing matches, an empty
        DataFrame with the same columns is returned.
    """
    match_columns = ["patient_id", "trial_id", "trial_name", "similarity_score"]
    results = []

    # Trial text does not depend on the patient, so each trial is encoded at
    # most once (on first use) instead of once per patient/trial pair. Keyed
    # by row position rather than index label so duplicate labels are safe.
    trial_embeddings = {}

    for _, patient in patients.iterrows():
        print(f"\nChecking Patient: {patient['patient_id']}")
        patient_gender = str(patient["gender"]).strip().lower()
        # Encoded lazily: patients rejected by every rule filter never
        # trigger a model call.
        patient_emb = None

        for trial_pos, (_, trial) in enumerate(trials.iterrows()):
            print(f"  → Trial: {trial['trial_id']} ({trial['name']})")

            # --- Rule-Based Checks ---
            age_match = trial["min_age"] <= patient["age"] <= trial["max_age"]
            trial_gender = str(trial["gender"]).strip().lower()
            gender_match = trial_gender == "any" or trial_gender == patient_gender

            print(f"   Age Match: {age_match} \n Gender Match: {gender_match}")

            # NOTE(review): exclusion criteria are NOT enforced here. The
            # exclusion text is only embedded together with the inclusion
            # text, so a patient whose description mentions an excluded
            # condition is not filtered out and may even score higher.
            # A real exclusion check still needs to be designed.
            if not (age_match and gender_match):
                print("    Skipped due to rule-based filtering.")
                continue

            if trial_pos not in trial_embeddings:
                trial_text = trial["inclusion_criteria"] + ". " + trial["exclusion_criteria"]
                trial_embeddings[trial_pos] = model.encode(trial_text, convert_to_tensor=True)
            if patient_emb is None:
                patient_emb = model.encode(patient["description"], convert_to_tensor=True)

            similarity = util.cos_sim(patient_emb, trial_embeddings[trial_pos]).item()
            print(f"    Semantic Similarity: {similarity:.3f}")

            if similarity >= semantic_threshold:
                results.append({
                    "patient_id": patient["patient_id"],
                    "trial_id": trial["trial_id"],
                    "trial_name": trial["name"],
                    "similarity_score": round(similarity, 4),
                })

    if not results:
        print("\n No matches found. Try lowering the threshold or check patient/trial data.")
        return pd.DataFrame(columns=match_columns)

    return pd.DataFrame(results, columns=match_columns).sort_values(
        by=["patient_id", "similarity_score"], ascending=[True, False]
    )


# Run the matcher on the sample data and show the resulting table.
matches = hybrid_match(patients, trials, semantic_threshold=0.3)
print("\n Final Matches:", matches, sep="\n")
