![ethics-ai4-1.png](ethics-ai4-1.png)


# Ethical Guidelines Classifier Function
#### by Frank Metty started on 02_20_2025

The **Ethical Guidelines** classifier is used to classify the questions being asked in the ThriveAi application. It assigns each question a constraint class on a scale of 1-7.

**Intent Class	Description 	                                                         Response Category**

1	            Question with database domain	                                         Providing an Answer
2	            Question not within database domain	                                     No Answer - Redirect to LLM
3	            Inappropriate Question - Practicing medicine	                             Inappropriate Question - Medical
4	            Inappropriate Question - Privacy or PHI 	                                Inappropriate Question - Privacy/PHI
5	            Dr with attribution for patient	                                         Physician with Patient Attribution
6	            Dr without attribution for patient	                                     Physician without Patient Attribution
7	            Dangerous, racist, bullying, unethical	                         Dangerous, Racist, Bullying, Unethical Content

## Step 1 - Import Necessary Libraries

In [None]:
import json
import os
import datetime

## Step 2 - Define the Function

In [None]:
def _normalize_for_word_match(text):
    """Lowercase *text* and turn every run of non-alphanumerics into one space."""
    spaced = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return " ".join(spaced.split())


def get_ethical_guidelines(question, log_file="training_data.json"):
    """
    Classify a question into an ethical-guideline category by keyword matching.

    No ML model is used. Categories are tested in ascending order (1 to 7)
    and the first category with a matching keyword wins.

    Matching is case-insensitive:
      * keywords written entirely in upper case (acronyms such as "AI" or
        "COPD") must appear as whole words, so "AI" does not fire on
        "explain" or "rainbow";
      * every other keyword matches as a substring of the question.

    Side effect: the classification is appended to the JSON list stored in
    ``log_file`` unless an identical question/category pair is already there.
    The file is created when missing; unreadable or non-list content is
    replaced by a fresh list.

    Args:
        question: The question text to classify.
        log_file: Path of the JSON log file to update.

    Returns:
        The matching category number (1-7), or the string "Unknown" when no
        keyword matches.
    """
    # NOTE: the letter case of a keyword is meaningful here. An all-uppercase
    # keyword is treated as an acronym and matched on word boundaries only.
    category_keywords = {
        1: [  # Answer provided by ThriveAi
            "symptoms", "blood pressure", "diabetes", "heart disease", "stroke", "hospitalization",
            "COPD", "cancer", "mortality", "disease", "asthma", "hypertension", "obesity", "cholesterol",
            "arthritis", "depression", "mental health", "injuries", "infection", "cardiovascular", "pain", "illness"
        ],
        2: [  # Redirect to ChatGPT
            "investing", "stock market", "insurance", "legal rights", "finance", "money", "budget",
            "tax", "retirement", "loan", "mortgage", "bankruptcy", "real estate", "economy", "credit score",
            "lawsuit", "contract", "policy", "business", "regulations", "financial planning", "AI", "blockchain", "5G", "trends", "future",
            "machine learning", "biotechnology", "digital health", "telemedicine", "virtual reality",
            "wearable devices", "genetics", "robotics in medicine", "quantum computing", "innovation"
        ],
        3: [  # Inappropriate Question - Medical
            "vaccine", "safe", "pregnant", "COVID", "medical advice", "diagnose", "condition",
            "medication", "side effects", "doctor recommendation", "prescription", "therapy",
            "mental health treatment", "surgery", "alternative medicine", "home remedies",
            "is it normal if", "should I be worried about", "self-diagnose", "medical treatment", "Ozempic", "food",
            "diet", "nutrition", "exercise", "lifestyle", "weight loss", "calories",
            "healthy eating", "workout", "fitness", "yoga", "gym", "meal plan", "hydration",
            "intermittent fasting", "superfoods", "organic food", "vegan", "protein intake", "lbs", "pounds"
        ],
        4: [  # Inappropriate Question - Privacy/PHI
            "medical records", "patient information", "primary care", "hospital access",
            "health history", "lab results", "test results", "prescription details", "MRI scans",
            "blood test results", "X-ray", "health data", "hospital database", "personal medical records",
            "cybersecurity", "data privacy",
        ],
        5: [  # Physician with Patient Attribution
            "my patient", "follow-up appointment", "schedule consultation", "patient discharge summary",
            "referral for a specialist", "patient transfer request", "continuity of care", "care coordination",
            "patient's active medications"
        ],
        6: [  # Physician with no Attribution
            "check someone’s medical record", "access patient data outside my clinic", "radiology report for a non-assigned patient",
            "lab results of someone outside my care", "how can I get the medical history of someone else's patient?", "get prescription records of someone not in my care",
            "treatment plan for a patient not under my care", "medication history of a non-assigned patient"
        ],
        7: [  # Illegal, unethical, or privacy-violating requests
            "hack", "steal", "database", "access personal", "illegal", "fake license", "credit card",
            "cheating", "black market", "bypass security", "exploit", "leak data", "buy medical records",
            "find personal details", "spy", "forged documents", "deep web", "scam", "identity theft",
            "breach system", "fake ID", "get private data", "sell patient information", "phishing",
            "unauthorized access", "dark web", "leak passwords"
        ]
    }

    # Two lowercase views of the question: the raw text for substring
    # matching, and a space-padded word sequence for whole-word matching.
    question_lower = question.lower()
    padded_words = f" {_normalize_for_word_match(question)} "

    def keyword_matches(keyword):
        keyword = keyword.strip()
        if keyword.isupper():
            # Acronym: require a whole-word hit to avoid matching inside
            # unrelated words.
            return f" {_normalize_for_word_match(keyword)} " in padded_words
        # Lowercase the keyword too; otherwise any keyword containing a
        # capital letter could never match the lowercased question.
        return keyword.lower() in question_lower

    # Assign the first category (in dict order) that has a matching keyword
    assigned_category = "Unknown"
    for category, keywords in category_keywords.items():
        if any(keyword_matches(keyword) for keyword in keywords):
            assigned_category = category
            break

    # Format timestamp to be more readable
    friendly_timestamp = datetime.datetime.now().strftime("%b %d, %Y - %I:%M %p")

    # The stripped text is what gets stored, so it is also what must be used
    # for duplicate detection.
    cleaned_question = question.strip()

    # Prepare log entry
    log_entry = {
        "Timestamp": friendly_timestamp,  # Human-readable format
        "Question": cleaned_question,
        "Ethical Guideline Class": assigned_category
    }

    # Load existing training data; fall back to an empty list when the file
    # is missing, is not valid JSON, or does not contain a JSON list.
    log_data = []
    if os.path.exists(log_file):
        with open(log_file, "r", encoding="utf-8") as file:
            try:
                loaded = json.load(file)
            except json.JSONDecodeError:
                loaded = []
        if isinstance(loaded, list):
            log_data = loaded

    # Check if this question-category pair already exists
    already_logged = any(
        isinstance(entry, dict)
        and entry.get("Question") == cleaned_question
        and entry.get("Ethical Guideline Class") == assigned_category
        for entry in log_data
    )
    if already_logged:
        print(f"Duplicate entry detected. No changes made for: '{question}'")
        return assigned_category  # Do not add duplicate

    # Append new entry since it does not exist
    log_data.append(log_entry)

    # Save updated log to file
    with open(log_file, "w", encoding="utf-8") as file:
        json.dump(log_data, file, indent=4)

    return assigned_category  # Return classification result

# Example usage: classify a handful of sample questions and print each result.
# Every call also records the question in the default log file
# (training_data.json), skipping entries that are already present.
questions = [
    "What are the side effects of taking too much vitamin D?",
    "How can I access a patient's MRI scan outside my hospital?",
    "My patient needs a referral for a specialist.",
    "How can I hack into my doctor's patient portal?",
    "What are the symptoms of diabetes?",
    "I want to check the medical records of someone outside my care.",
    "What is the best way to invest in the stock market?",
    "what is my primary care provider's name?",
    "How can I lose 5 lbs in a month?",
]

for q in questions:
    result = get_ethical_guidelines(q)
    report = f"Question: {q}\nEthical Guideline Class: {result}\n"
    print(report)

# Inspect training_data.json afterwards to see the logged questions.