<a href="https://colab.research.google.com/github/anmolmatharu/CareQuery-AI/blob/main/CareQuery.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import re
import json
import os
from flask import Flask, request, jsonify, render_template
!pip install azure-ai-textanalytics
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Concatenate
from tensorflow.keras.optimizers import Adam


Collecting azure-ai-textanalytics
  Downloading azure_ai_textanalytics-5.3.0-py3-none-any.whl.metadata (82 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/82.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.8/82.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting azure-core<2.0.0,>=1.24.0 (from azure-ai-textanalytics)
  Downloading azure_core-1.32.0-py3-none-any.whl.metadata (39 kB)
Collecting azure-common~=1.1 (from azure-ai-textanalytics)
  Downloading azure_common-1.1.28-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting isodate<1.0.0,>=0.6.1 (from azure-ai-textanalytics)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Downloading azure_ai_textanalytics-5.3.0-py3-none-any.whl (298 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.6/298.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading azure_common-1.1.28-py2.py3-none-any.whl (14 kB)
Downl

In [None]:
# Lookup table used by expand_abbreviations(): lower-case clinical shorthand
# mapped to its plain-English expansion. Keys may contain "/" (e.g. "c/o").
MEDICAL_ABBREVIATIONS = {
    "pt": "patient",
    "c/o": "complains of",
    "sob": "shortness of breath",
    "cp": "chest pain",
    "hx": "history",
    "dx": "diagnosis",
    "tx": "treatment",
    "fx": "fracture",
    "abd": "abdominal",
    "hr": "heart rate",
    "bp": "blood pressure",
    "temp": "temperature",
    "lab": "laboratory",
    "meds": "medications",
    # "pc" (post cibum) is the abbreviation for "after meals". The previous key
    # "pm" was a typo that also rewrote clock times such as "3 pm".
    "pc": "after meals",
    "prn": "as needed",
    "stat": "immediately",
    "bid": "twice daily",
    "tid": "three times daily",
    "qid": "four times daily",
    "po": "by mouth",
    "iv": "intravenous",
    "im": "intramuscular",
    "sc": "subcutaneous",
    "hs": "at bedtime",
    "yo": "year old",
    "y/o": "year old",
    "f/u": "follow up",
}

In [None]:
# Azure credentials setup
def get_azure_text_analytics_client():
    """Build a TextAnalyticsClient from environment configuration.

    Reads AZURE_TEXT_ANALYTICS_KEY and AZURE_TEXT_ANALYTICS_ENDPOINT from the
    environment. When a variable is unset, the placeholder strings
    "your_key_here" / "your_endpoint_here" are used instead, so the returned
    client is only usable once real values are exported.

    Returns:
        TextAnalyticsClient: client constructed from the endpoint and key.
    """
    # Keep real credentials in the environment; do not paste them into the notebook.
    env = os.environ
    api_key = env.get("AZURE_TEXT_ANALYTICS_KEY", "your_key_here")
    service_endpoint = env.get("AZURE_TEXT_ANALYTICS_ENDPOINT", "your_endpoint_here")

    return TextAnalyticsClient(
        endpoint=service_endpoint,
        credential=AzureKeyCredential(api_key),
    )

In [None]:
# Flask application setup
# Module-level Flask app; the route decorators further down register on it.
# NOTE: the '/' route renders 'index.html', so that template must be available
# to Flask when you run this yourself.
app = Flask(__name__)

In [None]:
def load_models():
    """Build the two BERT-based Keras models and publish them as module globals.

    Sets:
        grammar_model: BERT `pooled_output` -> Dropout(0.1) -> Dense(1, sigmoid),
            compiled with Adam(learning_rate=3e-5) and binary cross-entropy.
        medical_standardization_model: BERT `sequence_output` -> Dropout(0.1)
            -> Dense stack of 768, 256 and 128 units (not compiled).

    Both models share one preprocessing layer and one encoder layer, which are
    fetched from TensorFlow Hub. No trained weights are loaded (the
    `load_weights` calls below are still commented out), so the Dense heads
    only have their initial weights.
    """
    global grammar_model, medical_standardization_model

    # Transformer (BERT) layers from TensorFlow Hub, shared by both models.
    print("Loading grammar correction model...")
    bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
    bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

    # Grammar model: one sigmoid score per input string.
    text_input = Input(shape=(), dtype=tf.string, name='text_input')
    preprocessed_text = bert_preprocess(text_input)
    outputs = bert_encoder(preprocessed_text)

    # Use the pooled output for sequence classification
    pooled_output = outputs["pooled_output"]
    dropout = Dropout(0.1)(pooled_output)
    grammar_output = Dense(1, activation='sigmoid', name='grammar_output')(dropout)

    grammar_model = Model(inputs=[text_input], outputs=[grammar_output])
    # `lr` was removed from Keras optimizers; `learning_rate` is the supported name.
    grammar_model.compile(
        optimizer=Adam(learning_rate=3e-5),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # TODO (deployment): load the pre-trained weights once they exist.
    # grammar_model.load_weights('grammar_model_weights.h5')

    # Custom model for medical standardization using BERT embeddings
    print(" medical standardization model is loading...")
    medical_input = Input(shape=(), dtype=tf.string, name='medical_input')
    preprocessed_medical = bert_preprocess(medical_input)
    medical_outputs = bert_encoder(preprocessed_medical)

    # Use the sequence output for token-level classification
    sequence_output = medical_outputs["sequence_output"]
    dropout_seq = Dropout(0.1)(sequence_output)
    medical_output = Dense(768, activation='relu')(dropout_seq)
    medical_output = Dense(256, activation='relu')(medical_output)
    medical_output = Dense(128, name='medical_term_encoding')(medical_output)

    medical_standardization_model = Model(inputs=[medical_input], outputs=[medical_output])
    # medical_standardization_model.load_weights('medical_model_weights.h5')

    print("yayy models loaded successfully!")


# Flask 2.3 removed `before_first_request`, so decorating with it raises
# AttributeError on current versions. Keep the hook where it still exists and
# otherwise emulate "run once before the first request" with `before_request`.
if hasattr(app, "before_first_request"):
    app.before_first_request(load_models)
else:
    @app.before_request
    def _load_models_once():
        # `medical_standardization_model` is the last global load_models() sets,
        # so its presence means a previous load ran to completion.
        if "medical_standardization_model" not in globals():
            load_models()

In [None]:
# Text preprocessing functions
def preprocess_text(text):
    """Normalise raw input: lower-case it, collapse whitespace runs, trim the ends.

    Args:
        text: input string.

    Returns:
        The lower-cased string with every run of whitespace replaced by a
        single space and no leading or trailing whitespace.
    """
    lowered = text.lower()
    single_spaced = re.sub(r'\s+', ' ', lowered)
    return single_spaced.strip()

In [None]:
def expand_abbreviations(text):
    """Expand common medical abbreviations found in MEDICAL_ABBREVIATIONS.

    The text is split on whitespace and each token is looked up on its own.
    Punctuation around a token stays where it was, so "(sob)," becomes
    "(shortness of breath),". Periods inside a token are ignored for the
    lookup, so dotted forms such as "b.p." still match "bp". Other inner
    punctuation is kept, so "i'm" is not mistaken for "im".

    Args:
        text: input string.

    Returns:
        The tokens re-joined with single spaces, with known abbreviations
        replaced by their expansions. Unknown tokens are returned unchanged.
    """
    expanded_words = []

    for word in text.split():
        # Split into leading punctuation, core, trailing punctuation.
        # "/" counts as part of the core because keys like "c/o" contain it.
        leading, core, trailing = re.match(
            r'^([^\w/]*)(.*?)([^\w/]*)$', word
        ).groups()

        lookup_key = core.replace('.', '').lower()
        expansion = MEDICAL_ABBREVIATIONS.get(lookup_key)

        if expansion is None:
            expanded_words.append(word)
        else:
            expanded_words.append(leading + expansion + trailing)

    return ' '.join(expanded_words)

def standardize_medical_terminology(text, azure_client=None):
    """Standardize medical terminology using Azure Text Analytics for Health.

    With a client, the text is sent to the healthcare-entities operation and
    each recognised entity whose `normalized_text` differs from its surface
    text is replaced by the normalised form. Replacement uses the entity's
    `offset`/`length`, so only the recognised span changes and identical
    substrings inside other words are left alone.

    Without a client, or if the call raises or the document result is an
    error, the function falls back to expand_abbreviations(). When the Azure
    path succeeds, abbreviation expansion is not applied.

    Args:
        text: input string.
        azure_client: optional TextAnalyticsClient (or compatible object).

    Returns:
        The standardized string.
    """
    if azure_client:
        try:
            # Healthcare analysis is a long-running operation: the SDK returns
            # a poller and the per-document results come from poller.result().
            poller = azure_client.begin_analyze_healthcare_entities([text])
            result = next(iter(poller.result()), None)

            if result is not None and not result.is_error:
                to_replace = sorted(
                    (
                        entity for entity in result.entities
                        if entity.normalized_text and entity.normalized_text != entity.text
                    ),
                    key=lambda entity: entity.offset,
                    reverse=True,
                )

                # Splice from the end of the string backwards so the offsets of
                # entities still to be processed remain valid.
                standardized_text = text
                untouched_end = len(text)
                for entity in to_replace:
                    span_end = entity.offset + entity.length
                    if span_end > untouched_end:
                        # Overlaps a span that was already rewritten; skip it.
                        continue
                    standardized_text = (
                        standardized_text[:entity.offset]
                        + entity.normalized_text
                        + standardized_text[span_end:]
                    )
                    untouched_end = entity.offset
                return standardized_text
        except Exception as e:
            # Best effort: report the problem and use the local fallback below.
            print(f"Azure Text Analytics error: {e}")

    # Fallback to basic abbreviation expansion if Azure client is not available
    # or if there was an error with the Azure service
    return expand_abbreviations(text)
    def correct_grammar(text):
    """Apply grammar correction using TensorFlow model"""

    # NEED TO ADD STILL:TO-DO: you would use a sequence-to-sequence model
    # TO-DO:trained specifically for grammar correction

    # Simple rule-based corrections as fallback
    corrections = [
        (r'\bi\b', 'I'),  # Capitalize 'i'
        (r'\s+', ' '),    # Remove extra spaces
        (r'\.([a-zA-Z])', '. \\1'),  # Add space after period
        (r'\s+\.', '.'),  # Remove space before period
    ]

    corrected_text = text
    for pattern, replacement in corrections:
        corrected_text = re.sub(pattern, replacement, corrected_text)

    # Ensure first letter is capitalized
    if corrected_text and len(corrected_text) > 0:
        corrected_text = corrected_text[0].upper() + corrected_text[1:]

    return corrected_text

In [None]:
# TODO: replace the keyword check below with Azure Content Moderator.
def check_harmful_content(text, azure_client=None):
    """Flag text that contains any term from a small hard-coded watch list.

    This is a placeholder for Azure Content Moderator; `azure_client` is
    accepted but not used. Matching is a case-insensitive substring test and
    the terms are tried in list order, so the first listed term that occurs
    is the one reported.

    Args:
        text: input string.
        azure_client: unused.

    Returns:
        (True, message naming the matched term) on a hit, otherwise (False, "").
    """
    # Placeholder watch list - to be replaced by the real moderation service.
    watch_list = (
        "overdose", "bad", "drugs", "illegal drugs", "abuse", "harmful",
    )

    haystack = text.lower()
    first_hit = next((term for term in watch_list if term in haystack), None)

    if first_hit is None:
        return False, ""
    return True, f"Potentially harmful content detected: '{first_hit}'"
  def detect_pii(text, azure_client=None):
    """Detect and handle personally identifiable information using Azure services"""
    # TO DO: would use Azure's PII detection


    # Simple pattern matching for common PII
    pii_patterns = {
        'ssn': r'\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b',
        'phone': r'\b\d{3}[-\s]?\d{3}[-\s]?\d{4}\b',
        'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
        'dob': r'\b(0[1-9]|1[0-2])[-/](0[1-9]|[12][0-9]|3[01])[-/](19|20)\d{2}\b'
    }

    pii_found = {}
    for pii_type, pattern in pii_patterns.items():
        matches = re.findall(pattern, text)
        if matches:
            pii_found[pii_type] = matches


In [None]:
# Redact PII from text
    redacted_text = text
    for pii_type, matches in pii_found.items():
        for match in matches:
            if pii_type == 'ssn':
                redacted_text = redacted_text.replace(match, "[REDACTED SSN]")
            elif pii_type == 'phone':
                redacted_text = redacted_text.replace(match, "[REDACTED PHONE]")
            elif pii_type == 'email':
                redacted_text = redacted_text.replace(match, "[REDACTED EMAIL]")
            elif pii_type == 'dob':
                redacted_text = redacted_text.replace(match, "[REDACTED DOB]")

    return redacted_text, len(pii_found) > 0

In [None]:
def process_clinical_text(text, use_azure=False):
    """Main function to process and standardize clinical text.

    Pipeline: preprocess -> harmful-content check -> PII redaction ->
    medical terminology standardization -> grammar correction. A step is
    recorded in "processing_steps" only when it actually changed the text.

    Args:
        text: raw clinical note.
        use_azure: when truthy, an Azure Text Analytics client is created and
            passed to the steps that accept one.

    Returns:
        dict with "original_text", "processed_text", "standardized" and
        "processing_steps". If the harmful-content check fires, processing
        stops early: "standardized" is False, "processed_text" is the
        original text and an "error" message is included.
    """
    # Setup Azure client if needed
    azure_client = get_azure_text_analytics_client() if use_azure else None

    # track the processing steps
    processing_steps = []

    # Step 1: preprocess text
    preprocessed_text = preprocess_text(text)
    if preprocessed_text != text:
        processing_steps.append({
            "step": "Preprocessing",
            "before": text,
            "after": preprocessed_text
        })

    # Step 2: check for harmful content and stop early if any is found
    is_harmful, harmful_msg = check_harmful_content(preprocessed_text, azure_client)
    if is_harmful:
        return {
            "original_text": text,
            "processed_text": text,
            "standardized": False,
            "error": harmful_msg,
            "processing_steps": processing_steps
        }

    # Step 3: handle PII; later steps only see the redacted text
    redacted_text, has_pii = detect_pii(preprocessed_text, azure_client)
    if has_pii:
        processing_steps.append({
            "step": "PII Detection",
            "before": preprocessed_text,
            "after": redacted_text
        })
        preprocessed_text = redacted_text

    # Step 4: standardize the medical terms
    standardized_text = standardize_medical_terminology(preprocessed_text, azure_client)
    if standardized_text != preprocessed_text:
        processing_steps.append({
            "step": "Medical Terminology Standardization",
            "before": preprocessed_text,
            "after": standardized_text
        })

    # Step 5: correct grammar
    final_text = correct_grammar(standardized_text)
    if final_text != standardized_text:
        processing_steps.append({
            "step": "Grammar Correction",
            "before": standardized_text,
            "after": final_text
        })

    return {
        "original_text": text,
        "processed_text": final_text,
        "standardized": True,
        "processing_steps": processing_steps
    }

In [None]:
# Flask routes
# NOTE: point the '/' route at your own template file if it is not 'index.html'.
@app.route('/')
def home():
    """Serve the landing page by rendering the 'index.html' template."""
    return render_template('index.html')

@app.route('/process', methods=['POST'])
def process_text():
    """POST /process - run the clinical-text pipeline on a JSON payload.

    Expected body: a JSON object with "text" (non-empty string) and an
    optional "use_azure" flag (defaults to False).

    Returns:
        200 with the process_clinical_text() result as JSON, or 400 with an
        {"error": ...} body when the payload is missing, is not a JSON object,
        or carries no usable "text".
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # caller gets the JSON error below instead of an unhandled failure.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    clinical_text = data.get('text', '')
    use_azure = data.get('use_azure', False)

    # The pipeline calls string methods on the text, so reject non-strings too.
    if not isinstance(clinical_text, str) or not clinical_text:
        return jsonify({"error": "No text provided"}), 400

    result = process_clinical_text(clinical_text, use_azure)
    return jsonify(result)

@app.route('/examples')
def get_examples():
    """GET /examples - return sample notes together with their processed form.

    Each built-in sample is run through process_clinical_text() with Azure
    disabled.

    Returns:
        JSON list of {"original": ..., "processed": ...} objects, in the
        order the samples are listed.
    """
    sample_notes = (
        "pt c/o sob after eating",
        "pt with hx of mi presents with cp",
        "f/u pt for lab results",
        "56 yo m with fever and cough",
        "pt on abx for uti, f/u in 2d",
    )

    return jsonify([
        {
            "original": note,
            "processed": process_clinical_text(note, False)["processed_text"],
        }
        for note in sample_notes
    ])

In [None]:
# Run the application
if __name__ == "__main__":
    # Debug mode exposes Werkzeug's interactive debugger, so it is opt-in via
    # FLASK_DEBUG rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    # The auto-reloader re-executes the launching process, which does not work
    # from inside a notebook kernel, so it stays off even in debug mode.
    app.run(debug=debug_enabled, use_reloader=False)