In [1]:
pip install sentence-transformers scikit-learn



In [6]:
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Shared NLP resources: the spaCy pipeline and the sentence-embedding model.
nlp = spacy.load("en_core_web_sm")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Candidate labels that a query can be mapped onto.
image_type_terms = [
    "x-ray",
    "mri",
    "ct scan",
    "pathology",
    "skin lesion",
    "retina",
    "blood sample",
    "echocardiogram",
    "tomography",
]
disease_terms = [
    "pneumonia",
    "tumor",
    "covid",
    "covid-19",
    "lung disease",
    "leukemia",
    "eye disease",
    "cardiovascular disease",
    "neurological disease",
    "alzheimer’s disease",
]

# Pre-compute one embedding per label (row i belongs to term i) so that
# matching only has to embed the query side.
image_type_embeddings, disease_embeddings = (
    embedding_model.encode(label_list)
    for label_list in (image_type_terms, disease_terms)
)

# Match terms based on similarity
def find_closest_match(query, terms, term_embeddings, threshold=0.5):
    """Return the term most similar to ``query``, or ``None`` if none is close enough.

    The query is embedded with the module-level ``embedding_model`` and compared
    against ``term_embeddings`` using cosine similarity.

    Args:
        query: Text to match (a single token or a phrase).
        terms: Candidate labels; ``terms[i]`` must correspond to row ``i`` of
            ``term_embeddings``.
        term_embeddings: Embeddings of ``terms``, as produced by
            ``embedding_model.encode(terms)``.
        threshold: Similarity the best candidate must strictly exceed to be
            accepted. Defaults to 0.5.

    Returns:
        The best-matching term, or ``None`` when the query is blank, there are
        no candidates, or the best similarity does not exceed ``threshold``.
    """
    # Nothing to embed or nothing to compare against: argmax over an empty
    # similarity row would raise, so report "no match" instead.
    if not query or not query.strip() or len(terms) == 0:
        return None

    query_embedding = embedding_model.encode([query])
    # cosine_similarity returns a (1, n_terms) matrix; take its single row.
    similarities = cosine_similarity(query_embedding, term_embeddings)[0]
    best_idx = int(similarities.argmax())
    return terms[best_idx] if similarities[best_idx] > threshold else None

# Detect image type and disease using NLP and similarity
def extract_labels_dynamic(query):
    """Detect an image type and a disease label in a free-text query.

    The query is lower-cased and tokenised with the module-level ``nlp``
    pipeline. Tokens are visited in order, and for each label kind the first
    token for which ``find_closest_match`` returns a term wins.

    Args:
        query: The user's request as a string. ``None`` or an empty string
            yields no labels.

    Returns:
        A ``(detected_image_type, detected_disease)`` tuple; either element is
        ``None`` when no token matched a term of that kind.
    """
    if not query:
        return None, None

    detected_image_type = None
    detected_disease = None

    # Use tokens and embeddings to find closest matches
    for token in nlp(query.lower()):
        # Punctuation and whitespace carry no label information; skipping them
        # avoids pointless embedding calls and accidental matches.
        if token.is_punct or token.is_space:
            continue

        if detected_image_type is None:
            detected_image_type = find_closest_match(token.text, image_type_terms, image_type_embeddings)
        if detected_disease is None:
            detected_disease = find_closest_match(token.text, disease_terms, disease_embeddings)

        # Both labels are settled; later tokens cannot change the result.
        if detected_image_type is not None and detected_disease is not None:
            break

    return detected_image_type, detected_disease

# Metrics evaluation for test accuracy
def evaluate_query_matcher(queries):
    """Run the label extractor over labelled queries and print accuracy figures.

    For every query the detected and expected labels are printed together with
    whether each one matched. The percentage of correct image-type and disease
    detections is then printed as a summary.

    Args:
        queries: Iterable of ``(query_text, expected_image_type,
            expected_disease)`` tuples. Use ``None`` for a label that should
            not be detected.

    Returns:
        None. All results are written to standard output.
    """
    # Materialise once so generators are accepted and len() is well-defined.
    queries = list(queries)
    total_queries = len(queries)
    if total_queries == 0:
        # Avoid dividing by zero when there is nothing to score.
        print("No queries to evaluate.")
        return

    correct_disease_count = 0
    correct_image_count = 0

    for query_text, expected_image_type, expected_disease in queries:
        # Extract detected image type and disease from the query
        detected_image_type, detected_disease = extract_labels_dynamic(query_text)

        # A label counts as correct only on exact equality; an expected None
        # is correct when nothing was detected.
        is_correct_disease = detected_disease == expected_disease
        is_correct_image = detected_image_type == expected_image_type
        correct_disease_count += int(is_correct_disease)
        correct_image_count += int(is_correct_image)

        print(f"Query: {query_text}")
        print(f"Expected Image Type: {expected_image_type}, Detected Image Type: {detected_image_type}")
        print(f"Expected Disease: {expected_disease}, Detected Disease: {detected_disease}")
        print(f"Correct Image Type: {is_correct_image}, Correct Disease: {is_correct_disease}\n")

    # Calculate metrics
    disease_accuracy = correct_disease_count / total_queries * 100
    image_accuracy = correct_image_count / total_queries * 100

    print("Accuracy Metrics:")
    print(f"Disease Accuracy: {disease_accuracy:.2f}%")
    print(f"Image Type Accuracy: {image_accuracy:.2f}%")


In [7]:

# Example queries with expected outputs.
# Each entry is a (query_text, expected_image_type, expected_disease) tuple, the
# shape evaluate_query_matcher unpacks. None in a label slot means no label of
# that kind is expected to be detected.
queries = [
    ("I need a model that can classify chest x-ray images to detect lung diseases.", "x-ray", "lung disease"),
    ("I need a model to classify pneumonia from chest x-ray images.", "x-ray", "pneumonia"),
    ("I need to classify pathology images into different types of tissue categories.", "pathology", None),
    ("I need a model that can classify different types of skin lesions.", "skin lesion", None),
    ("I am looking for a model that can classify optical coherence tomography images for eye disease detection.", "tomography", "eye disease"),
    ("I need a model that can analyze chest x-rays and identify different diseases.", "x-ray", None),
    ("I want to detect retinal diseases using tomography images.", "tomography", "eye disease"),
    ("I'm looking for a model that can help classify dermatology images into different categories.", "skin lesion", None),
    ("I need to classify lung infections like pneumonia using x-ray images.", "x-ray", "pneumonia"),
    ("I need a model to classify pathology scans based on tissue features.", "pathology", None),
    ("Can the model detect pneumonia in chest x-rays?", "x-ray", "pneumonia"),
    ("Can the model detect eye diseases from tomography images?", "tomography", "eye disease"),
    # NOTE(review): disease_terms also contains an Alzheimer's entry, yet this
    # case expects "neurological disease" — confirm which label is intended.
    ("I need a model to classify brain scans to detect Alzheimer’s disease.", "mri", "neurological disease"),
    ("I need a model to classify cardiovascular diseases using MRI scans.", "mri", "cardiovascular disease"),
    ("I need a model to classify blood samples to detect leukemia.", "blood sample", "leukemia"),
    ("Can the model detect COVID-19 from lung CT scans?", "ct scan", "covid-19"),
    # Negative cases: both expected labels are None.
    ("Is there a model for identifying car damage in accident photos?", None, None),
    ("Can the model classify handwritten text from ancient manuscripts?", None, None),
    ("Can the model identify objects in satellite images?", None, None)
]

# Evaluate the queries: prints a per-query comparison followed by the accuracy summary.
evaluate_query_matcher(queries)

Query: I need a model that can classify chest x-ray images to detect lung diseases.
Expected Image Type: x-ray, Detected Image Type: x-ray
Expected Disease: lung disease, Detected Disease: lung disease
Correct Image Type: True, Correct Disease: True

Query: I need a model to classify pneumonia from chest x-ray images.
Expected Image Type: x-ray, Detected Image Type: x-ray
Expected Disease: pneumonia, Detected Disease: pneumonia
Correct Image Type: True, Correct Disease: True

Query: I need to classify pathology images into different types of tissue categories.
Expected Image Type: pathology, Detected Image Type: pathology
Expected Disease: None, Detected Disease: tumor
Correct Image Type: True, Correct Disease: False

Query: I need a model that can classify different types of skin lesions.
Expected Image Type: skin lesion, Detected Image Type: skin lesion
Expected Disease: None, Detected Disease: tumor
Correct Image Type: True, Correct Disease: False

Query: I am looking for a model th

In [11]:
# One-off check: a loosely worded query evaluated against x-ray / lung disease
# as the expected labels.
query = [
    ("scans of lungs", "x-ray", "lung disease"),
]
evaluate_query_matcher(query)

Query: scans of lungs
Expected Image Type: x-ray, Detected Image Type: ct scan
Expected Disease: lung disease, Detected Disease: lung disease
Correct Image Type: False, Correct Disease: True

Accuracy Metrics:
Disease Accuracy: 100.00%
Image Type Accuracy: 0.00%
