In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

# Toy corpus of 20 one-sentence diagnosis notes, each pairing a few symptoms
# with a named condition. It is the text the TF-IDF vectorizer and the
# nearest-neighbour model are fitted on, and recommendations are returned as
# entries of this list, so its order must not change after fitting.
documents = [
    "Patient presents with fever, cough, and fatigue, suggestive of influenza.",
    "Abdominal pain, bloating, and diarrhea indicate possible gastroenteritis.",
    "Joint pain, swelling, and stiffness are common in rheumatoid arthritis.",
    "Skin rash, itching, and redness may be symptoms of eczema.",
    "Headache, dizziness, and blurred vision could indicate migraine.",
    "Chest pain, shortness of breath, and palpitations may signal heart disease.",
    "Excessive thirst, frequent urination, and fatigue are signs of diabetes.",
    "Mood swings, sadness, and loss of interest may indicate depression.",
    "Muscle weakness, tremors, and fatigue can occur in multiple sclerosis.",
    "Jaundice, abdominal pain, and dark urine suggest hepatitis.",
    "Frequent infections, fatigue, and swollen lymph nodes indicate HIV.",
    "Vision changes, eye pain, and sensitivity to light may suggest glaucoma.",
    "Chest tightness, wheezing, and coughing are symptoms of asthma.",
    "Frequent falls, memory loss, and confusion are signs of Alzheimer's disease.",
    "Back pain, numbness, and tingling could be due to sciatica.",
    "Swollen glands, sore throat, and fever may indicate strep throat.",
    "Excessive sweating, weight loss, and palpitations may point to hyperthyroidism.",
    "Abdominal discomfort, bloating, and changes in bowel movements suggest IBS.",
    "Joint stiffness, swelling, and limited range of motion are common in arthritis.",
    "Fever, chills, and body aches are typical of the flu."
]

# Build the TF-IDF representation of a corpus (unigrams, presence-based tf)
def compute_tfidf(documents):
    """Fit a TF-IDF vectorizer on ``documents`` and vectorize them.

    The vectorizer uses single-word terms only (``ngram_range=(1, 1)``) and
    ``binary=True``, so the term-frequency part of each weight records whether
    a word occurs in a document rather than how many times.

    Args:
        documents: Iterable of raw text documents.

    Returns:
        A ``(tfidf_matrix, vectorizer)`` tuple: the matrix produced by
        ``fit_transform`` for ``documents`` and the fitted vectorizer, which
        can later ``transform`` unseen text into the same feature space.
    """
    fitted_vectorizer = TfidfVectorizer(binary=True, ngram_range=(1, 1))
    return fitted_vectorizer.fit_transform(documents), fitted_vectorizer

# Function to train the Nearest Neighbors model
def train_nn_model(documents, *, n_neighbors=5):
    """Fit a TF-IDF vectorizer and a nearest-neighbours model on ``documents``.

    Args:
        documents: Iterable of raw text documents forming the corpus.
        n_neighbors: Default neighbour count configured on the model, used by
            ``kneighbors`` when the caller does not pass its own value.
            Defaults to 5, the value that used to be hard-coded here.

    Returns:
        A ``(nn_model, vectorizer)`` tuple: the fitted ``NearestNeighbors``
        model and the fitted TF-IDF vectorizer needed to project new text
        into the space the model was fitted on.
    """
    tfidf_matrix, vectorizer = compute_tfidf(documents)
    nn_model = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto')
    nn_model.fit(tfidf_matrix)
    return nn_model, vectorizer

# Function to recommend similar documents
def recommend_similar_documents(model, vectorizer, documents, new_diagnoses,
                                *, n_neighbors=5):
    """Return the nearest corpus documents for each new diagnosis text.

    Args:
        model: Fitted neighbours model exposing
            ``kneighbors(X, n_neighbors=...)`` that returns a
            ``(distances, indices)`` pair.
        vectorizer: Fitted vectorizer whose ``transform`` maps raw text to the
            features ``model`` expects.
        documents: Sequence of corpus texts. The indices returned by the model
            are used to index into it, so it is presumably the same sequence,
            in the same order, that the model was fitted on.
        new_diagnoses: Sized collection of query texts.
        n_neighbors: Maximum number of documents to return per query.
            Defaults to 5 (the previously hard-coded value) and is capped at
            ``len(documents)`` so small corpora do not cause an error.

    Returns:
        A list with one entry per query, each a list of documents in the
        order the model returned their indices. Empty when ``new_diagnoses``
        is empty.
    """
    # Explicit length check (not truthiness) so array-like inputs work too;
    # with nothing to look up there is no point calling the vectorizer/model.
    if len(new_diagnoses) == 0:
        return []

    # Never request more neighbours than there are documents to hand back.
    k = min(n_neighbors, len(documents))

    query_matrix = vectorizer.transform(new_diagnoses)
    # Distances are not part of the result; only the indices are needed.
    _, neighbor_indices = model.kneighbors(query_matrix, n_neighbors=k)
    return [[documents[idx] for idx in row] for row in neighbor_indices]

# Fit the vectorizer and neighbour model on the sample corpus
nn_model, tfidf_vectorizer = train_nn_model(documents)

# Free-text descriptions to find similar corpus entries for
new_diagnoses = [
    "Patient presents with severe abdominal pain and vomiting.",
    "The appearance of a rash with fever and joint pain.",
    "Symptoms of fatigue, dizziness, and shortness of breath."
]

# One list of similar documents per new description
recommendations = recommend_similar_documents(
    nn_model,
    tfidf_vectorizer,
    documents,
    new_diagnoses,
)


def _print_recommendations(queries, results):
    """Print each query followed by its matches, one blank line per group."""
    for diagnosis, matches in zip(queries, results):
        print(f"Diagnosis: '{diagnosis}' => Similar Diagnoses:")
        for rec in matches:
            print(f" - {rec}")
        print()


# Show the results
_print_recommendations(new_diagnoses, recommendations)


Diagnosis: 'Patient presents with severe abdominal pain and vomiting.' => Similar Diagnoses:
 - Patient presents with fever, cough, and fatigue, suggestive of influenza.
 - Abdominal pain, bloating, and diarrhea indicate possible gastroenteritis.
 - Jaundice, abdominal pain, and dark urine suggest hepatitis.
 - Abdominal discomfort, bloating, and changes in bowel movements suggest IBS.
 - Joint pain, swelling, and stiffness are common in rheumatoid arthritis.

Diagnosis: 'The appearance of a rash with fever and joint pain.' => Similar Diagnoses:
 - Fever, chills, and body aches are typical of the flu.
 - Patient presents with fever, cough, and fatigue, suggestive of influenza.
 - Skin rash, itching, and redness may be symptoms of eczema.
 - Joint pain, swelling, and stiffness are common in rheumatoid arthritis.
 - Joint stiffness, swelling, and limited range of motion are common in arthritis.

Diagnosis: 'Symptoms of fatigue, dizziness, and shortness of breath.' => Similar Diagnoses:
 