# In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns
import matplotlib.pyplot as plt

# Example corpus: each entry is one sentence of a medical transcript.
# These are placeholders; swap in your actual transcripts.
medical_transcripts = [
    "Patient presented with symptoms of cough and shortness of breath.",
    "Physical examination revealed elevated temperature and wheezing.",
    "Diagnosis confirmed as bronchitis, prescribed antibiotics and inhaler.",
    "Patient complains of chest pain and difficulty breathing.",
    "Lab results indicate high levels of inflammation.",
    "Treatment plan includes rest, hydration, and pain management.",
    "Patient discharged with instructions to follow up in one week.",
    "Patient history includes hypertension and diabetes.",
    "X-ray shows signs of pneumonia in the left lung.",
    "Administered IV fluids and antibiotics.",
    "Scheduled for follow-up appointment with primary care physician.",
    "Family history of heart disease and asthma.",
    "Performed ECG to monitor heart function.",
    "Patient referred to cardiologist for further evaluation.",
    "Recommend lifestyle changes and medication adherence.",
]

# Build a bag-of-n-grams count matrix from the transcripts.
# (2, 3) selects word bigrams and trigrams; adjust as needed.
ngram_range = (2, 3)
vectorizer = CountVectorizer(ngram_range=ngram_range, analyzer='word')
X = vectorizer.fit_transform(medical_transcripts)

# Transposing the count matrix puts one n-gram on each row, so the pairwise
# similarity is computed between n-grams instead of between transcripts.
# Called with a single argument, cosine_similarity compares the rows with
# themselves.
cosine_sim = cosine_similarity(X.T)

# Plot the heatmap.
# The similarity matrix has one row and one column per distinct n-gram, so a
# fixed 10x8 inch canvas with a number printed in every cell becomes
# illegible (and slow to render) once there are more than a few dozen
# n-grams. Scale the canvas with the number of n-grams and only annotate the
# cells when they are large enough for the numbers to be readable.
feature_names = vectorizer.get_feature_names_out()  # fetched once, used for both axes
n_features = len(feature_names)

inches_per_label = 0.25      # space reserved for one tick label
max_figure_inches = 60       # upper bound so very large vocabularies still render
max_annotated_features = 30  # above this, per-cell annotations overlap

# Never go below the original 10x8 canvas; grow (up to the cap) otherwise.
side = min(max_figure_inches, inches_per_label * n_features)
plt.figure(figsize=(max(10, side), max(8, side)))
sns.heatmap(
    cosine_sim,
    annot=n_features <= max_annotated_features,
    cmap='YlGnBu',
    xticklabels=feature_names,
    yticklabels=feature_names,
)
plt.title('Cosine Similarity between N-grams in Medical Transcripts')
plt.xlabel('N-grams')
plt.ylabel('N-grams')
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()
