In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
import logging
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import gc

# Display every DataFrame column instead of eliding the middle ones.
pd.set_option('display.max_columns', None)

# Plot styling: seaborn theme, "husl" palette, 12x8 default figure size.
sns.set_theme()
sns.set_palette("husl")
plt.rcParams['figure.figsize'] = [12, 8]

# Put ../src on the import path; later cells import Embedding, Retrival and
# EnhancedRetrival, which presumably live there (confirm against the repo).
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '../src' not in sys.path:
    sys.path.append('../src')

In [None]:
from Embedding import ClinicalEmbeddingProcessor

In [2]:
# Set up the embedding processor on the Bio_ClinicalBERT checkpoint, reading
# from / writing to the processed-data directory.
processor = ClinicalEmbeddingProcessor(
    model_name='emilyalsentzer/Bio_ClinicalBERT',
    processed_path='../data/processed',
)

# Train/validation split with a very small validation fraction, followed by
# embedding creation in batches of 32. The recorded run below took roughly
# 30 minutes for 142 files, so expect this cell to be slow.
processor.create_split(validation_size=0.003)
processor.create_embeddings(batch_size=32)

# Hand-written example case used to exercise the single-case embedding path.
case_sections = {
    'history': 'history of depression, poor response to SSRIs',
    'plan': 'consider medication adjustment',
}
new_case = {
    'age_group': '31-50',
    'gender': 'F',
    'diagnoses': ['Depression'],
    'medications': 'sertraline 50mg oral',
    'sections': case_sections,
}

# NOTE(review): _prepare_text_for_embedding is a private helper of the
# processor; it is called directly here because no public equivalent is
# visible from this notebook.
case_series = pd.Series(new_case)
case_text = processor._prepare_text_for_embedding(case_series)
case_embedding = processor.get_case_embedding(case_text)

2024-11-16 17:15:38,654 - INFO - Loading emilyalsentzer/Bio_ClinicalBERT on cuda
2024-11-16 17:15:43,930 - INFO - Creating train-validation split...


Loading cases:   0%|          | 0/45 [00:00<?, ?it/s]

Saving train chunks:   0%|          | 0/142 [00:00<?, ?it/s]

2024-11-16 17:15:57,469 - INFO - Split completed: {'total_cases': 141624, 'train_cases': 141293, 'val_cases': 331, 'train_patients': 7601, 'val_patients': 22}
2024-11-16 17:15:57,615 - INFO - Creating embeddings for training set...
2024-11-16 17:15:57,618 - INFO - Processing file 1/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:16:10,552 - INFO - Processing file 2/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:16:23,395 - INFO - Processing file 3/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:16:36,137 - INFO - Processing file 4/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:16:48,839 - INFO - Processing file 5/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:17:01,582 - INFO - Processing file 6/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:17:14,291 - INFO - Processing file 7/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:17:26,978 - INFO - Processing file 8/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:17:39,720 - INFO - Processing file 9/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:17:52,516 - INFO - Processing file 10/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:18:05,199 - INFO - Processing file 11/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:18:17,980 - INFO - Processing file 12/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:18:30,799 - INFO - Processing file 13/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:18:43,656 - INFO - Processing file 14/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:18:56,457 - INFO - Processing file 15/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:19:09,298 - INFO - Processing file 16/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:19:22,077 - INFO - Processing file 17/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:19:34,898 - INFO - Processing file 18/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:19:47,811 - INFO - Processing file 19/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:20:00,653 - INFO - Processing file 20/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:20:13,434 - INFO - Processing file 21/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:20:26,193 - INFO - Processing file 22/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:20:38,902 - INFO - Processing file 23/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:20:51,751 - INFO - Processing file 24/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:21:04,584 - INFO - Processing file 25/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:21:17,452 - INFO - Processing file 26/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:21:30,244 - INFO - Processing file 27/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:21:42,990 - INFO - Processing file 28/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:21:55,881 - INFO - Processing file 29/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:22:08,759 - INFO - Processing file 30/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:22:21,623 - INFO - Processing file 31/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:22:34,371 - INFO - Processing file 32/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:22:47,016 - INFO - Processing file 33/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:22:59,896 - INFO - Processing file 34/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:23:12,620 - INFO - Processing file 35/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:23:25,262 - INFO - Processing file 36/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:23:38,193 - INFO - Processing file 37/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:23:50,879 - INFO - Processing file 38/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:24:03,621 - INFO - Processing file 39/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:24:16,391 - INFO - Processing file 40/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:24:29,147 - INFO - Processing file 41/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:24:41,902 - INFO - Processing file 42/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:24:54,628 - INFO - Processing file 43/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:25:07,398 - INFO - Processing file 44/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:25:20,176 - INFO - Processing file 45/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:25:33,034 - INFO - Processing file 46/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:25:45,817 - INFO - Processing file 47/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:25:58,725 - INFO - Processing file 48/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:26:11,453 - INFO - Processing file 49/142


Creating embeddings:   0%|          | 0/10 [00:00<?, ?it/s]

2024-11-16 17:26:15,325 - INFO - Processing file 50/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:26:28,179 - INFO - Processing file 51/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:26:40,974 - INFO - Processing file 52/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:26:53,783 - INFO - Processing file 53/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:27:06,558 - INFO - Processing file 54/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:27:19,299 - INFO - Processing file 55/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:27:32,063 - INFO - Processing file 56/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:27:44,753 - INFO - Processing file 57/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:27:57,468 - INFO - Processing file 58/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:28:10,249 - INFO - Processing file 59/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:28:23,039 - INFO - Processing file 60/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:28:35,841 - INFO - Processing file 61/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:28:48,592 - INFO - Processing file 62/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:29:01,399 - INFO - Processing file 63/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:29:14,284 - INFO - Processing file 64/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:29:27,065 - INFO - Processing file 65/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:29:39,791 - INFO - Processing file 66/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:29:52,482 - INFO - Processing file 67/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:30:05,296 - INFO - Processing file 68/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:30:18,071 - INFO - Processing file 69/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:30:30,797 - INFO - Processing file 70/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:30:43,655 - INFO - Processing file 71/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:30:56,387 - INFO - Processing file 72/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:31:09,120 - INFO - Processing file 73/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:31:21,866 - INFO - Processing file 74/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:31:34,665 - INFO - Processing file 75/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:31:47,440 - INFO - Processing file 76/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:32:00,168 - INFO - Processing file 77/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:32:12,920 - INFO - Processing file 78/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:32:25,563 - INFO - Processing file 79/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:32:38,293 - INFO - Processing file 80/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:32:51,062 - INFO - Processing file 81/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:33:03,814 - INFO - Processing file 82/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:33:16,627 - INFO - Processing file 83/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:33:29,396 - INFO - Processing file 84/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:33:42,255 - INFO - Processing file 85/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:33:55,067 - INFO - Processing file 86/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:34:07,879 - INFO - Processing file 87/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:34:20,623 - INFO - Processing file 88/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:34:33,461 - INFO - Processing file 89/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:34:46,172 - INFO - Processing file 90/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:34:59,593 - INFO - Processing file 91/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:35:12,397 - INFO - Processing file 92/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:35:25,145 - INFO - Processing file 93/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:35:37,829 - INFO - Processing file 94/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:35:50,549 - INFO - Processing file 95/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:36:03,233 - INFO - Processing file 96/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:36:16,036 - INFO - Processing file 97/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:36:28,860 - INFO - Processing file 98/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:36:41,872 - INFO - Processing file 99/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:36:54,874 - INFO - Processing file 100/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:37:07,898 - INFO - Processing file 101/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:37:20,657 - INFO - Processing file 102/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:37:33,510 - INFO - Processing file 103/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:37:46,422 - INFO - Processing file 104/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:37:59,383 - INFO - Processing file 105/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:38:12,171 - INFO - Processing file 106/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:38:24,953 - INFO - Processing file 107/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:38:37,732 - INFO - Processing file 108/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:38:50,502 - INFO - Processing file 109/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:39:03,307 - INFO - Processing file 110/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:39:16,131 - INFO - Processing file 111/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:39:28,832 - INFO - Processing file 112/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:39:41,524 - INFO - Processing file 113/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:39:54,323 - INFO - Processing file 114/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:40:07,089 - INFO - Processing file 115/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:40:19,921 - INFO - Processing file 116/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:40:32,750 - INFO - Processing file 117/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:40:45,541 - INFO - Processing file 118/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:40:58,391 - INFO - Processing file 119/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:41:11,375 - INFO - Processing file 120/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:41:24,129 - INFO - Processing file 121/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:41:36,984 - INFO - Processing file 122/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:41:49,826 - INFO - Processing file 123/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:42:02,643 - INFO - Processing file 124/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:42:15,525 - INFO - Processing file 125/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:42:28,439 - INFO - Processing file 126/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:42:41,315 - INFO - Processing file 127/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:42:54,221 - INFO - Processing file 128/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:43:07,006 - INFO - Processing file 129/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:43:19,805 - INFO - Processing file 130/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:43:32,574 - INFO - Processing file 131/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:43:45,394 - INFO - Processing file 132/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:43:58,239 - INFO - Processing file 133/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:44:11,091 - INFO - Processing file 134/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:44:23,883 - INFO - Processing file 135/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:44:36,686 - INFO - Processing file 136/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:44:49,497 - INFO - Processing file 137/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:45:02,372 - INFO - Processing file 138/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:45:15,170 - INFO - Processing file 139/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:45:27,927 - INFO - Processing file 140/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:45:40,728 - INFO - Processing file 141/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

2024-11-16 17:45:53,668 - INFO - Processing file 142/142


Creating embeddings:   0%|          | 0/32 [00:00<?, ?it/s]

In [2]:
from Retrival import ClinicalRetrievalSystem

In [3]:

# Build the retrieval system from the processed data directory, report basic
# facts about the loaded embedding matrix, and save the index.
# NOTE(review): use_gpu=True is requested, but the recorded log for this cell
# says "Using CPU for FAISS" -- confirm whether a GPU build of FAISS is expected.
try:
    retriever = ClinicalRetrievalSystem(
        processed_path='../data/processed',
        index_type='Flat',
        use_gpu=True
    )

    print("\nSystem Info:")
    print(f"Embeddings type: {type(retriever.embeddings)}")
    print(f"Embeddings dtype: {retriever.embeddings.dtype}")
    print(f"Embeddings shape: {retriever.embeddings.shape}")
    print(f"Is C-contiguous: {retriever.embeddings.flags.c_contiguous}")

    retriever.save_index()

except Exception as e:
    print(f"\nError occurred: {str(e)}")
    # Re-raise instead of swallowing: later cells need `retriever`, so letting
    # the notebook continue would only surface as an unrelated NameError.
    # Jupyter renders the full traceback for the re-raised exception.
    raise

2024-11-27 17:42:44,570 - INFO - Using CPU for FAISS
2024-11-27 17:42:44,571 - INFO - Loading embeddings and metadata...
2024-11-27 17:42:44,573 - INFO - Processing file 1/142
  data = torch.load(file, map_location='cpu')
2024-11-27 17:42:44,700 - INFO - Processing file 2/142
2024-11-27 17:42:44,805 - INFO - Processing file 3/142
2024-11-27 17:42:44,945 - INFO - Processing file 4/142
2024-11-27 17:42:45,082 - INFO - Processing file 5/142
2024-11-27 17:42:45,190 - INFO - Processing file 6/142
2024-11-27 17:42:45,295 - INFO - Processing file 7/142
2024-11-27 17:42:45,431 - INFO - Processing file 8/142
2024-11-27 17:42:45,722 - INFO - Processing file 9/142
2024-11-27 17:42:45,841 - INFO - Processing file 10/142
2024-11-27 17:42:45,980 - INFO - Processing file 11/142
2024-11-27 17:42:46,081 - INFO - Processing file 12/142
2024-11-27 17:42:46,183 - INFO - Processing file 13/142
2024-11-27 17:42:46,284 - INFO - Processing file 14/142
2024-11-27 17:42:46,384 - INFO - Processing file 15/142
20


System Info:
Embeddings type: <class 'numpy.ndarray'>
Embeddings dtype: float32
Embeddings shape: (141293, 768)
Is C-contiguous: True


2024-11-27 17:43:05,527 - INFO - Saved system to ../data/processed/retrieval_system


In [4]:
import sys
import numpy as np
import faiss
import torch

# Environment report: interpreter and library versions plus GPU visibility as
# seen by PyTorch and by FAISS (the two can disagree).
print("Version Information:")
print(f"Python: {sys.version}")
print(f"NumPy: {np.__version__}")
print(f"FAISS: {faiss.__version__}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU available: {faiss.get_num_gpus()}")

# test FAISS
# Minimal build -> add -> search round trip on a tiny exact L2 index. The
# assertions make "passed" mean something: any violated expectation raises
# inside the try block and is reported through the failure branch below.
try:
    d = 64                           # dimension
    nb = 100                         # database size
    rng = np.random.default_rng(0)   # fixed seed so the check is reproducible
    xb = rng.random((nb, d), dtype=np.float32)

    # make index
    index = faiss.IndexFlatL2(d)     # build the index in L2 space
    print(f"\nFAISS test index is_trained: {index.is_trained}")

    # add vectors to index
    index.add(xb)
    assert index.ntotal == nb, f"expected {nb} indexed vectors, got {index.ntotal}"
    print(f"FAISS test index ntotal: {index.ntotal}")

    # search for nearest neighbors
    k = 4                            # we want to see 4 nearest neighbors
    nq = 1                           # only 1 query
    xq = rng.random((nq, d), dtype=np.float32)

    D, I = index.search(xq, k)       # actual search
    assert D.shape == (nq, k) and I.shape == (nq, k), (
        f"unexpected result shapes: D{D.shape}, I{I.shape}"
    )
    # Every returned id must refer to one of the nb vectors that were added.
    assert ((I >= 0) & (I < nb)).all(), "search returned out-of-range ids"
    print(f"FAISS test search results shape: {D.shape}")

    print("\nFAISS basic functionality test passed!")

except Exception as e:
    print(f"\nFAISS test failed: {str(e)}")

Version Information:
Python: 3.12.7 | packaged by conda-forge | (main, Oct  4 2024, 16:05:46) [GCC 13.3.0]
NumPy: 2.1.3
FAISS: 1.9.0
PyTorch: 2.5.1+cu124
CUDA available: True
GPU available: 0

FAISS test index is_trained: True
FAISS test index ntotal: 100
FAISS test search results shape: (1, 4)

FAISS basic functionality test passed!


In [4]:
from EnhancedRetrival import EnhancedClinicalRetrieval

In [5]:
# create icd9 mapping csv
# ICD-9 codes are identifiers, not numbers: force ICD9_CODE to str so codes
# with leading zeros (e.g. "0389") can never be coerced to integers by dtype
# inference and silently lose the zero.
icd9_df = pd.read_csv(
    '../data/MIMIC_III_D_ICD_DIAGNOSES.csv',
    dtype={'ICD9_CODE': str},
)
# Keep only the code and its two title columns, then write them without the index.
icd9_mapping = icd9_df[['ICD9_CODE', 'SHORT_TITLE', 'LONG_TITLE']]
icd9_mapping.to_csv('../data/icd9_map.csv', index=False)

In [5]:
# Wrap the base retriever and point it at the ICD-9 mapping CSV.
enhanced_retriever = EnhancedClinicalRetrieval(
    icd9_map_path='../data/icd9_map.csv',
    base_retriever=retriever,
)

# Smoke test: retrieve neighbours for an already-indexed embedding, print the
# top hits, then print the generated summary.
try:
    # First row of the embedding matrix doubles as the query vector.
    test_query = retriever.embeddings[0]

    # NOTE(review): the recorded output shows similarity scores around 96, so a
    # min_similarity of 0.5 probably filters nothing -- confirm the score scale.
    search_options = dict(k=5, remove_duplicates=True, min_similarity=0.5)
    similar_cases = enhanced_retriever.find_similar_cases(test_query, **search_options)

    summary = enhanced_retriever.generate_summary(similar_cases)

    print("\nEnhanced Search Results:")
    for rank, hit in enumerate(similar_cases, start=1):
        print(f"\nCase {rank}:")
        print(f"Similarity: {hit['similarity']:.3f}")
        print(f"Demographics: {hit['demographics']}")
        print("Diagnoses:")
        # Only the first three diagnoses are listed per case.
        for diagnosis in hit['diagnoses'][:3]:
            print(f"  - {diagnosis}")
        # Medication text is cut to 200 characters; "..." is always appended.
        medications_preview = hit['medications'][:200]
        print(f"Medications: {medications_preview}...")

    print("\nAnalysis Summary:")
    print(summary)

except Exception as exc:
    print(f"Error: {exc!s}")
    import traceback
    traceback.print_exc()


Enhanced Search Results:

Case 1:
Similarity: 96.415
Demographics: 51-70 F
Diagnoses:
  - 20500: Ac myl leuk wo achv rmsn
  - 51882: Other pulmonary insuff
  - 2841
Medications: Ipratropium Bromide Neb 1 NEB via IH...

Case 2:
Similarity: 96.369
Demographics: 31-50 F
Diagnoses:
  - 51884: Acute & chronc resp fail
  - 0389: Septicemia NOS
  - 99591: Sepsis
Medications: Potassium Chloride 40 mEq via IV...

Case 3:
Similarity: 96.343
Demographics: 71-89 M
Diagnoses:
  - 5849: Acute kidney failure NOS
  - 42823: Ac on chr syst hrt fail
  - 20300: Mult mye w/o achv rmson
Medications: Furosemide 40 mg via IV...

Case 4:
Similarity: 96.223
Demographics: 71-89 M
Diagnoses:
  - 5789: Gastrointest hemorr NOS
  - 51881: Acute respiratry failure
  - 42823: Ac on chr syst hrt fail
Medications: Hydrocortisone Na Succ 25 mg via IV...

Case 5:
Similarity: 96.191
Demographics: 71-89 M
Diagnoses:
  - 5770: Acute pancreatitis
  - 41071: Subendo infarct, initial
  - 5761: Cholangitis
Medications: Phenyle

In [6]:
# Retrieval demo without a min_similarity cut-off: query with the first stored
# embedding, print the top hits, then print the generated clinical summary.
try:
    test_query = retriever.embeddings[0]

    similar_cases = enhanced_retriever.find_similar_cases(
        test_query,
        k=5,
        remove_duplicates=True
    )

    print("\nSimilar Cases Found:")
    for i, case in enumerate(similar_cases, 1):
        print(f"\nCase {i}:")
        print(f"Similarity: {case['similarity']:.3f}")
        print(f"Demographics: {case['demographics']}")
        print("Primary Diagnoses:")
        # Only the first three diagnoses are listed per case.
        for diag in case['diagnoses'][:3]:
            print(f"  - {diag}")
        # Medication text is cut to 200 characters; "..." is always appended.
        print(f"Key Medications: {case['medications'][:200]}...")

    print("\nClinical Analysis:")
    print(enhanced_retriever.generate_summary(similar_cases))

except Exception as e:
    print(f"Error: {str(e)}")
    # Print the traceback as well, matching the other retrieval cells in this
    # notebook; the message alone does not say where the failure happened.
    import traceback
    traceback.print_exc()


Similar Cases Found:

Case 1:
Similarity: 96.415
Demographics: 51-70 F
Primary Diagnoses:
  - 20500: Ac myl leuk wo achv rmsn
  - 51882: Other pulmonary insuff
  - 2841
Key Medications: Ipratropium Bromide Neb 1 NEB via IH...

Case 2:
Similarity: 96.369
Demographics: 31-50 F
Primary Diagnoses:
  - 51884: Acute & chronc resp fail
  - 0389: Septicemia NOS
  - 99591: Sepsis
Key Medications: Potassium Chloride 40 mEq via IV...

Case 3:
Similarity: 96.343
Demographics: 71-89 M
Primary Diagnoses:
  - 5849: Acute kidney failure NOS
  - 42823: Ac on chr syst hrt fail
  - 20300: Mult mye w/o achv rmson
Key Medications: Furosemide 40 mg via IV...

Case 4:
Similarity: 96.223
Demographics: 71-89 M
Primary Diagnoses:
  - 5789: Gastrointest hemorr NOS
  - 51881: Acute respiratry failure
  - 42823: Ac on chr syst hrt fail
Key Medications: Hydrocortisone Na Succ 25 mg via IV...

Case 5:
Similarity: 96.191
Demographics: 71-89 M
Primary Diagnoses:
  - 5770: Acute pancreatitis
  - 41071: Subendo infarct