In [4]:
import torch

# Load the precomputed lab-report and conversation embeddings from the Kaggle input dataset.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code; only load
# checkpoints from a trusted source. Consider passing weights_only=True once it is confirmed
# that both files contain nothing but tensors and plain containers.
EMBEDDINGS_DIR = "/kaggle/input/labreportandconversationembeddings/pytorch/default/1"

# map_location="cpu" keeps loading independent of the device the tensors were saved from,
# so the files also load on a CPU-only kernel and the tensors live on the same device as
# the CPU-resident layers that are applied to them later in the notebook.
lab_test_embeddings = torch.load(f"{EMBEDDINGS_DIR}/lab_icd_embeddings.pt", map_location="cpu")  # Lab report embeddings
conversation_data = torch.load(f"{EMBEDDINGS_DIR}/conversation_embeddings_updated.pt", map_location="cpu")  # Conversation embeddings

# Sanity check: print the shape of every embedding before any further processing.
for test, info in lab_test_embeddings.items():
    print(f"Lab Test: {test} | Embedding Shape: {info['lab_test_embedding'].shape}")

for category, embedding in conversation_data["embeddings"].items():
    print(f"{category} Embedding Shape: {embedding.shape}")


Lab Test: The lab tests mentioned in the report are:

1. Lipid Profile | Embedding Shape: torch.Size([1, 768])
Lab Test: Basic | Embedding Shape: torch.Size([1, 768])
Lab Test: Serum
2. HbA1c (Glycosylated Hemoglobin) | Embedding Shape: torch.Size([1, 768])
Lab Test: Blood | Embedding Shape: torch.Size([1, 768])
symptoms Embedding Shape: torch.Size([1, 768])
exposure Embedding Shape: torch.Size([1, 768])
medical_history Embedding Shape: torch.Size([1, 768])
medications Embedding Shape: torch.Size([1, 768])


  lab_test_embeddings = torch.load("/kaggle/input/labreportandconversationembeddings/pytorch/default/1/lab_icd_embeddings.pt")  # Lab report embeddings
  conversation_data = torch.load("/kaggle/input/labreportandconversationembeddings/pytorch/default/1/conversation_embeddings_updated.pt")  # Conversation embeddings


In [5]:
import torch.nn as nn

# Width of the shared space that both modalities are projected into.
d_f = 1024

# Independent linear projections from the 768-wide loaded embeddings to d_f.
# NOTE: both layers are freshly (randomly) initialised in this cell.
proj_lab = nn.Linear(768, d_f)  # Project lab embeddings to 1024
proj_conv = nn.Linear(768, d_f)  # Project conversation embeddings to 1024


def _project_once(embedding, projection):
    """Return `projection(embedding)`, or `embedding` itself if it was already projected.

    The projected tensors are written back over the originals below, so re-running this
    cell would otherwise push 1024-wide tensors through a 768-input layer and raise a
    shape error. An embedding counts as "already projected" when its last dimension
    equals the layer's output width (and that width differs from the input width).
    Any other unexpected width is still passed to the layer so the error stays visible.
    """
    width = embedding.shape[-1]
    already_projected = (
        width == projection.out_features
        and projection.out_features != projection.in_features
    )
    return embedding if already_projected else projection(embedding)


# Apply projections to lab report embeddings (overwrites the stored tensor in place).
for test, info in lab_test_embeddings.items():
    info["lab_test_embedding"] = _project_once(info["lab_test_embedding"], proj_lab)  # Shape: (1, d_f)

# Apply projections to conversation embeddings (keys are unchanged, only values are replaced).
for category, embedding in conversation_data["embeddings"].items():
    conversation_data["embeddings"][category] = _project_once(embedding, proj_conv)  # Shape: (1, d_f)

# Verify new shapes
for test, info in lab_test_embeddings.items():
    print(f"Lab Test: {test} | Projected Embedding Shape: {info['lab_test_embedding'].shape}")

for category, embedding in conversation_data["embeddings"].items():
    print(f"{category} Projected Embedding Shape: {embedding.shape}")

Lab Test: The lab tests mentioned in the report are:

1. Lipid Profile | Projected Embedding Shape: torch.Size([1, 1024])
Lab Test: Basic | Projected Embedding Shape: torch.Size([1, 1024])
Lab Test: Serum
2. HbA1c (Glycosylated Hemoglobin) | Projected Embedding Shape: torch.Size([1, 1024])
Lab Test: Blood | Projected Embedding Shape: torch.Size([1, 1024])
symptoms Projected Embedding Shape: torch.Size([1, 1024])
exposure Projected Embedding Shape: torch.Size([1, 1024])
medical_history Projected Embedding Shape: torch.Size([1, 1024])
medications Projected Embedding Shape: torch.Size([1, 1024])


In [6]:
import torch.nn as nn

# Multi-head attention runs an attention mechanism several times in parallel. The
# independent head outputs are concatenated and linearly transformed into the expected
# dimension, which lets different heads attend to different parts of the sequence
# (e.g. longer-term versus shorter-term dependencies).
# Reference: https://paperswithcode.com/method/multi-head-attention

# Define Multihead Attention for Fusion
mha = nn.MultiheadAttention(embed_dim=1024, num_heads=8)  # 8 heads for better representation

# Fusion: lab test embedding (query) attends over the lab test and symptoms embeddings.
# The symptoms lookup does not depend on the loop variable, so it is done once up front.
# If no symptoms embedding exists, no "fused_embedding" key is written (best-effort skip).
symptoms_embedding = conversation_data["embeddings"].get("symptoms")

if symptoms_embedding is not None:
    conv_seq = symptoms_embedding.unsqueeze(0)  # Shape: (1, batch, 1024)

    for test, info in lab_test_embeddings.items():
        lab_seq = info["lab_test_embedding"].unsqueeze(0)  # Shape: (1, batch, 1024)

        # With a single key/value token the softmax over keys is always 1.0, so the
        # attention output would not depend on the query at all and every lab test
        # would receive the same "fused" vector. Attending over both the lab token and
        # the symptoms token makes the mix depend on the lab embedding.
        kv_seq = torch.cat([lab_seq, conv_seq], dim=0)  # Shape: (2, batch, 1024)

        # Lab test as query; lab test + symptoms as key/value (sequence-first layout).
        attn_output, _ = mha(query=lab_seq, key=kv_seq, value=kv_seq)

        # Store fused embedding
        info["fused_embedding"] = attn_output.squeeze(0)  # Shape: (1, 1024)

# Print final fused embeddings
for test, info in lab_test_embeddings.items():
    if "fused_embedding" in info:
        print(f"Lab Test: {test} | Fused Embedding Shape: {info['fused_embedding'].shape}")

Lab Test: The lab tests mentioned in the report are:

1. Lipid Profile | Fused Embedding Shape: torch.Size([1, 1024])
Lab Test: Basic | Fused Embedding Shape: torch.Size([1, 1024])
Lab Test: Serum
2. HbA1c (Glycosylated Hemoglobin) | Fused Embedding Shape: torch.Size([1, 1024])
Lab Test: Blood | Fused Embedding Shape: torch.Size([1, 1024])


In [9]:
import torch.nn as nn

# Create a fully connected head that processes the fused embeddings produced by multi-head attention
class EnhancedFC(nn.Module):
    """Three-layer feed-forward head.

    Structure: two hidden stages of Linear -> LayerNorm -> ReLU -> Dropout,
    followed by a final Linear layer with no normalisation or activation.
    All layers live in a single ``nn.Sequential`` stored as ``self.fc``.
    """

    def __init__(self, input_dim=1024, hidden_dim1=512, hidden_dim2=256, output_dim=256, dropout_prob=0.3):
        """Build the layer stack.

        Args:
            input_dim: width of the input features.
            hidden_dim1: width of the first hidden stage.
            hidden_dim2: width of the second hidden stage.
            output_dim: width of the returned embedding.
            dropout_prob: dropout probability used after each hidden stage.
        """
        super().__init__()

        stage_widths = (input_dim, hidden_dim1, hidden_dim2)
        layers = []
        for width_in, width_out in zip(stage_widths[:-1], stage_widths[1:]):
            layers.append(nn.Linear(width_in, width_out))
            layers.append(nn.LayerNorm(width_out))  # LayerNorm rather than BatchNorm
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))

        # Output stage: plain linear map to the final embedding width.
        layers.append(nn.Linear(hidden_dim2, output_dim))

        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the full stack and return the result."""
        out = self.fc(x)
        return out

# Initialize improved FC layer
fc_model = EnhancedFC(input_dim=1024, output_dim=256)

# Switch to evaluation mode before producing embeddings: a freshly constructed module is
# in training mode, where the Dropout layers randomly zero activations, so the stored
# "final" embeddings would differ on every call. Call fc_model.train() again before any
# later training step.
fc_model.eval()

# Apply FC to all fused embeddings (entries without a fused embedding are skipped).
for test, info in lab_test_embeddings.items():
    if "fused_embedding" in info:
        info["final_embedding"] = fc_model(info["fused_embedding"])  # Shape: (1, 256)

# Print final transformed embeddings
for test, info in lab_test_embeddings.items():
    if "final_embedding" in info:
        print(f"Lab Test: {test} | Final Embedding Shape: {info['final_embedding'].shape}")

Lab Test: The lab tests mentioned in the report are:

1. Lipid Profile | Final Embedding Shape: torch.Size([1, 256])
Lab Test: Basic | Final Embedding Shape: torch.Size([1, 256])
Lab Test: Serum
2. HbA1c (Glycosylated Hemoglobin) | Final Embedding Shape: torch.Size([1, 256])
Lab Test: Blood | Final Embedding Shape: torch.Size([1, 256])
