In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer, TransformerDecoder, TransformerDecoderLayer
from intelligraphs.data_loaders.loaders import IntelliGraphsDataLoader
import torch

data_load = IntelliGraphsDataLoader('syn-paths')
train_data, val_data, test_data = data_load.load_torch()
print("Data loaded successfully.")

# Define Transformer Block


Data loaded successfully.


In [None]:
# Checking the structure of train_data
for batch in train_data:
    print("Input Shape:", batch[)  # Assuming key is 'input'
    break


Input Shape: tensor([16,  1, 29])


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from intelligraphs.data_loaders.loaders import IntelliGraphsDataLoader

# Transformer Block
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super(TransformerBlock, self).__init__()
        self.layer = TransformerEncoderLayer(embed_dim, num_heads, ff_dim, dropout)
        self.transformer = TransformerEncoder(self.layer, num_layers=3)  # 3 Transformer Blocks

    def forward(self, x):
        return self.transformer(x)

# Variational Encoder
class VariationalEncoder(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, latent_dim, num_entities, num_relations):
        super(VariationalEncoder, self).__init__()
        self.entity_embedding = nn.Embedding(num_entities, embed_dim)
        self.relation_embedding = nn.Embedding(num_relations, embed_dim)

        self.transformer = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.mean_proj = nn.Linear(embed_dim, latent_dim)  # Projection for μ
        self.logvar_proj = nn.Linear(embed_dim, latent_dim)  # Projection for log(σ)

    def forward(self, x):
        # x shape: (batch_size, 3, 3) -> entity, relation, entity
        head, rel, tail = x[:, 0], x[:, 1], x[:, 2]  # Extract triplets

        # Convert indices to embeddings
        head_emb = self.entity_embedding(head)  # (batch_size, embed_dim)
        rel_emb = self.relation_embedding(rel)  # (batch_size, embed_dim)
        tail_emb = self.entity_embedding(tail)  # (batch_size, embed_dim)

        # Stack instead of concatenate to match expected Transformer shape (batch_size, seq_len=3, embed_dim)
        x = torch.stack([head_emb, rel_emb, tail_emb], dim=1)  # Shape: (batch_size, 3, embed_dim)

        # Transformer Encoder
        x = self.transformer(x)  # Output shape: (batch_size, 3, embed_dim)
        x = torch.mean(x, dim=1)  # Mean pooling over sequence dimension -> (batch_size, embed_dim)

        # Compute Variational Parameters
        mu = self.mean_proj(x)  # (batch_size, latent_dim)
        logvar = self.logvar_proj(x)  # (batch_size, latent_dim)

        return mu, logvar


# Reparameterization Trick
def reparameterize(mu, logvar):
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std

# Variational Decoder
class VariationalDecoder(nn.Module):
    def __init__(self, latent_dim, embed_dim, num_heads, ff_dim, output_dim):
        super(VariationalDecoder, self).__init__()
        self.linear_proj = nn.Linear(latent_dim, embed_dim)  # Linear projection to initialize input
        self.transformer = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.output_proj = nn.Linear(embed_dim, output_dim)  # Output projection

    def forward(self, z):
        z = self.linear_proj(z).unsqueeze(1)  # Expand to sequence dimension
        z = self.transformer(z)  # Transformer blocks
        z = self.output_proj(z)  # Final LP layer
        return z.squeeze(1)

# Structure Decoder with Tensor Factorization
class StructureDecoder(nn.Module):
    def __init__(self, embed_dim, relation_dim, num_entities):
        super(StructureDecoder, self).__init__()
        self.entity_embedding = nn.Linear(embed_dim, num_entities)  # Map to entity embeddings
        self.relation_embedding = nn.Linear(embed_dim, relation_dim)  # Map to relations

    def forward(self, decoded_output):
        entity_recon = self.entity_embedding(decoded_output)
        relation_recon = self.relation_embedding(decoded_output)
        adjacency_matrix = torch.matmul(entity_recon, entity_recon.transpose(0, 1))  # Factorized graph construction
        return adjacency_matrix, relation_recon

# Full FG-VAE Model
class FG_VAE(nn.Module):
    def __init__(self, embed_dim, num_heads, ff_dim, latent_dim, output_dim, relation_dim, num_entities, num_relations):
        super(FG_VAE, self).__init__()
        self.encoder = VariationalEncoder(embed_dim, num_heads, ff_dim, latent_dim, num_entities, num_relations)
        self.decoder = VariationalDecoder(latent_dim, embed_dim, num_heads, ff_dim, output_dim)
        self.structure_decoder = StructureDecoder(embed_dim, relation_dim, num_entities)

    def forward(self, x):
        mu, logvar = self.encoder(x)
        z = reparameterize(mu, logvar)
        decoded_output = self.decoder(z)
        adjacency_matrix, relations = self.structure_decoder(decoded_output)
        return adjacency_matrix, relations, mu, logvar

# Model Instantiation
embed_dim = 64
num_heads = 4
ff_dim = 128
latent_dim = 32
output_dim = 64  # Same as embed_dim
relation_dim = 16
num_entities = 100  # Example: Number of unique entities
num_relations = 20  # Example: Number of unique relations

model = FG_VAE(embed_dim, num_heads, ff_dim, latent_dim, output_dim, relation_dim, num_entities, num_relations)

# Load dataset
# Load dataset
data_load = IntelliGraphsDataLoader('syn-paths')
train_loader, val_loader, test_loader = data_load.load_torch()
print("Data loaded successfully.")

# Get one batch from train_loader
batch = next(iter(train_loader))  # Get the first batch

# Ensure input is a tensor
x = batch[0]  # Assuming the first element is the input data

# Convert input to integer tensor if it's not already
x = x.long()  # Ensure it's integer-based for embedding lookup

print("Input Shape:", x.shape)  # Expected: (batch_size, 3, 3)

# Forward Pass
adj_matrix, relations, mu, logvar = model(x)

# Print Output Shapes
print("Adjacency Matrix Shape:", adj_matrix.shape)
print("Relations Shape:", relations.shape)
print("Mean Shape:", mu.shape)
print("Log Variance Shape:", logvar.shape)



Data loaded successfully.
Input Shape: torch.Size([3, 3])
Adjacency Matrix Shape: torch.Size([3, 3])
Relations Shape: torch.Size([3, 16])
Mean Shape: torch.Size([3, 32])
Log Variance Shape: torch.Size([3, 32])


In [None]:
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader

# KL Divergence Loss
def kl_divergence(mu, logvar):
    return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

# Training Function
def train(model, train_loader, optimizer, num_epochs=10):
    model.train()
    
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            
            x = batch[0].long()  # Ensure integer inputs for embedding
            adj_matrix, relations, mu, logvar = model(x)
            
            # Reconstruction Loss (Adjacency Matrix)
            recon_loss = F.mse_loss(adj_matrix, torch.eye(adj_matrix.shape[0]).to(adj_matrix.device))

            # KL Divergence
            kl_loss = kl_divergence(mu, logvar)

            # Total VAE Loss
            loss = recon_loss + 0.1 * kl_loss
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

# Evaluation Function: Mean Reciprocal Rank (MRR)
def evaluate_mrr(model, test_loader):
    model.eval()
    ranks = []
    
    with torch.no_grad():
        for batch in test_loader:
            x = batch[0].long()
            adj_matrix, relations, mu, logvar = model(x)

            print(f"adj_matrix shape: {adj_matrix.shape}")  # Debugging
            print(f"x shape: {x.shape}")  # Debugging

            for i in range(x.shape[0]):  # Iterate over batch
                head, rel, tail = x[i, 0].item(), x[i, 1].item(), x[i, 2].item()
                print(f"Head: {head}, Relation: {rel}, Tail: {tail}")  # Debugging

                # Check if head index is within bounds
                if head >= adj_matrix.shape[0]:
                    print(f"Error: head index {head} is out of bounds for adj_matrix of shape {adj_matrix.shape}")
                    continue  # Skip this sample to prevent crashing

                # Compute scores for all possible tails
                scores = adj_matrix[head]  # Get similarity scores
                sorted_scores = torch.argsort(scores, descending=True)

                # Find rank of the correct tail
                if tail >= sorted_scores.shape[0]:  # Ensure valid tail index
                    print(f"Error: tail index {tail} is out of bounds for sorted_scores of shape {sorted_scores.shape}")
                    continue

                rank = (sorted_scores == tail).nonzero(as_tuple=True)[0].item() + 1  # Convert to 1-based index
                ranks.append(1 / rank)

    if len(ranks) == 0:
        print("Warning: No valid ranks computed.")
        return 0.0

    mrr = torch.tensor(ranks).mean().item()
    print(f"MRR: {mrr:.4f}")
    return mrr

# Ensure `train_loader`, `val_loader`, and `test_loader` are passed from your existing dataset
# If they are already defined, you can directly use them below:

# Initialize Model
embed_dim = 64
num_heads = 4
ff_dim = 128
latent_dim = 32
output_dim = 64  # Same as embed_dim
relation_dim = 16
num_entities = 100  # Example: Number of unique entities
num_relations = 20  # Example: Number of unique relations

# Initialize FG_VAE model (make sure the class definition exists)
model = FG_VAE(embed_dim, num_heads, ff_dim, latent_dim, output_dim, relation_dim, num_entities, num_relations)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Ensure dataset entity range before training
for batch in train_loader:
    x = batch[0].long()
    max_entity_index = x.max().item()
    print(f"Max entity index in dataset: {max_entity_index}, Model num_entities: {num_entities}")
    if max_entity_index >= num_entities:
        print(f"Error: max entity index {max_entity_index} exceeds num_entities {num_entities}.")
    break  # Only check one batch

# Train Model with the existing dataset
train(model, train_loader, optimizer, num_epochs=10)

# Evaluate Model with the existing dataset
evaluate_mrr(model, test_loader)


Max entity index in dataset: 29, Model num_entities: 25
Error: max entity index 29 exceeds num_entities 25.


IndexError: index out of range in self

In [None]:
import torch
import torch.optim as optim
from torch_geometric.nn import TransE
from torch_geometric.loader import DataLoader
from intelligraphs.data_loaders.loaders import IntelliGraphsDataLoader  # Your dataset loader

# ========================
# Define Parameters
# ========================
embed_dim = 64
num_entities = 100  # Adjust based on your dataset
num_relations = 20  # Adjust based on your dataset

# ========================
# Initialize TransE Model
# ========================
model = TransE(num_nodes=num_entities, num_relations=num_relations, hidden_channels=embed_dim)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ========================
# Load IntelliGraphs Dataset
# ========================
data_load = IntelliGraphsDataLoader('syn-paths')
train_loader, val_loader, test_loader = data_load.load_torch()

# ========================
# Training Function
# ========================
def train(model, data_loader, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in data_loader:
            optimizer.zero_grad()
            x = batch[0].long()  # Ensure integer input for embeddings
            loss = model.loss(x[:, 0], x[:, 1], x[:, 2])  # (head, relation, tail)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

# ========================
# Evaluation Function (MRR & Hits@10)
# ========================
@torch.no_grad()
@torch.no_grad()
def evaluate(model, data_loader):
    model.eval()
    mean_ranks, hits_at_10s = [], []

    for batch in data_loader:
        x = batch[0].long()
        heads, relations, tails = x[:, 0], x[:, 1], x[:, 2]

        # Capture all returned values
        result = model.test(heads, relations, tails, batch_size=1000, k=10)

        # Unpack based on the number of values returned
        if isinstance(result, tuple) and len(result) >= 2:
            mean_rank, hits_at_10 = result[:2]  # Extract the first two values
            mean_ranks.append(mean_rank)
            hits_at_10s.append(hits_at_10)

    # Compute overall metrics
    avg_mean_rank = sum(mean_ranks) / len(mean_ranks)
    avg_hits_at_10 = sum(hits_at_10s) / len(hits_at_10s)

    print(f"Final Evaluation - Mean Rank (MRR): {avg_mean_rank:.2f}, Hits@10: {avg_hits_at_10:.4f}")

# Run Evaluation



# ========================
# Run Training & Evaluation
# ========================
train(model, train_loader, optimizer, num_epochs=10)
evaluate(model, test_loader)


Epoch 1/10, Loss: 1795.6851
Epoch 2/10, Loss: 1605.1674
Epoch 3/10, Loss: 1366.9462
Epoch 4/10, Loss: 1142.8964
Epoch 5/10, Loss: 1015.8445
Epoch 6/10, Loss: 969.8641
Epoch 7/10, Loss: 930.0904
Epoch 8/10, Loss: 912.6677
Epoch 9/10, Loss: 898.3297
Epoch 10/10, Loss: 912.6243


100%|██████████| 3/3 [00:00<00:00, 3300.00it/s]
100%|██████████| 3/3 [00:00<00:00, 2849.39it/s]
100%|██████████| 3/3 [00:00<00:00, 2777.68it/s]
100%|██████████| 3/3 [00:00<00:00, 2717.10it/s]
100%|██████████| 3/3 [00:00<00:00, 3177.50it/s]
100%|██████████| 3/3 [00:00<00:00, 2459.04it/s]
100%|██████████| 3/3 [00:00<00:00, 3021.11it/s]
100%|██████████| 3/3 [00:00<00:00, 3521.67it/s]
100%|██████████| 3/3 [00:00<00:00, 2676.65it/s]
100%|██████████| 3/3 [00:00<00:00, 2722.40it/s]
100%|██████████| 3/3 [00:00<00:00, 2892.62it/s]
100%|██████████| 3/3 [00:00<00:00, 2915.41it/s]
100%|██████████| 3/3 [00:00<00:00, 2910.02it/s]
100%|██████████| 3/3 [00:00<00:00, 2702.52it/s]
100%|██████████| 3/3 [00:00<00:00, 1910.84it/s]
100%|██████████| 3/3 [00:00<00:00, 1975.96it/s]
100%|██████████| 3/3 [00:00<00:00, 2523.14it/s]
100%|██████████| 3/3 [00:00<00:00, 2704.84it/s]
100%|██████████| 3/3 [00:00<00:00, 2857.16it/s]
100%|██████████| 3/3 [00:00<00:00, 2510.56it/s]
100%|██████████| 3/3 [00:00<00:00, 2692.

Final Evaluation - Mean Rank (MRR): 24.25, Hits@10: 0.0892





Epoch 0: Loss = 219.406860
Epoch 200: Loss = 68.305603
Epoch 400: Loss = 26.170866
Epoch 600: Loss = 13.507149
Epoch 800: Loss = 11.020437
Epoch 1000: Loss = 10.159431
Epoch 1200: Loss = 9.395856
Epoch 1400: Loss = 9.836242
Epoch 1600: Loss = 10.719400
Epoch 1800: Loss = 11.270818
Epoch 2000: Loss = 9.899794
Epoch 2200: Loss = 10.065038
Epoch 2400: Loss = 10.294238
Epoch 2600: Loss = 10.090910
Epoch 2800: Loss = 9.850412
Epoch 3000: Loss = 10.820418
Epoch 3200: Loss = 9.895985
Epoch 3400: Loss = 9.233746


KeyboardInterrupt: 