In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

embeddings2_raw = np.loadtxt("second_last_layer_embeddings.txt", delimiter=",")  # Shape: [6241, 200]

embedding2 = torch.tensor(embeddings2_raw, dtype=torch.float32)  # Shape: [6241, 200]

# Select the top 1500 rows (nodes)
embedding2 = embedding2[:1500]  # Shape: [1500, 200]

In [2]:
embedding2

tensor([[0.0588, 0.0000, 0.0902,  ..., 0.0000, 0.0941, 0.0000],
        [0.0764, 0.0000, 0.1202,  ..., 0.0000, 0.1293, 0.0000],
        [0.0875, 0.0000, 0.1268,  ..., 0.0000, 0.0642, 0.0000],
        ...,
        [0.0938, 0.0000, 0.0631,  ..., 0.0000, 0.0294, 0.0000],
        [0.0709, 0.0000, 0.0825,  ..., 0.0000, 0.0970, 0.0000],
        [0.0701, 0.0000, 0.0851,  ..., 0.0000, 0.0862, 0.0000]])

In [3]:
embedding2.shape

torch.Size([1500, 200])

In [4]:
import pandas as pd

# Load the data
file_path = "1500_labels.txt"  # Replace with your file path
data = pd.read_csv(file_path, sep="\t", header=None, names=["ID", "Category", "Value"])

# Display the data
print(data)


          ID Category  Value
0         37      val     24
1         53    train      4
2        171    train     28
3        335    train     23
4        349    train      8
...      ...      ...    ...
1495  168886     test     37
1496  168979    train     37
1497  169087    train     16
1498  169154    train     30
1499  169158    train     27

[1500 rows x 3 columns]


In [5]:
indices = torch.tensor(data["ID"].values, dtype=torch.long)
splits = data["Category"].values.tolist()  # Ensure it's a list of strings
labels = torch.tensor(data["Value"].values, dtype=torch.long)

In [6]:
gnn_embeddings = torch.load("gnn_embeddings.pt")
# Extract embeddings for the given indices
embedding1 = gnn_embeddings[indices]
embedding1.shape

torch.Size([1500, 256])

In [7]:
embedding1

tensor([[2.4802, 0.0000, 1.8834,  ..., 0.0000, 1.9623, 0.0000],
        [3.0734, 0.0000, 0.3168,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 2.5264,  ..., 0.5769, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 3.2684,  ..., 0.0000, 0.0000, 0.0000],
        [2.2183, 2.8732, 0.5575,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 2.3806, 0.0000, 0.0000]],
       grad_fn=<IndexBackward0>)

In [8]:


# Step 4: Concatenate the Embeddings
concatenated_embeddings = torch.cat((embedding1[:1500], embedding2), dim=1)  # Shape: [1500, 456]
concatenated_embeddings.shape

torch.Size([1500, 456])

In [9]:
concatenated_embeddings

tensor([[2.4802, 0.0000, 1.8834,  ..., 0.0000, 0.0941, 0.0000],
        [3.0734, 0.0000, 0.3168,  ..., 0.0000, 0.1293, 0.0000],
        [0.0000, 0.0000, 2.5264,  ..., 0.0000, 0.0642, 0.0000],
        ...,
        [0.0000, 0.0000, 3.2684,  ..., 0.0000, 0.0294, 0.0000],
        [2.2183, 2.8732, 0.5575,  ..., 0.0000, 0.0970, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0862, 0.0000]],
       grad_fn=<CatBackward0>)

In [10]:
train_indices = [i for i, split in enumerate(splits) if split == "train"]
val_indices = [i for i, split in enumerate(splits) if split == "val"]
test_indices = [i for i, split in enumerate(splits) if split == "test"]


In [11]:
train_data = TensorDataset(concatenated_embeddings[train_indices], labels[train_indices])
val_data = TensorDataset(concatenated_embeddings[val_indices], labels[val_indices])
test_data = TensorDataset(concatenated_embeddings[test_indices], labels[test_indices])

# Step 4: Define DataLoader
batch_size = 16
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)


In [12]:


# Step 5: Define the model
class Classifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(Classifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )
    
    def forward(self, x):
        return self.fc(x)

input_size = concatenated_embeddings.shape[1]#456
num_classes = labels.max().item() + 1
model = Classifier(input_size, num_classes)

# Step 6: Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [13]:
# Early stopping parameters
patience = 3  # Number of epochs to wait for improvement
best_accuracy = 0  # Keep track of the best validation accuracy
patience_counter = 0  # Counter to track how many epochs since the last improvement

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for embeddings, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, targets)
        loss.backward(retain_graph=True)
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")

    # Validation loop
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for embeddings, targets in val_loader:
            outputs = model(embeddings)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    val_accuracy = 100 * correct / total
    print(f"Validation Accuracy: {val_accuracy:.2f}%")

    # Early stopping check
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        patience_counter = 0  # Reset patience counter since we have improved
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs. Best Accuracy: {best_accuracy:.2f}%")
        break


Epoch 1/10, Loss: 185.5250
Validation Accuracy: 35.32%
Epoch 2/10, Loss: 127.2754
Validation Accuracy: 52.75%
Epoch 3/10, Loss: 98.9980
Validation Accuracy: 56.42%
Epoch 4/10, Loss: 81.9642
Validation Accuracy: 54.59%
Epoch 5/10, Loss: 68.3719
Validation Accuracy: 55.50%
Epoch 6/10, Loss: 57.5482
Validation Accuracy: 58.26%
Epoch 7/10, Loss: 49.4468
Validation Accuracy: 58.26%
Epoch 8/10, Loss: 40.6731
Validation Accuracy: 55.96%
Epoch 9/10, Loss: 32.2804
Validation Accuracy: 52.29%
Early stopping triggered after 9 epochs. Best Accuracy: 58.26%


In [14]:

# Step 8: Test the model
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for embeddings, targets in test_loader:
        outputs = model(embeddings)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 52.17%


# resize and add 

In [15]:
# Define a linear transformation layer
linear_layer = nn.Linear(256, 200)

# Apply this transformation to the embedding1 tensor
embedding1_transformed = linear_layer(embedding1)

# Check the shape of transformed embedding1
print(embedding1_transformed.shape)  # Should be [1500, 200]


torch.Size([1500, 200])


In [16]:
print(embedding2.shape)

torch.Size([1500, 200])


In [18]:
added_embeddings = embedding1_transformed + embedding2


In [19]:
train_data = TensorDataset(added_embeddings[train_indices], labels[train_indices])
val_data = TensorDataset(added_embeddings[val_indices], labels[val_indices])
test_data = TensorDataset(added_embeddings[test_indices], labels[test_indices])

# Step 4: Define DataLoader
batch_size = 16
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)


In [22]:

input_size = added_embeddings.shape[1]#200
input_size

200

In [23]:


# Step 5: Define the model
class Classifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(Classifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )
    
    def forward(self, x):
        return self.fc(x)

num_classes = labels.max().item() + 1
model = Classifier(input_size, num_classes)

# Step 6: Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [24]:
# Early stopping parameters
patience = 3  # Number of epochs to wait for improvement
best_accuracy = 0  # Keep track of the best validation accuracy
patience_counter = 0  # Counter to track how many epochs since the last improvement

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for embeddings, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, targets)
        loss.backward(retain_graph=True)
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")

    # Validation loop
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for embeddings, targets in val_loader:
            outputs = model(embeddings)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    val_accuracy = 100 * correct / total
    print(f"Validation Accuracy: {val_accuracy:.2f}%")

    # Early stopping check
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        patience_counter = 0  # Reset patience counter since we have improved
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs. Best Accuracy: {best_accuracy:.2f}%")
        break


Epoch 1/10, Loss: 197.2795
Validation Accuracy: 34.40%
Epoch 2/10, Loss: 143.3461
Validation Accuracy: 47.25%
Epoch 3/10, Loss: 115.9702
Validation Accuracy: 51.83%
Epoch 4/10, Loss: 100.5291
Validation Accuracy: 55.50%
Epoch 5/10, Loss: 88.5624
Validation Accuracy: 53.67%
Epoch 6/10, Loss: 79.5167
Validation Accuracy: 55.05%
Epoch 7/10, Loss: 71.5351
Validation Accuracy: 52.75%
Early stopping triggered after 7 epochs. Best Accuracy: 55.50%


In [25]:

# Step 8: Test the model
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for embeddings, targets in test_loader:
        outputs = model(embeddings)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 48.22%


# multiply

In [27]:
multiplied_embeddings = embedding1_transformed * embedding2
train_data = TensorDataset(multiplied_embeddings[train_indices], labels[train_indices])
val_data = TensorDataset(multiplied_embeddings[val_indices], labels[val_indices])
test_data = TensorDataset(multiplied_embeddings[test_indices], labels[test_indices])

# Step 4: Define DataLoader
batch_size = 16
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)



input_size = multiplied_embeddings.shape[1]#200




# Step 5: Define the model
class Classifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(Classifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )
    
    def forward(self, x):
        return self.fc(x)

num_classes = labels.max().item() + 1
model = Classifier(input_size, num_classes)

# Step 6: Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [28]:
# Early stopping parameters
patience = 3  # Number of epochs to wait for improvement
best_accuracy = 0  # Keep track of the best validation accuracy
patience_counter = 0  # Counter to track how many epochs since the last improvement

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for embeddings, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, targets)
        loss.backward(retain_graph=True)
        optimizer.step()
        total_loss += loss.item()
    
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")

    # Validation loop
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for embeddings, targets in val_loader:
            outputs = model(embeddings)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    val_accuracy = 100 * correct / total
    print(f"Validation Accuracy: {val_accuracy:.2f}%")

    # Early stopping check
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        patience_counter = 0  # Reset patience counter since we have improved
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs. Best Accuracy: {best_accuracy:.2f}%")
        break



# Step 8: Test the model
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for embeddings, targets in test_loader:
        outputs = model(embeddings)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")

Epoch 1/10, Loss: 226.6037
Validation Accuracy: 11.47%
Epoch 2/10, Loss: 186.1115
Validation Accuracy: 22.94%
Epoch 3/10, Loss: 166.7476
Validation Accuracy: 26.61%
Epoch 4/10, Loss: 154.0381
Validation Accuracy: 32.57%
Epoch 5/10, Loss: 144.6842
Validation Accuracy: 32.57%
Epoch 6/10, Loss: 135.9187
Validation Accuracy: 39.45%
Epoch 7/10, Loss: 130.8627
Validation Accuracy: 43.58%
Epoch 8/10, Loss: 124.1877
Validation Accuracy: 40.83%
Epoch 9/10, Loss: 119.7451
Validation Accuracy: 45.87%
Epoch 10/10, Loss: 112.9565
Validation Accuracy: 46.33%
Test Accuracy: 42.69%
