In [7]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn

torch.manual_seed(42)
np.random.seed(42)

In [8]:
N = 30
dataset = np.zeros((2*N, 2)) # 0, 1, 2, .......... 29

for i in range(N):
    dataset[i][0]=i
    dataset[i][1]=0 
        
for i in range(N, 2*N):
    dataset[i][0]=i
    dataset[i][1]=1
            
labels = np.zeros((2*N, N))
for i in range(N):
    one_idx = (i+10)%N
    labels[i][one_idx]=1

for i in range(N, 2*N):
    one_idx = (i-N+20)%N
    labels[i][one_idx]=1
            
# first half of the dataset is one relation-0 and the second half on relation-1
# last number in input represents the relation number which is then convert to a N-dim vector by the model, so that finally we have e_h and e_r of same dims

# shuffle the dataset
shuffle = np.random.permutation(2*N)
dataset = dataset[shuffle]
labels = labels[shuffle]

# divide in train and validation set
train_proportion = 0.8
train_data = dataset[:int(train_proportion*2*N)]
train_labels = labels[:int(train_proportion*2*N)]
val_data = dataset[int(train_proportion*2*N):]
val_labels = labels[int(train_proportion*2*N):]

# convert to tensors
train_data = torch.from_numpy(train_data).float()
train_labels = torch.from_numpy(train_labels).float()
val_data = torch.from_numpy(val_data).float()
val_labels = torch.from_numpy(val_labels).float()

In [9]:
class BilinearMLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.num_embed = nn.Embedding(30, input_size)
        self.rel_embed = nn.Embedding(2, input_size)
        self.bl = nn.Bilinear(input_size, input_size, hidden_size, bias=False)
        self.lin = nn.Linear(hidden_size, output_size, bias=False)
        
    def forward(self, x):
        e_h = self.num_embed(x[:, 0].long())
        e_r = self.rel_embed(x[:, 1].long())
        h = self.bl(e_h, e_r)
        logits = self.lin(h)
        
        return logits

In [10]:
# def apply_scaled_default_init(model: nn.Module, scale: float = 5.0):
#     """
#     Apply PyTorch defaults (Kaiming/Xavier where appropriate) then multiply weights by `scale`.
#     This preserves relative structure but makes them larger.
#     """
#     def _init(m):
#         if isinstance(m, nn.Embedding):
#             nn.init.normal_(m.weight, mean=0.0, std=1.0)
#             m.weight.data.mul_(scale)
#         elif isinstance(m, nn.Bilinear):
#             # use kaiming for bilinear weights flattened -> still ok to use normal then scale
#             nn.init.kaiming_normal_(m.weight.view(m.weight.size(0), -1), a=0, mode='fan_in', nonlinearity='linear')
#             m.weight.data.mul_(scale)
#             if m.bias is not None:
#                 nn.init.zeros_(m.bias)
#         elif isinstance(m, nn.Linear):
#             nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in', nonlinearity='linear')
#             m.weight.data.mul_(scale)
#             if m.bias is not None:
#                 nn.init.zeros_(m.bias)
#     model.apply(_init)


In [11]:
def train(model, train_data, train_labels, val_data, val_labels, epochs=100, batch_size=16, lr=0.003):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=0.01)
    loss_fn = torch.nn.CrossEntropyLoss()
    
    train_loss_values = []
    val_loss_values = []
    train_acc_values = []
    val_acc_values = []
    
    running_train_loss = 0
    print(epochs)
    for epoch in range(epochs):
        model.train()
        correct_train_preds = 0
        total_train_preds = 0
        for batch in range(0, len(train_data), batch_size):
            optimizer.zero_grad()
            output = model(train_data[batch:batch+batch_size])
            loss = loss_fn(output, torch.argmax(train_labels[batch:batch+batch_size], dim=1)) 
            running_train_loss += loss.item()
            preds = torch.argmax(output, dim=1)
            correct_train_preds += (preds == torch.argmax(train_labels[batch:batch+batch_size], dim=1)).sum().item()
            total_train_preds += len(preds)
            loss.backward()
            optimizer.step()
        model.eval()


        output = model(val_data)
        val_loss = loss_fn(output, torch.argmax(val_labels, dim=1)).item()
        val_preds = torch.argmax(output, dim=1)
        correct_val_preds = (val_preds == torch.argmax(val_labels, dim=1)).sum().item()
        total_val_preds = len(val_preds)
        avg_train_loss = running_train_loss / (len(train_data) / batch_size)
        train_acc = correct_train_preds / total_train_preds
        val_acc = correct_val_preds / total_val_preds
        train_loss_values.append(avg_train_loss)
        val_loss_values.append(val_loss)
        train_acc_values.append(train_acc)
        val_acc_values.append(val_acc)

        print("Epoch: {} | Train loss: {:.2f} | Validation loss: {:.2f} | Train accuracy: {:.2f} | Validation accuracy: {:.2f}".format(epoch, avg_train_loss, val_loss, train_acc, val_acc))

        running_train_loss = 0
    return model, train_loss_values, val_loss_values, train_acc_values, val_acc_values

In [12]:
model = BilinearMLP(N, 100, N)
# apply_scaled_default_init(model, scale=20.0)
model, train_loss_values, val_loss_values, train_acc_values, val_acc_values = train(model, train_data, train_labels, val_data, val_labels, epochs=10000, batch_size=16, lr=0.003)

10000


IndexError: index out of range in self