In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class DiffRule(nn.Module):
    """Stack of learnable soft gates that blends a learned class-score vector
    with a fixed one-hot "default class" vector and returns log-probabilities.

    Each of the ``num_layers`` layers owns a weight column ``beta[i]`` of shape
    ``(input_size, 1)`` and a threshold ``phi[i]`` of shape ``(1, 1)``. The
    learned score vector is stored as the single row of ``final_layer.weight``.

    Args:
        input_size: Number of input features (rows of every ``beta[i]``).
        output_size: Number of classes (width of the score vector).
        num_layers: Number of gating layers.
        default_class: Index of the class encoded in the one-hot ``y_default``.
        lr: Learning rate of the Adam optimizer stored on ``self.optimizer``.
    """

    def __init__(self, input_size, output_size, num_layers, default_class, lr=1e-3):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = num_layers
        # Activations in forward() are always 2-D (batch, output_size), so the
        # class axis is dim 1. Naming it explicitly avoids the deprecated
        # implicit-dimension behaviour of LogSoftmax (and its warning).
        self.logsoftmax = nn.LogSoftmax(dim=1)
        # Registered as a buffer so `.to(device)` / `.cuda()` move it together
        # with the parameters; non-persistent so the state_dict keys stay
        # exactly what they were when this was a plain attribute.
        self.register_buffer(
            "y_default",
            F.one_hot(torch.tensor(default_class), num_classes=output_size),
            persistent=False,
        )
        self.beta = nn.ParameterList(
            [nn.Parameter(torch.randn(input_size, 1, dtype=torch.float)) for _ in range(num_layers)]
        )
        self.phi = nn.ParameterList(
            [nn.Parameter(torch.randn(1, 1, dtype=torch.float)) for _ in range(num_layers)]
        )
        # A one-row embedding table is used purely as a container for the
        # learnable score vector; it is never indexed with token ids.
        self.final_layer = nn.Embedding(1, output_size)
        nn.init.uniform_(self.final_layer.weight, -1, 1)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, x):
        """Return log-probabilities of shape ``(x.size(0), output_size)``.

        ``x`` must be a float tensor whose last dimension equals ``input_size``
        so that ``x @ beta[i]`` is defined; the code treats it as
        ``(batch, input_size)``.
        """
        batch_size = x.size(0)
        y_default = self.y_default.repeat(batch_size, 1)
        out = self.final_layer.weight.repeat(batch_size, 1)  # start from the learned scores

        last = self.num_layers - 1
        for i in range(last, -1, -1):
            # phi[i] is (1, 1) and broadcasts against the (batch, 1) projection.
            # NOTE(review): 1 / (1 + exp(z)) is already a logistic function of
            # -z, so the outer sigmoid confines the gate to roughly
            # [0.5, 0.731]. Kept as-is to preserve existing behaviour; confirm
            # that the double squashing is intended.
            gate = torch.sigmoid(1 / (1 + torch.exp(x @ self.beta[i] - self.phi[i])))
            out = gate * out
            if i == last:
                # The default one-hot vector is mixed in only on the first
                # iteration, i.e. for the last layer.
                out = out + (1 - gate) * y_default
        return self.logsoftmax(out)


In [None]:
def cust_loss(logprobs, labels, default_class=3):
    """Negative log-likelihood that skips samples predicted as the default class.

    For every sample whose arg-max prediction differs from ``default_class``
    the log-probability of its true label is accumulated; samples predicted as
    ``default_class`` are multiplied by zero, so they contribute neither value
    nor gradient.

    Args:
        logprobs: Per-sample log-probabilities, one row per sample. A sequence
            of 1-D tensors is stacked into a 2-D tensor.
        labels: Integer class index per sample (tensor or sequence).
        default_class: Predicted class whose samples are excluded from the loss.

    Returns:
        A scalar tensor holding the negated sum. An empty batch yields a zero
        tensor rather than a Python int, so the result always supports tensor
        operations.
    """
    if not torch.is_tensor(logprobs):
        rows = list(logprobs)
        if not rows:
            return torch.zeros(())
        logprobs = torch.stack(rows)
    labels = torch.as_tensor(labels, device=logprobs.device)

    # Pair rows and labels up to the shorter of the two, like zip() does.
    n = min(logprobs.shape[0], labels.shape[0])
    logprobs = logprobs[:n]
    targets = labels[:n].to(torch.long).reshape(n, 1)

    # Log-probability each sample assigns to its own label.
    picked = logprobs.gather(1, targets).squeeze(1)
    # 1.0 where the prediction is not the default class, 0.0 otherwise.
    keep = (logprobs.argmax(dim=1) != default_class).to(picked.dtype)
    return -(picked * keep).sum()