In [100]:
import torch.nn as nn
import torch
import torch.nn.functional as F

class BinaryFocalLoss(nn.Module):
    """Element-wise binary focal loss computed from raw logits.

    Every element contributes ``alpha_t * (1 - p_t) ** gamma * BCE`` where
    ``p_t`` is the sigmoid probability assigned to the element's true label
    and ``alpha_t`` is ``alpha`` for positives and ``1 - alpha`` for negatives.
    """

    def __init__(self, alpha=0.25, gamma=2.0, reduction='mean', eps=1e-8):
        """
        alpha: class weight applied to positive targets; negatives receive
            ``1 - alpha`` (0.25 is suggested when positives are rare).
        gamma: focusing exponent; the larger it is, the more the loss
            concentrates on hard (poorly predicted) elements.
        reduction: 'mean' or 'sum'; any other value (e.g. 'none') returns the
            unreduced per-element loss.
        eps: numerical-stability constant. It is stored for interface
            compatibility only; the forward pass does not read it.
        """
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.eps = eps

    def forward(self, inputs, targets):
        """Return the focal loss of ``inputs`` (logits) against ``targets``.

        ``targets`` must broadcast against ``inputs`` and hold 0/1 labels.
        It is cast to the dtype of ``inputs`` so integer or boolean label
        tensors (for example the direct output of ``F.one_hot``) are accepted
        instead of failing inside ``binary_cross_entropy_with_logits``.
        """
        targets = targets.to(dtype=inputs.dtype)

        # Probabilities and the unreduced BCE term
        probs = torch.sigmoid(inputs)
        bce_loss = F.binary_cross_entropy_with_logits(
            inputs, targets, reduction='none'
        )

        # Focal weight: (1 - p_t)^gamma, with p_t = p if t == 1 else 1 - p
        p_t = probs * targets + (1 - probs) * (1 - targets)
        focal_weight = (1 - p_t).pow(self.gamma)

        # Alpha weight: alpha for positives, 1 - alpha for negatives
        alpha_weight = self.alpha * targets + (1 - self.alpha) * (1 - targets)

        # Combined per-element loss
        loss = focal_weight * alpha_weight * bce_loss

        if self.reduction == 'mean':
            return loss.mean()
        elif self.reduction == 'sum':
            return loss.sum()
        else:
            # Anything else is treated as "no reduction".
            return loss




In [229]:

class AsymmetricLossOptimized(nn.Module):
    """Asymmetric loss for multi-label classification, computed from raw logits.

    Positive and negative targets get separate focusing exponents
    (``gamma_pos`` / ``gamma_neg``) and the negative probability is shifted by
    ``clip`` before the log is taken. Intermediate tensors are kept on the
    module (``targets``, ``xs_pos``, ``loss`` ...) and a few steps are done
    in place to limit allocations.

    When ``ft_cls`` is set, the hard-coded per-class fine-tuning vectors
    ``_FT_GAMMA_NEG`` / ``_FT_WEIGHTS`` are used.
    NOTE(review): those vectors have 9 entries while the previous code
    asserted 10 classes; confirm the intended class count. The vectors are
    now validated against the actual number of classes in the input.
    """

    # Per-class fine-tuning settings; edit by hand to match the fine-tuning needs.
    # Judging from current test results, misses are caused by:
    # 1) positive-class scores being too low; 2) the class-0 score being too high.
    # Classes 1 and 0 are often close, so leaving class 1 untouched is also an option.
    _FT_GAMMA_NEG = [1.0] + [1.0] + [10.] + [10.] + [10.] + [10.] + [1.]*3
    # Previous setting: [0.] + [1.]*5 + [0.]*4
    _FT_WEIGHTS = [0.] + [1.] + [2.]*4 + [0.]*3

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False, ft_cls=None, num_classes=9):
        """
        gamma_neg / gamma_pos: focusing exponents for negative / positive targets.
        clip: value added to the negative probability (then clamped to 1);
            ``None`` or ``0`` disables clipping.
        eps: lower clamp applied before taking logs.
        disable_torch_grad_focal_loss: compute the focusing weight without
            tracking gradients.
        ft_cls: ``None`` for the plain loss; ``1`` keeps only the class-0 loss;
            ``2`` re-weights the loss per class with ``_FT_WEIGHTS``.
        num_classes: stored on the module; not read by ``forward``.
        """
        super(AsymmetricLossOptimized, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

        self.flag = True  # not read inside this class

        self.ft_cls = ft_cls
        self.num_classes = num_classes
        # Slots for the intermediate tensors written by forward().
        self.targets = self.anti_targets = self.xs_pos = self.xs_neg = self.asymmetric_w = self.loss = None

    @staticmethod
    def _per_class_tensor(values, x):
        """Turn a per-class list into a tensor matching ``x``'s dtype/device, checking its length."""
        if x.shape[-1] != len(values):
            raise ValueError(
                f"fine-tuning vectors are defined for {len(values)} classes, "
                f"but the input has {x.shape[-1]}"
            )
        return torch.tensor(values, dtype=x.dtype, device=x.device)

    def forward(self, x, y):
        """
        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)

        Returns the negated loss, summed over dim 1 and then averaged.
        """
        self.targets = y
        self.anti_targets = 1 - y

        # Calculating Probabilities
        self.xs_pos = torch.sigmoid(x)
        self.xs_neg = 1.0 - self.xs_pos

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            self.xs_neg.add_(self.clip).clamp_(max=1)

        self.loss = self.targets * torch.log(self.xs_pos.clamp(min=self.eps))
        self.loss.add_(self.anti_targets * torch.log(self.xs_neg.clamp(min=self.eps)))

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            if self.ft_cls is not None:
                # Lists cannot be multiplied with tensors, so build tensors.
                gamma_neg = self._per_class_tensor(self._FT_GAMMA_NEG, x)
                gamma_pos = self._per_class_tensor(
                    [self.gamma_pos] * len(self._FT_GAMMA_NEG), x)
            else:
                gamma_neg = self.gamma_neg
                gamma_pos = self.gamma_pos

            # Use a context manager so the caller's grad mode is restored on
            # exit; an enclosing torch.no_grad() must stay in effect afterwards.
            focal_grad = torch.is_grad_enabled() and not self.disable_torch_grad_focal_loss
            with torch.set_grad_enabled(focal_grad):
                self.xs_pos = self.xs_pos * self.targets
                self.xs_neg = self.xs_neg * self.anti_targets
                self.asymmetric_w = torch.pow(1 - self.xs_pos - self.xs_neg,
                                              gamma_pos * self.targets + gamma_neg * self.anti_targets)
            self.loss = self.loss * self.asymmetric_w

        if self.ft_cls is not None:
            if self.ft_cls == 1:
                print("移除阳性类的loss")
                # Drop the loss of the positive classes: keep class 0 only.
                keep = torch.zeros(self.loss.shape[-1], dtype=self.loss.dtype, device=x.device)
                keep[0] = 1.0
                self.loss = self.loss * keep
            elif self.ft_cls == 2:
                # Drop the loss of the negative classes and re-weight the rest.
                print("移除阴性类的loss")
                self.loss = self.loss * self._per_class_tensor(self._FT_WEIGHTS, x)

        return -self.loss.sum(dim=1).mean()


In [250]:
import torch.nn.functional as F

# Evaluate the three criteria on one random batch of logits with one-hot targets.
num_classes, batch_size = 10, 5

# Random logits and random single-label targets, expanded to float one-hot rows.
logits = torch.randn(batch_size, num_classes)
labels = torch.randint(low=0, high=num_classes, size=(batch_size,))
labels_onehot = F.one_hot(labels, num_classes=num_classes).float()
print(logits.shape, labels_onehot.shape)

# Criteria under comparison.
criterion_focal = BinaryFocalLoss(alpha=0.25, gamma=2, reduction='mean', eps=1e-8)
criterion_bce = nn.BCEWithLogitsLoss(reduction='mean')
criterion_asl = AsymmetricLossOptimized(gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8)

# Same inputs for every criterion.
loss_focal = criterion_focal(logits, labels_onehot)
loss_bce = criterion_bce(logits, labels_onehot)
loss_asl = criterion_asl(logits, labels_onehot)
print(loss_focal, loss_bce, loss_asl)

torch.Size([5, 10]) torch.Size([5, 10])
tensor(0.2845) tensor(0.8440) tensor(1.6843)


In [12]:
import torch
from torch import nn
import math
import torch.nn.functional as F


class Multiheadattention(nn.Module):
    """Multi-head scaled dot-product self-attention over one input sequence.

    Queries, keys and values are all projected from the same input ``x``.
    Every parameter is set to 1.0 by ``init_weights_to_one`` (presumably so
    that outputs of different implementations can be compared directly).
    """

    def __init__(self,input_dim ,heads,d_model,dropout=0.1):
        """
        input_dim: size of the last dimension of the input.
        heads: number of attention heads; must divide ``d_model``.
        d_model: total projection width (split evenly across heads).
        dropout: dropout probability applied to the attention weights.

        Raises ValueError when ``d_model`` is not divisible by ``heads``;
        otherwise the head reshape would fail or silently mix positions.
        """
        super(Multiheadattention, self).__init__()
        if heads <= 0 or d_model % heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by heads ({heads})"
            )
        self.d_model = d_model
        self.d_k = self.d_model // heads
        self.heads = heads
        self.input_dim = input_dim

        self.q_linear = nn.Linear(self.input_dim,self.d_model)  # batch_size,seq_len,d_model
        self.k_linear = nn.Linear(self.input_dim,self.d_model)  # batch_size,seq_len,d_model
        self.v_linear = nn.Linear(self.input_dim,self.d_model)  # batch_size,seq_len,d_model
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(self.d_model,d_model)
        self.init_weights_to_one()

    def init_weights_to_one(self):
        """Set every parameter (weights and biases) to the constant 1.0."""
        for param in self.parameters():
            nn.init.constant_(param, 1.0)

    def attention(self,q,k,v,d_k,mask=None,dropout=None):
        """Scaled dot-product attention on per-head tensors.

        q, k, v: batch_size,heads,seq_len,d_k
        d_k: accepted for interface compatibility; the scale uses ``self.d_k``.
        mask: optional tensor, unsqueezed at dim 1 to broadcast over heads;
            positions where ``mask == 0`` are excluded from the softmax.
        dropout: optional callable applied to the attention weights.
        """
        scores = torch.matmul(q,k.transpose(-2,-1)) / math.sqrt(self.d_k)  # batch_size,heads,seq_len,seq_len

        if mask is not None:
            mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask==0,-1e9)

        scores = F.softmax(scores,dim=-1)  # batch_size,heads,seq_len,seq_len

        if dropout is not None:
            scores = dropout(scores)

        output = torch.matmul(scores,v)  # batch_size,heads,seq_len,d_k
        return output

    def forward(self,x,mask=None):
        """Apply self-attention to ``x`` (batch_size,seq_len,input_dim) and return batch_size,seq_len,d_model."""
        bs = x.size(0)

        q = self.q_linear(x).view(bs,-1,self.heads,self.d_k) # batch_size,seq_len,heads,d_k
        k = self.k_linear(x).view(bs,-1,self.heads,self.d_k) # batch_size,seq_len,heads,d_k
        v = self.v_linear(x).view(bs,-1,self.heads,self.d_k) # batch_size,seq_len,heads,d_k

        q = q.transpose(1,2)  # batch_size,heads,seq_len,d_k
        k = k.transpose(1,2)  # batch_size,heads,seq_len,d_k
        v = v.transpose(1,2)  # batch_size,heads,seq_len,d_k

        # Compute attention
        scores = self.attention(q,k,v,self.d_k,mask,self.dropout)  # batch_size,heads,seq_len,d_k

        # Concatenate the heads and feed them to the final linear layer
        concat = scores.transpose(1,2).contiguous().view(bs,-1,self.d_model) # batch_size,seq_len,d_model

        output = self.out(concat)  # batch_size,seq_len,d_model
        return output

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class MyMultiheadattention(nn.Module):
    """Multi-head scaled dot-product self-attention (compact re-implementation).

    Queries, keys and values are projected from the same input. All
    parameters are set to 1.0 by ``init_weights_to_one``.
    """

    def __init__(self, input_dim, heads, d_model, dropout=0):
        """
        input_dim: size of the last dimension of the input.
        heads: number of attention heads; must divide ``d_model``.
        d_model: total projection width (split evenly across heads).
        dropout: dropout probability applied to the attention weights.

        Raises ValueError when ``d_model`` is not divisible by ``heads``.
        """
        super(MyMultiheadattention, self).__init__()
        if heads <= 0 or d_model % heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by heads ({heads})"
            )
        self.input_dim = input_dim
        self.heads = heads
        self.d_model = d_model
        self.d_k = d_model // heads

        self.q_linear = nn.Linear(input_dim, d_model)
        self.k_linear = nn.Linear(input_dim, d_model)
        self.v_linear = nn.Linear(input_dim, d_model)
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)
        self.init_weights_to_one()

    def init_weights_to_one(self):
        """Set every parameter (weights and biases) to the constant 1.0."""
        for param in self.parameters():
            nn.init.constant_(param, 1.0)

    def attention(self, q, k, v, d_k, mask=None):
        """Scaled dot-product attention on per-head tensors.

        q, k, v: [batch_size, heads, seq_len, d_k]
        d_k: per-head width used for the 1/sqrt(d_k) scaling.
        mask: optional tensor, unsqueezed at dim 1 to broadcast over heads;
            positions where ``mask == 0`` are excluded from the softmax.
        """
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)
        if mask is not None:
            # Previously the mask was accepted but never applied.
            scores = scores.masked_fill(mask.unsqueeze(1) == 0, -1e9)
        scores = F.softmax(scores, dim=-1)
        scores = self.dropout(scores)
        output = torch.matmul(scores, v)
        return output

    def forward(self, xs, mask=None):
        """Apply self-attention to ``xs`` [batch_size, seq_len, input_dim]; returns [batch_size, seq_len, d_model]."""
        bs = xs.size(0)
        # Project, then split d_model into (heads, d_k) and move heads forward:
        # q, k, v [batch_size, heads, seq_len, d_k]
        q = self.q_linear(xs).view(bs, -1, self.heads, self.d_k).transpose(1, 2)
        k = self.k_linear(xs).view(bs, -1, self.heads, self.d_k).transpose(1, 2)
        v = self.v_linear(xs).view(bs, -1, self.heads, self.d_k).transpose(1, 2)

        attn_output = self.attention(q, k, v, self.d_k, mask)
        # Merge the heads back into d_model before the output projection.
        concat = attn_output.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output
        

In [98]:

class MyMultiheadattention2(nn.Module):
	"""Multi-head scaled dot-product self-attention (second re-implementation).

	Queries, keys and values are projected from the same input. All
	parameters are set to 1.0 by ``_init_weights``.
	"""

	def __init__(self, input_dim, heads, d_model, dropout=0.1):
		"""
		input_dim: size of the last dimension of the input.
		heads: number of attention heads; must divide ``d_model``.
		d_model: total projection width (split evenly across heads).
		dropout: dropout probability applied to the attention weights.

		Raises ValueError when ``d_model`` is not divisible by ``heads``.
		"""
		super(MyMultiheadattention2, self).__init__()
		if heads <= 0 or d_model % heads != 0:
			raise ValueError(
				f"d_model ({d_model}) must be divisible by heads ({heads})"
			)
		self.input_dim = input_dim
		self.heads = heads
		self.d_model = d_model
		self.d_h = d_model // heads

		self.q_linear = nn.Linear(input_dim, d_model)
		self.k_linear = nn.Linear(input_dim, d_model)
		self.v_linear = nn.Linear(input_dim, d_model)
		self.dropout = nn.Dropout(dropout)
		self.o_linear = nn.Linear(d_model, d_model)
		self._init_weights()

	def _init_weights(self):
		"""Set every parameter (weights and biases) to the constant 1.0."""
		for param in self.parameters():
			nn.init.constant_(param, 1.0)

	def attention(self, q, k, v, mask):
		"""Scaled dot-product attention on per-head tensors.

		q, k, v: [batch_size, heads, seq_len, d_h]
		mask: ``None`` or a tensor that is unsqueezed at dim 1 to broadcast
			over heads; positions where ``mask == 0`` are excluded.
		"""
		scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_h)
		if mask is not None:
			# Previously the mask was accepted but never applied.
			scores = scores.masked_fill(mask.unsqueeze(1) == 0, -1e9)
		scores = F.softmax(scores, dim=-1)
		scores = self.dropout(scores)
		output = torch.matmul(scores, v)
		return output

	def forward(self, xs, mask=None):
		"""Apply self-attention to ``xs`` [batch_size, seq_len, input_dim]; returns [batch_size, seq_len, d_model]."""
		bs = xs.shape[0]
		# Project, split d_model into (heads, d_h) and move heads forward.
		q = self.q_linear(xs).view(bs, -1, self.heads, self.d_h).transpose(1,2)
		k = self.k_linear(xs).view(bs, -1, self.heads, self.d_h).transpose(1,2)
		v = self.v_linear(xs).view(bs, -1, self.heads, self.d_h).transpose(1,2)
		attn_output = self.attention(q,k,v,mask)
		# Merge the heads back into d_model before the output projection.
		output = attn_output.transpose(1,2).contiguous().view(bs, -1, self.d_model)
		output = self.o_linear(output)
		return output

In [99]:
# Build two attention implementations with identical hyper-parameters and
# compare their outputs on the same random input (MyMultiheadattention is
# currently left out of the comparison).
input_dim, seq_len, heads, d_model = 8, 10, 2, 16

attention1 = Multiheadattention(input_dim, heads, d_model, dropout=0.1)
attention3 = MyMultiheadattention2(input_dim, heads, d_model, dropout=0.1)

# Shared input: batch of 2 sequences.
xs = torch.randn(2, seq_len, input_dim)

# Evaluation mode switches dropout off so the outputs are comparable.
attention1.eval()
attention3.eval()

y1 = attention1(xs)
y3 = attention3(xs)

# Show the same position from both outputs.
print(y1[0][1])
print(y3[0][1])

tensor([146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904,
        146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904,
        146.3904, 146.3904], grad_fn=<SelectBackward0>)
tensor([146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904,
        146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904, 146.3904,
        146.3904, 146.3904], grad_fn=<SelectBackward0>)


In [20]:
import torch
from torch.nn.functional import one_hot
import numpy as np
# Class id -> class name.
id2label = dict(enumerate(
    ['nilm', 'ascus', 'asch', 'lsil', 'hsil', 'agc', 't', 'm', 'bv']
))

# Class id -> 9-dimensional multi-hot code. Each entry lists the positions
# that are set to 1.0; classes 2, 3 and 4 switch on several positions.
_active_positions = {
    0: (0,),
    1: (1,),
    2: (1, 2, 3),
    3: (1, 3),
    4: (1, 2, 3, 4),
    5: (5,),
    6: (6,),
    7: (7,),
    8: (8,),
}
id2labelcode = {
    class_id: [1.0 if pos in active else 0.0 for pos in range(9)]
    for class_id, active in _active_positions.items()
}

# Draw 10 random class ids in [0, 8].
# Alternative: sequential ids via label = np.arange(9)
label = np.random.randint(0, 9, size=(10,))

batch_size = len(label)

# Look up the code row of every sampled id and stack the rows into an array.
label_onehot = np.array([id2labelcode[class_id] for class_id in label])

# Show each encoded row with its sample index.
for i, code_row in enumerate(label_onehot):
    print(f'{i}: {code_row.tolist()}')

0: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
1: [0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0]
2: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
3: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
4: [0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
5: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
6: [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
7: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
8: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]
9: [0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
