# Library

In [1]:
#general
import argparse
import sys
import os
import random
from scipy import signal
import copy
from typing import Union
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from typing import Callable, Tuple

from typing import List, Dict
import torch.nn as nn
import itertools
import random
import numpy as np

import torch
from torch.utils.data import Sampler
from torch.utils.data import DataLoader, Dataset
from typing import TypeVar, Iterable, Dict, List

#sklearn
from sklearn.metrics import accuracy_score, f1_score
from sklearn.calibration import calibration_curve
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split,StratifiedKFold
from scipy.interpolate import interp1d
from scipy.optimize import brentq
from sklearn.metrics import roc_curve, precision_recall_fscore_support, roc_auc_score

#hf
from transformers import (AutoConfig,AutoModel)

#torch
import torch.nn.functional as F
from torch.utils.data import Dataset
import torch
from torch import nn
from torch import Tensor
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import LambdaLR


#librosa
import librosa

#wandb
import wandb


import warnings
warnings.filterwarnings("ignore")

In [2]:
class ForeverDataIterator:
    r"""A data iterator that will never stop producing data.

    Wraps an iterable data loader and transparently restarts it whenever it
    is exhausted, so ``next(it)`` can be called an unbounded number of times.

    Args:
        data_loader: any re-iterable object supporting ``iter()`` and ``len()``
            (typically a :class:`torch.utils.data.DataLoader`).
        device: if not ``None``, every batch is passed through
            ``send_to_device(batch, device)`` before being returned.
            ``send_to_device`` is expected to be defined elsewhere in the
            project; it is only looked up when ``device`` is set.
    """

    def __init__(self, data_loader: DataLoader, device=None):
        self.data_loader = data_loader
        self.iter = iter(self.data_loader)
        self.device = device

    def __iter__(self):
        # Implement the full iterator protocol so the object also works in
        # ``for`` loops, ``zip`` and ``itertools`` helpers.
        return self

    def __next__(self):
        try:
            data = next(self.iter)
        except StopIteration:
            # The underlying loader is exhausted: start a new pass over it.
            # An empty loader still raises StopIteration from the retry.
            self.iter = iter(self.data_loader)
            data = next(self.iter)
        if self.device is not None:
            data = send_to_device(data, self.device)
        return data

    def __len__(self):
        """Return the length of one pass over the wrapped loader."""
        return len(self.data_loader)


class SAM(torch.optim.Optimizer):
    """Sharpness-Aware Minimization (SAM) wrapper around a base optimizer.

    One update consists of two gradient evaluations: ``first_step`` perturbs
    the parameters along the (optionally parameter-scaled) gradient direction,
    the caller recomputes gradients at the perturbed point, and
    ``second_step`` removes the perturbation and lets the base optimizer
    apply the update using those gradients.

    Args:
        params: parameters or parameter groups, as for any torch optimizer.
        base_optimizer: optimizer *class* (not instance); it is instantiated
            here with this optimizer's param groups and ``**kwargs``.
        rho: size of the perturbation; must be non-negative.
        adaptive: if True, the perturbation is scaled element-wise by the
            parameter values (see ``first_step`` / ``_grad_norm``).
        **kwargs: forwarded both into the group defaults and to
            ``base_optimizer``.
    """

    def __init__(self, params, base_optimizer, rho=0.05, adaptive=False, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        defaults = dict(rho=rho, adaptive=adaptive, **kwargs)
        super(SAM, self).__init__(params, defaults)

        # The base optimizer is built on the same param groups, and the groups
        # are then shared so both optimizers see identical hyper-parameters.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Move every parameter with a gradient to ``w + e(w)``.

        The perturbation ``e(w)`` is stored in ``self.state[p]["e_w"]`` so that
        ``second_step`` can undo it. If ``zero_grad`` is True the gradients are
        cleared afterwards.
        """
        grad_norm = self._grad_norm()
        for group in self.param_groups:
            # 1e-12 guards against division by a zero gradient norm.
            scale = group["rho"] / (grad_norm + 1e-12)

            for p in group["params"]:
                if p.grad is None: continue
                e_w = (torch.pow(p, 2) if group["adaptive"] else 1.0) * p.grad * scale.to(p)
                p.add_(e_w)  # climb to the local maximum "w + e(w)"
                self.state[p]["e_w"] = e_w

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Undo the perturbation and run the base optimizer's ``step``.

        NOTE(review): only parameters whose ``grad`` is not None at this point
        are restored, and each of them must have an ``"e_w"`` entry written by
        ``first_step``; a parameter that has a gradient in only one of the two
        passes is not handled here.
        """
        for group in self.param_groups:
            for p in group["params"]:
                if p.grad is None: continue
                p.sub_(self.state[p]["e_w"])  # get back to "w" from "w + e(w)"

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad: self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run a full SAM update using ``closure`` for the second pass.

        ``closure`` is mandatory; it is re-enabled for gradient tracking and
        called between ``first_step`` and ``second_step``. Gradients for the
        first pass must already be populated when this method is called.
        """
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        closure = torch.enable_grad()(closure)  # the closure should do a full forward-backward pass

        self.first_step(zero_grad=True)
        closure()
        self.second_step()

    def _grad_norm(self):
        """Return the global L2 norm over all parameter gradients.

        With ``adaptive`` enabled each gradient is first multiplied by
        ``|p|``. Per-parameter norms are moved to the device of the first
        parameter before being combined.
        """
        shared_device = self.param_groups[0]["params"][0].device  # put everything on the same device, in case of model parallelism
        norm = torch.norm(
                    torch.stack([
                        ((torch.abs(p) if group["adaptive"] else 1.0) * p.grad).norm(p=2).to(shared_device)
                        for group in self.param_groups for p in group["params"]
                        if p.grad is not None
                    ]),
                    p=2
               )
        return norm


class ConditionalDomainAdversarialLoss(nn.Module):
    r"""The Conditional Domain Adversarial Loss used in `Conditional Adversarial Domain Adaptation (NIPS 2018) <https://arxiv.org/abs/1705.10667>`_

    Conditional Domain adversarial loss measures the domain discrepancy through training a domain discriminator in a
    conditional manner. Given domain discriminator :math:`D`, feature representation :math:`f` and
    classifier predictions :math:`g`, the definition of CDAN loss is

    .. math::
        loss(\mathcal{D}_s, \mathcal{D}_t) &= \mathbb{E}_{x_i^s \sim \mathcal{D}_s} \text{log}[D(T(f_i^s, g_i^s))] \\
        &+ \mathbb{E}_{x_j^t \sim \mathcal{D}_t} \text{log}[1-D(T(f_j^t, g_j^t))],\\

    where :math:`T` is a :class:`MultiLinearMap`  or :class:`RandomizedMultiLinearMap` which convert two tensors to a single tensor.

    Args:
        domain_discriminator (torch.nn.Module): A domain discriminator object, which predicts the domains of
          features. Its input shape is (N, F) and output shape is (N, 1)
        entropy_conditioning (bool, optional): If True, use entropy-aware weight to reweight each training example.
          Default: False
        randomized (bool, optional): If True, use `randomized multi linear map`. Else, use `multi linear map`.
          Default: False
        num_classes (int, optional): Number of classes. Default: -1
        features_dim (int, optional): Dimension of input features. Default: -1
        randomized_dim (int, optional): Dimension of features after randomized. Default: 1024
        reduction (str, optional): Specifies the reduction to apply to the output:
          ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
          ``'mean'``: the sum of the output will be divided by the number of
          elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'``

    .. note::
        You need to provide `num_classes`, `features_dim` and `randomized_dim` **only when** `randomized`
        is set True.

    Inputs:
        - g_s (tensor): unnormalized classifier predictions on source domain, :math:`g^s`
        - f_s (tensor): feature representations on source domain, :math:`f^s`
        - g_t (tensor): unnormalized classifier predictions on target domain, :math:`g^t`
        - f_t (tensor): feature representations on target domain, :math:`f^t`

    Shape:
        - g_s, g_t: :math:`(minibatch, C)` where C means the number of classes.
        - f_s, f_t: :math:`(minibatch, F)` where F means the dimension of input features.
        - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(minibatch, )`.

    Examples::

        >>> from dalib.modules.domain_discriminator import DomainDiscriminator
        >>> from dalib.adaptation.cdan import ConditionalDomainAdversarialLoss
        >>> import torch
        >>> num_classes = 2
        >>> feature_dim = 1024
        >>> batch_size = 10
        >>> discriminator = DomainDiscriminator(in_feature=feature_dim * num_classes, hidden_size=1024)
        >>> loss = ConditionalDomainAdversarialLoss(discriminator, reduction='mean')
        >>> # features from source domain and target domain
        >>> f_s, f_t = torch.randn(batch_size, feature_dim), torch.randn(batch_size, feature_dim)
        >>> # logits output from source domain and target domain
        >>> g_s, g_t = torch.randn(batch_size, num_classes), torch.randn(batch_size, num_classes)
        >>> output = loss(g_s, f_s, g_t, f_t)
    """

    # Annotations use plain types rather than ``Optional[...]``: ``None`` is
    # not a meaningful value for any of these arguments, and this keeps the
    # class definition independent of where ``typing.Optional`` is imported.
    def __init__(self, domain_discriminator: nn.Module, entropy_conditioning: bool = False,
                 randomized: bool = False, num_classes: int = -1,
                 features_dim: int = -1, randomized_dim: int = 1024,
                 reduction: str = 'mean'):
        super(ConditionalDomainAdversarialLoss, self).__init__()
        self.domain_discriminator = domain_discriminator
        # Gradient reversal with a warm-up schedule that advances on every forward call.
        self.grl = WarmStartGradientReverseLayer(alpha=1., lo=0., hi=1., max_iters=1000, auto_step=True)
        self.entropy_conditioning = entropy_conditioning
        self.reduction = reduction

        if randomized:
            assert num_classes > 0 and features_dim > 0 and randomized_dim > 0
            self.map = RandomizedMultiLinearMap(features_dim, num_classes, randomized_dim)
        else:
            self.map = MultiLinearMap()

        # Kept for interface compatibility; this block never updates it.
        self.domain_discriminator_accuracy = None

    def bce(self, input, target, weight=None):
        """Binary cross entropy with the configured reduction.

        ``weight`` is applied only when entropy conditioning is enabled; it is
        ignored otherwise. Implemented as a method (not a stored lambda) so
        the module remains picklable.
        """
        if self.entropy_conditioning:
            return F.binary_cross_entropy(input, target, weight, reduction=self.reduction)
        return F.binary_cross_entropy(input, target, reduction=self.reduction)

    def forward(self, g_s: torch.Tensor, f_s: torch.Tensor, g_t: torch.Tensor, f_t: torch.Tensor) -> torch.Tensor:
        f = torch.cat((f_s, f_t), dim=0)
        g = torch.cat((g_s, g_t), dim=0)
        # Predictions condition the discriminator but receive no gradient from it.
        g = F.softmax(g, dim=1).detach()
        h = self.grl(self.map(f, g))
        d = self.domain_discriminator(h)
        # Domain labels: 1 for source samples, 0 for target samples.
        d_label = torch.cat((
            torch.ones((g_s.size(0), 1), device=g_s.device),
            torch.zeros((g_t.size(0), 1), device=g_t.device),
        ))
        if not self.entropy_conditioning:
            # The entropy weight is unused here, so it is not computed.
            return self.bce(d, d_label)

        # ``entropy`` is expected to be provided elsewhere in the project
        # (presumably a per-sample prediction entropy -- verify against its definition).
        weight = 1.0 + torch.exp(-entropy(g))
        batch_size = f.size(0)
        # Normalise so the weights sum to the batch size.
        weight = weight / torch.sum(weight) * batch_size
        return self.bce(d, d_label, weight.view_as(d))


class RandomizedMultiLinearMap(nn.Module):
    r"""Random multi linear map

    Given two inputs :math:`f` and :math:`g`, the definition is

    .. math::
        T_{\odot}(f,g) = \dfrac{1}{\sqrt{d}} (R_f f) \odot (R_g g),

    where :math:`\odot` is element-wise product, :math:`R_f` and :math:`R_g` are random matrices
    sampled only once and fixed in training.

    Args:
        features_dim (int): dimension of input :math:`f`
        num_classes (int): dimension of input :math:`g`
        output_dim (int, optional): dimension of output tensor. Default: 1024

    Shape:
        - f: (minibatch, features_dim)
        - g: (minibatch, num_classes)
        - Outputs: (minibatch, output_dim)
    """

    def __init__(self, features_dim: int, num_classes: int, output_dim: int = 1024):
        super(RandomizedMultiLinearMap, self).__init__()
        # Registered as buffers so they follow ``module.to(...)`` / ``.cuda()``
        # / ``.double()``. ``persistent=False`` keeps them out of the
        # ``state_dict`` so its keys are the same as before.
        self.register_buffer("Rf", torch.randn(features_dim, output_dim), persistent=False)
        self.register_buffer("Rg", torch.randn(num_classes, output_dim), persistent=False)
        self.output_dim = output_dim

    def forward(self, f: torch.Tensor, g: torch.Tensor) -> torch.Tensor:
        # ``.to`` is a no-op when the module already lives on the input's
        # device; it is kept so inputs on another device still work.
        f = torch.mm(f, self.Rf.to(f.device))
        g = torch.mm(g, self.Rg.to(g.device))
        output = torch.mul(f, g) / np.sqrt(float(self.output_dim))
        return output


class MultiLinearMap(nn.Module):
    """Multi linear map: flattened per-sample outer product of ``g`` and ``f``.

    Shape:
        - f: (minibatch, F)
        - g: (minibatch, C)
        - Outputs: (minibatch, F * C)
    """

    def __init__(self):
        super().__init__()

    def forward(self, f: torch.Tensor, g: torch.Tensor) -> torch.Tensor:
        # (N, C, 1) @ (N, 1, F) -> (N, C, F): one outer product per sample.
        outer = torch.bmm(g[:, :, None], f[:, None, :])
        return outer.view(f.size(0), -1)


from typing import Optional, Any, Tuple
import numpy as np
import torch.nn as nn
from torch.autograd import Function
import torch


class GradientReverseFunction(Function):
    """Autograd function: identity forward, negated and scaled gradient backward."""

    @staticmethod
    def forward(ctx: Any, input: torch.Tensor, coeff: Optional[float] = 1.) -> torch.Tensor:
        # Remember the scaling factor for the backward pass.
        ctx.coeff = coeff
        # Multiplying by 1.0 yields a new tensor carrying the same values.
        return input * 1.0

    @staticmethod
    def backward(ctx: Any, grad_output: torch.Tensor) -> Tuple[torch.Tensor, Any]:
        reversed_grad = grad_output.neg() * ctx.coeff
        # ``coeff`` is a plain number, so it receives no gradient.
        return reversed_grad, None


class GradientReverseLayer(nn.Module):
    """Module wrapper that applies ``GradientReverseFunction`` to its inputs."""

    def __init__(self):
        super().__init__()

    def forward(self, *input):
        # All positional arguments (tensor and optional coefficient) are
        # forwarded unchanged to the autograd function.
        reversed_output = GradientReverseFunction.apply(*input)
        return reversed_output


class WarmStartGradientReverseLayer(nn.Module):
    r"""Gradient Reverse Layer :math:`\mathcal{R}(x)` with warm start

        The forward and backward behaviours are:

        .. math::
            \mathcal{R}(x) = x,

            \dfrac{ d\mathcal{R}} {dx} = - \lambda I.

        :math:`\lambda` is initiated at :math:`lo` and is gradually changed to :math:`hi` using the following schedule:

        .. math::
            \lambda = \dfrac{2(hi-lo)}{1+\exp(- α \dfrac{i}{N})} - (hi-lo) + lo

        where :math:`i` is the iteration step.

        Args:
            alpha (float, optional): :math:`α`. Default: 1.0
            lo (float, optional): Initial value of :math:`\lambda`. Default: 0.0
            hi (float, optional): Final value of :math:`\lambda`. Default: 1.0
            max_iters (int, optional): :math:`N`. Default: 1000
            auto_step (bool, optional): If True, increase :math:`i` each time `forward` is called.
              Otherwise use function `step` to increase :math:`i`. Default: False
        """

    def __init__(self, alpha: Optional[float] = 1.0, lo: Optional[float] = 0.0, hi: Optional[float] = 1.,
                 max_iters: Optional[int] = 1000., auto_step: Optional[bool] = False):
        super(WarmStartGradientReverseLayer, self).__init__()
        self.alpha = alpha
        self.lo = lo
        self.hi = hi
        self.iter_num = 0
        self.max_iters = max_iters
        self.auto_step = auto_step

    def _current_coeff(self) -> float:
        """Return :math:`\\lambda` for the current ``iter_num`` (no side effects)."""
        return float(
            2.0 * (self.hi - self.lo) / (1.0 + np.exp(-self.alpha * self.iter_num / self.max_iters))
            - (self.hi - self.lo) + self.lo
        )

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Return ``input`` unchanged; its gradient is reversed and scaled by :math:`\\lambda`."""
        # The coefficient is read before the optional auto-step, so the first
        # call uses the value for iteration 0.
        coeff = self._current_coeff()
        if self.auto_step:
            self.step()
        return GradientReverseFunction.apply(input, coeff)

    def step(self):
        """Increase iteration number :math:`i` by 1"""
        self.iter_num += 1

# Model

In [3]:
class GraphAttentionLayer(nn.Module):
    """Graph attention layer over a fully connected set of nodes.

    Attention scores come from the element-wise product of every node pair;
    the output is the sum of an attention-aggregated projection and a direct
    projection, followed by batch norm and SELU.

    Keyword Args:
        temperature: divisor applied to the attention logits (default 1.).
    """

    def __init__(self, in_dim, out_dim, **kwargs):
        super().__init__()

        # attention scoring: linear projection followed by a learned vector
        self.att_proj = nn.Linear(in_dim, out_dim)
        self.att_weight = self._init_new_params(out_dim, 1)

        # node projections, with and without attention aggregation
        self.proj_with_att = nn.Linear(in_dim, out_dim)
        self.proj_without_att = nn.Linear(in_dim, out_dim)

        # normalisation, input dropout and activation
        self.bn = nn.BatchNorm1d(out_dim)
        self.input_drop = nn.Dropout(p=0.2)
        self.act = nn.SELU(inplace=True)

        # softmax temperature
        self.temp = kwargs.get("temperature", 1.)

    def forward(self, x):
        '''
        x   :(#bs, #node, #dim)
        '''
        dropped = self.input_drop(x)
        attention = self._derive_att_map(dropped)
        projected = self._project(dropped, attention)
        normalised = self._apply_BN(projected)
        return self.act(normalised)

    def _pairwise_mul_nodes(self, x):
        '''
        Element-wise product of every ordered pair of nodes.
        x           :(#bs, #node, #dim)
        out_shape   :(#bs, #node, #node, #dim)
        '''
        node_count = x.size(1)
        rows = x.unsqueeze(2).expand(-1, -1, node_count, -1)
        return rows * rows.transpose(1, 2)

    def _derive_att_map(self, x):
        '''
        x           :(#bs, #node, #dim)
        out_shape   :(#bs, #node, #node, 1)
        '''
        pairwise = self._pairwise_mul_nodes(x)
        # (#bs, #node, #node, #dim_out)
        hidden = torch.tanh(self.att_proj(pairwise))
        # (#bs, #node, #node, 1), scaled by the temperature
        logits = torch.matmul(hidden, self.att_weight) / self.temp
        return F.softmax(logits, dim=-2)

    def _project(self, x, att_map):
        aggregated = torch.matmul(att_map.squeeze(-1), x)
        return self.proj_with_att(aggregated) + self.proj_without_att(x)

    def _apply_BN(self, x):
        # BatchNorm1d works on (N, C): fold all leading dims, then restore.
        shape = x.size()
        flat = x.view(-1, shape[-1])
        return self.bn(flat).view(shape)

    def _init_new_params(self, *size):
        weight = nn.Parameter(torch.FloatTensor(*size))
        nn.init.xavier_normal_(weight)
        return weight


class HtrgGraphAttentionLayer(nn.Module):
    """Graph attention layer over two node types plus a single master node.

    Both node sets are first projected by their own linear layer and then
    concatenated into one graph. Attention logits use a different learned
    vector depending on whether a node pair is (type1, type1),
    (type2, type2) or mixed. A separate attention over all nodes updates the
    master node.

    Keyword Args:
        temperature: divisor applied to all attention logits (default 1.).
    """

    def __init__(self, in_dim, out_dim, **kwargs):
        super().__init__()

        # per-type input projections (dimension preserved)
        self.proj_type1 = nn.Linear(in_dim, in_dim)
        self.proj_type2 = nn.Linear(in_dim, in_dim)

        # attention map
        self.att_proj = nn.Linear(in_dim, out_dim)
        self.att_projM = nn.Linear(in_dim, out_dim)

        # one scoring vector per pair category, plus one for the master node
        self.att_weight11 = self._init_new_params(out_dim, 1)
        self.att_weight22 = self._init_new_params(out_dim, 1)
        self.att_weight12 = self._init_new_params(out_dim, 1)
        self.att_weightM = self._init_new_params(out_dim, 1)

        # project
        self.proj_with_att = nn.Linear(in_dim, out_dim)
        self.proj_without_att = nn.Linear(in_dim, out_dim)

        self.proj_with_attM = nn.Linear(in_dim, out_dim)
        self.proj_without_attM = nn.Linear(in_dim, out_dim)

        # batch norm
        self.bn = nn.BatchNorm1d(out_dim)

        # dropout for inputs
        self.input_drop = nn.Dropout(p=0.2)

        # activate
        self.act = nn.SELU(inplace=True)

        # temperature
        self.temp = 1.
        if "temperature" in kwargs:
            self.temp = kwargs["temperature"]

    def forward(self, x1, x2, master=None):
        '''
        x1  :(#bs, #node, #dim)
        x2  :(#bs, #node, #dim)
        master: optional master node; when None it is initialised as the
                mean over all (projected) nodes.

        Returns the updated (x1, x2, master); x1/x2 keep their node counts.
        '''
        num_type1 = x1.size(1)
        num_type2 = x2.size(1)

        x1 = self.proj_type1(x1)
        x2 = self.proj_type2(x2)

        # type-1 nodes occupy the first num_type1 positions of the joint graph
        x = torch.cat([x1, x2], dim=1)

        if master is None:
            master = torch.mean(x, dim=1, keepdim=True)

        # apply input dropout
        x = self.input_drop(x)

        # derive attention map
        att_map = self._derive_att_map(x, num_type1, num_type2)

        # directional edge for master node
        # (computed from the dropped-out, not yet projected, node features)
        master = self._update_master(x, master)

        # projection
        x = self._project(x, att_map)

        # apply batch norm
        x = self._apply_BN(x)
        x = self.act(x)

        # split the joint graph back into the two node types
        x1 = x.narrow(1, 0, num_type1)
        x2 = x.narrow(1, num_type1, num_type2)

        return x1, x2, master

    def _update_master(self, x, master):
        """Attend from the master node to all nodes and project the result."""

        att_map = self._derive_att_map_master(x, master)
        master = self._project_master(x, master, att_map)

        return master

    def _pairwise_mul_nodes(self, x):
        '''
        Calculates pairwise multiplication of nodes.
        - for attention map
        x           :(#bs, #node, #dim)
        out_shape   :(#bs, #node, #node, #dim)
        '''

        nb_nodes = x.size(1)
        x = x.unsqueeze(2).expand(-1, -1, nb_nodes, -1)
        x_mirror = x.transpose(1, 2)

        return x * x_mirror

    def _derive_att_map_master(self, x, master):
        '''
        x           :(#bs, #node, #dim)
        master      :broadcast against x in the element-wise product
        out_shape   :(#bs, #node, 1)
        '''
        att_map = x * master
        att_map = torch.tanh(self.att_projM(att_map))

        att_map = torch.matmul(att_map, self.att_weightM)

        # apply temperature
        att_map = att_map / self.temp

        # normalise over the node axis
        att_map = F.softmax(att_map, dim=-2)

        return att_map

    def _derive_att_map(self, x, num_type1, num_type2):
        '''
        x           :(#bs, #node, #dim)
        out_shape   :(#bs, #node, #node, 1)
        '''
        att_map = self._pairwise_mul_nodes(x)
        # size: (#bs, #node, #node, #dim_out)
        att_map = torch.tanh(self.att_proj(att_map))
        # size: (#bs, #node, #node, 1)

        # Empty logit board, filled quadrant by quadrant below.
        att_board = torch.zeros_like(att_map[:, :, :, 0]).unsqueeze(-1)

        # type1-type1 and type2-type2 quadrants use their own vectors;
        # both mixed quadrants share att_weight12.
        att_board[:, :num_type1, :num_type1, :] = torch.matmul(
            att_map[:, :num_type1, :num_type1, :], self.att_weight11)
        att_board[:, num_type1:, num_type1:, :] = torch.matmul(
            att_map[:, num_type1:, num_type1:, :], self.att_weight22)
        att_board[:, :num_type1, num_type1:, :] = torch.matmul(
            att_map[:, :num_type1, num_type1:, :], self.att_weight12)
        att_board[:, num_type1:, :num_type1, :] = torch.matmul(
            att_map[:, num_type1:, :num_type1, :], self.att_weight12)

        att_map = att_board

        # att_map = torch.matmul(att_map, self.att_weight12)

        # apply temperature
        att_map = att_map / self.temp

        att_map = F.softmax(att_map, dim=-2)

        return att_map

    def _project(self, x, att_map):
        # attention-aggregated features plus a direct (skip) projection
        x1 = self.proj_with_att(torch.matmul(att_map.squeeze(-1), x))
        x2 = self.proj_without_att(x)

        return x1 + x2

    def _project_master(self, x, master, att_map):
        # att_map (#bs, #node, 1) -> (#bs, 1, #node) so the matmul yields one
        # aggregated vector per sample.

        x1 = self.proj_with_attM(torch.matmul(
            att_map.squeeze(-1).unsqueeze(1), x))
        x2 = self.proj_without_attM(master)

        return x1 + x2

    def _apply_BN(self, x):
        # BatchNorm1d expects (N, C): flatten leading dims, then restore.
        org_size = x.size()
        x = x.view(-1, org_size[-1])
        x = self.bn(x)
        x = x.view(org_size)

        return x

    def _init_new_params(self, *size):
        # Storage is allocated uninitialised and then Xavier-normal filled.
        out = nn.Parameter(torch.FloatTensor(*size))
        nn.init.xavier_normal_(out)
        return out


class GraphPool(nn.Module):
    """Top-k graph pooling: keep the highest-scoring fraction of nodes.

    Each node gets a sigmoid score from a linear projection (computed on a
    dropped-out copy of the input); surviving nodes are scaled by their score.

    Args:
        k: ratio of nodes to keep (at least one node is always kept).
        in_dim: node feature dimension.
        p: dropout probability used for scoring; ``p <= 0`` disables dropout.
    """

    def __init__(self, k: float, in_dim: int, p: Union[float, int]):
        super().__init__()
        self.k = k
        self.sigmoid = nn.Sigmoid()
        self.proj = nn.Linear(in_dim, 1)
        if p > 0:
            self.drop = nn.Dropout(p=p)
        else:
            self.drop = nn.Identity()
        self.in_dim = in_dim

    def forward(self, h):
        # Scores come from the dropped-out features; pooling uses the originals.
        scores = self.sigmoid(self.proj(self.drop(h)))
        return self.top_k_graph(scores, h, self.k)

    def top_k_graph(self, scores, h, k):
        """
        args
        =====
        scores: attention-based weights (#bs, #node, 1)
        h: graph data (#bs, #node, #dim)
        k: ratio of remaining nodes, (float)

        returns
        =====
        h: graph pool applied data (#bs, #node', #dim)
        """
        _, total_nodes, feat_dim = h.size()
        kept = max(int(total_nodes * k), 1)
        top_idx = torch.topk(scores, kept, dim=1).indices
        # Repeat the node index across the feature axis for ``gather``.
        top_idx = top_idx.expand(-1, -1, feat_dim)
        weighted = h * scores
        return torch.gather(weighted, 1, top_idx)


class CONV(nn.Module):
    """Fixed (non-learned) sinc band-pass filterbank applied as a 1-D convolution.

    The filters are computed once in ``__init__`` from band edges that are
    evenly spaced on the mel scale, and stored in ``self.band_pass`` as a
    plain tensor (not a parameter or registered buffer).
    """

    @staticmethod
    def to_mel(hz):
        """Convert a frequency in Hz to the mel scale."""
        return 2595 * np.log10(1 + hz / 700)

    @staticmethod
    def to_hz(mel):
        """Convert a mel-scale value back to Hz (inverse of ``to_mel``)."""
        return 700 * (10**(mel / 2595) - 1)

    def __init__(self,
                 out_channels,
                 kernel_size,
                 sample_rate=16000,
                 in_channels=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 bias=False,
                 groups=1,
                 mask=False):
        """Build the filterbank.

        Args:
            out_channels: number of band-pass filters.
            kernel_size: filter length; incremented by one if even.
            sample_rate: sampling rate used to place the band edges.
            in_channels: must be 1.
            stride, padding, dilation: forwarded to ``F.conv1d`` in ``forward``.
            bias: must be falsy.
            groups: must not exceed 1.
            mask: stored on the instance.
                NOTE(review): ``forward`` uses its own ``mask`` argument and
                never reads ``self.mask``.

        Raises:
            ValueError: if ``in_channels != 1``, ``bias`` is truthy, or
                ``groups > 1``.
        """
        super().__init__()
        if in_channels != 1:

            msg = "SincConv only support one input channel (here, in_channels = {%i})" % (
                in_channels)
            raise ValueError(msg)
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.sample_rate = sample_rate

        # Forcing the filters to be odd (i.e, perfectly symmetrics)
        if kernel_size % 2 == 0:
            self.kernel_size = self.kernel_size + 1
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.mask = mask
        if bias:
            raise ValueError('SincConv does not support bias.')
        if groups > 1:
            raise ValueError('SincConv does not support groups.')

        # Frequency grid from 0 to sample_rate/2, used only to find the mel
        # range that the band edges are spread over.
        NFFT = 512
        f = int(self.sample_rate / 2) * np.linspace(0, 1, int(NFFT / 2) + 1)
        fmel = self.to_mel(f)
        fmelmax = np.max(fmel)
        fmelmin = np.min(fmel)
        # out_channels + 1 edges, equally spaced in mel, converted back to Hz.
        filbandwidthsmel = np.linspace(fmelmin, fmelmax, self.out_channels + 1)
        filbandwidthsf = self.to_hz(filbandwidthsmel)

        self.mel = filbandwidthsf
        # Symmetric sample positions centred on zero.
        self.hsupp = torch.arange(-(self.kernel_size - 1) / 2,
                                  (self.kernel_size - 1) / 2 + 1)
        self.band_pass = torch.zeros(self.out_channels, self.kernel_size)
        for i in range(len(self.mel) - 1):
            fmin = self.mel[i]
            fmax = self.mel[i + 1]
            # Band-pass = difference of two low-pass sinc responses.
            hHigh = (2*fmax/self.sample_rate) * \
                np.sinc(2*fmax*self.hsupp/self.sample_rate)
            hLow = (2*fmin/self.sample_rate) * \
                np.sinc(2*fmin*self.hsupp/self.sample_rate)
            hideal = hHigh - hLow

            # Taper the ideal response with a Hamming window.
            self.band_pass[i, :] = Tensor(np.hamming(
                self.kernel_size)) * Tensor(hideal)

    def forward(self, x, mask=False):
        """Filter ``x`` with the filterbank.

        Args:
            x: input passed to ``F.conv1d`` with a (out_channels, 1, kernel)
                weight -- assumes shape (batch, 1, samples); TODO confirm.
            mask: if truthy, a random contiguous run of 0-19 filters is zeroed
                for this call (frequency masking).

        Returns:
            The result of ``F.conv1d``.
        """
        # Work on a copy so masking never alters the stored filters.
        band_pass_filter = self.band_pass.clone().to(x.device)
        if mask:
            A = np.random.uniform(0, 20)
            A = int(A)
            # NOTE(review): random.randint raises ValueError if there are
            # fewer than A filters.
            A0 = random.randint(0, band_pass_filter.shape[0] - A)
            band_pass_filter[A0:A0 + A, :] = 0
        else:
            band_pass_filter = band_pass_filter

        self.filters = (band_pass_filter).view(self.out_channels, 1,
                                               self.kernel_size)

        return F.conv1d(x,
                        self.filters,
                        stride=self.stride,
                        padding=self.padding,
                        dilation=self.dilation,
                        bias=None,
                        groups=1)


class Residual_block(nn.Module):
    """2-D residual block: conv -> BN -> SELU -> conv, skip connection, max-pool.

    Args:
        nb_filts: two-element sequence ``[in_channels, out_channels]``.
        first: if True, no ``bn1`` layer is created for this block.
    """

    def __init__(self, nb_filts, first=False):
        super().__init__()
        self.first = first

        if not self.first:
            self.bn1 = nn.BatchNorm2d(num_features=nb_filts[0])
        self.conv1 = nn.Conv2d(in_channels=nb_filts[0],
                               out_channels=nb_filts[1],
                               kernel_size=(2, 3),
                               padding=(1, 1),
                               stride=1)
        self.selu = nn.SELU(inplace=True)

        self.bn2 = nn.BatchNorm2d(num_features=nb_filts[1])
        self.conv2 = nn.Conv2d(in_channels=nb_filts[1],
                               out_channels=nb_filts[1],
                               kernel_size=(2, 3),
                               padding=(0, 1),
                               stride=1)

        # A 1x3 convolution matches the channel count of the skip path when
        # input and output widths differ.
        if nb_filts[0] != nb_filts[1]:
            self.downsample = True
            self.conv_downsample = nn.Conv2d(in_channels=nb_filts[0],
                                             out_channels=nb_filts[1],
                                             padding=(0, 1),
                                             kernel_size=(1, 3),
                                             stride=1)

        else:
            self.downsample = False
        self.mp = nn.MaxPool2d((1, 3))  # self.mp = nn.MaxPool2d((1,4))

    def forward(self, x):
        """Apply the block to ``x`` and return the pooled result."""
        identity = x
        if not self.first:
            out = self.bn1(x)
            out = self.selu(out)
        else:
            out = x
        # NOTE(review): conv1 is applied to ``x``, not to ``out``, so the
        # bn1/SELU result above is discarded (bn1 still runs on x in this
        # branch). Looks unintended, but changing it alters the numerics of
        # already-trained weights -- confirm before fixing.
        out = self.conv1(x)

        # print('out',out.shape)
        out = self.bn2(out)
        out = self.selu(out)
        # print('out',out.shape)
        out = self.conv2(out)
        #print('conv2 out',out.shape)
        if self.downsample:
            identity = self.conv_downsample(identity)

        # residual connection, then pool along the last axis only
        out += identity
        out = self.mp(out)
        return out


class Model(nn.Module):
    """Raw-waveform classifier: sinc front-end, residual encoder, graph attention.

    The encoder output is turned into a spectral and a temporal graph, which
    are processed by two parallel heterogeneous graph-attention branches with
    learnable master nodes. The branches are merged with an element-wise
    maximum and read out into a two-class linear layer.

    Args:
        d_args: configuration mapping; the keys read here are ``"filts"``,
            ``"gat_dims"``, ``"pool_ratios"``, ``"temperatures"`` and
            ``"first_conv"``.
    """

    def __init__(self, d_args):
        super().__init__()

        self.d_args = d_args
        filts = d_args["filts"]
        gat_dims = d_args["gat_dims"]
        pool_ratios = d_args["pool_ratios"]
        temperatures = d_args["temperatures"]

        # Fixed sinc filterbank applied to the raw waveform.
        self.conv_time = CONV(out_channels=filts[0],
                              kernel_size=d_args["first_conv"],
                              in_channels=1)
        self.first_bn = nn.BatchNorm2d(num_features=1)

        self.drop = nn.Dropout(0.5, inplace=True)
        self.drop_way = nn.Dropout(0.2, inplace=True)
        self.selu = nn.SELU(inplace=True)

        # Six residual blocks; filts[1..4] are [in, out] channel pairs.
        self.encoder = nn.Sequential(
            nn.Sequential(Residual_block(nb_filts=filts[1], first=True)),
            nn.Sequential(Residual_block(nb_filts=filts[2])),
            nn.Sequential(Residual_block(nb_filts=filts[3])),
            nn.Sequential(Residual_block(nb_filts=filts[4])),
            nn.Sequential(Residual_block(nb_filts=filts[4])),
            nn.Sequential(Residual_block(nb_filts=filts[4])))

        # Learnable positional term added to the spectral nodes.
        # NOTE(review): the node count 23 is hard-coded; it must be
        # broadcast-compatible with the spectral axis produced by the encoder
        # -- presumably tied to the filterbank size, confirm.
        self.pos_S = nn.Parameter(torch.randn(1, 23, filts[-1][-1]))
        # One learnable master node per inference branch.
        self.master1 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))
        self.master2 = nn.Parameter(torch.randn(1, 1, gat_dims[0]))

        self.GAT_layer_S = GraphAttentionLayer(filts[-1][-1],
                                               gat_dims[0],
                                               temperature=temperatures[0])
        self.GAT_layer_T = GraphAttentionLayer(filts[-1][-1],
                                               gat_dims[0],
                                               temperature=temperatures[1])

        # Branch 1 (ST11 -> ST12) and branch 2 (ST21 -> ST22).
        self.HtrgGAT_layer_ST11 = HtrgGraphAttentionLayer(
            gat_dims[0], gat_dims[1], temperature=temperatures[2])
        self.HtrgGAT_layer_ST12 = HtrgGraphAttentionLayer(
            gat_dims[1], gat_dims[1], temperature=temperatures[2])

        self.HtrgGAT_layer_ST21 = HtrgGraphAttentionLayer(
            gat_dims[0], gat_dims[1], temperature=temperatures[2])

        self.HtrgGAT_layer_ST22 = HtrgGraphAttentionLayer(
            gat_dims[1], gat_dims[1], temperature=temperatures[2])

        self.pool_S = GraphPool(pool_ratios[0], gat_dims[0], 0.3)
        self.pool_T = GraphPool(pool_ratios[1], gat_dims[0], 0.3)
        self.pool_hS1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
        self.pool_hT1 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)

        self.pool_hS2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)
        self.pool_hT2 = GraphPool(pool_ratios[2], gat_dims[1], 0.3)

        # Readout concatenates five gat_dims[1]-sized vectors (see forward).
        self.out_layer = nn.Linear(5 * gat_dims[1], 2)

    def forward(self, x, Freq_aug=False):
        """Run the model.

        Args:
            x: input batch -- assumes raw waveforms of shape (batch, samples);
                TODO confirm against the data pipeline.
            Freq_aug: forwarded as ``mask`` to the sinc front-end.

        Returns:
            Tuple ``(output, last_hidden)``: the two-class output of
            ``out_layer`` and the feature vector it was computed from.
        """

        # add a channel axis for the 1-D sinc convolution
        x = x.unsqueeze(1)
        x = self.conv_time(x, mask=Freq_aug)
        # treat the filterbank output as a one-channel 2-D map
        x = x.unsqueeze(dim=1)
        x = F.max_pool2d(torch.abs(x), (3, 3))
        x = self.first_bn(x)
        x = self.selu(x)

        # get embeddings using encoder
        # (#bs, #filt, #spec, #seq)
        e = self.encoder(x)

        # spectral GAT (GAT-S)
        e_S, _ = torch.max(torch.abs(e), dim=3)  # max along time
        e_S = e_S.transpose(1, 2) + self.pos_S

        gat_S = self.GAT_layer_S(e_S)
        out_S = self.pool_S(gat_S)  # (#bs, #node, #dim)

        # temporal GAT (GAT-T)
        e_T, _ = torch.max(torch.abs(e), dim=2)  # max along freq
        e_T = e_T.transpose(1, 2)

        gat_T = self.GAT_layer_T(e_T)
        out_T = self.pool_T(gat_T)

        # learnable master node
        # NOTE(review): these expanded copies are overwritten below before
        # being read; the layers receive the unexpanded self.master1 /
        # self.master2 instead.
        master1 = self.master1.expand(x.size(0), -1, -1)
        master2 = self.master2.expand(x.size(0), -1, -1)

        # inference 1
        out_T1, out_S1, master1 = self.HtrgGAT_layer_ST11(
            out_T, out_S, master=self.master1)

        out_S1 = self.pool_hS1(out_S1)
        out_T1 = self.pool_hT1(out_T1)

        # second layer of branch 1, added residually
        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST12(
            out_T1, out_S1, master=master1)
        out_T1 = out_T1 + out_T_aug
        out_S1 = out_S1 + out_S_aug
        master1 = master1 + master_aug

        # inference 2
        out_T2, out_S2, master2 = self.HtrgGAT_layer_ST21(
            out_T, out_S, master=self.master2)
        out_S2 = self.pool_hS2(out_S2)
        out_T2 = self.pool_hT2(out_T2)

        # second layer of branch 2, added residually
        out_T_aug, out_S_aug, master_aug = self.HtrgGAT_layer_ST22(
            out_T2, out_S2, master=master2)
        out_T2 = out_T2 + out_T_aug
        out_S2 = out_S2 + out_S_aug
        master2 = master2 + master_aug

        out_T1 = self.drop_way(out_T1)
        out_T2 = self.drop_way(out_T2)
        out_S1 = self.drop_way(out_S1)
        out_S2 = self.drop_way(out_S2)
        master1 = self.drop_way(master1)
        master2 = self.drop_way(master2)

        # merge the two branches with an element-wise maximum
        out_T = torch.max(out_T1, out_T2)
        out_S = torch.max(out_S1, out_S2)
        master = torch.max(master1, master2)

        # node-wise readout: max of absolute values and mean, per graph
        T_max, _ = torch.max(torch.abs(out_T), dim=1)
        T_avg = torch.mean(out_T, dim=1)

        S_max, _ = torch.max(torch.abs(out_S), dim=1)
        S_avg = torch.mean(out_S, dim=1)

        last_hidden = torch.cat(
            [T_max, T_avg, S_max, S_avg, master.squeeze(1)], dim=1)

        last_hidden = self.drop(last_hidden)
        output = self.out_layer(last_hidden)

        return output,last_hidden

# RawBoost

In [4]:
'''
   Hemlata Tak, Madhu Kamble, Jose Patino, Massimiliano Todisco, Nicholas Evans.
   RawBoost: A Raw Data Boosting and Augmentation Method applied to Automatic Speaker Verification Anti-Spoofing.
   In Proc. ICASSP 2022, pp:6382--6386.
'''

def randRange(x1, x2, integer):
    """Draw a single value uniformly between ``x1`` and ``x2``.

    Args:
        x1: Lower bound of the sampling interval.
        x2: Upper bound of the sampling interval.
        integer: If truthy, truncate the draw and return a plain ``int``;
            otherwise return the raw draw.

    Returns:
        An ``int`` when ``integer`` is truthy, else a NumPy array of
        shape ``(1,)`` holding the sampled float.
    """
    y = np.random.uniform(low=x1, high=x2, size=(1,))
    if integer:
        # Pick the single element explicitly: int() applied to a 1-element
        # array relies on an implicit array-to-scalar conversion that NumPy
        # has deprecated.
        y = int(y[0])
    return y

def normWav(x,always):
    """Peak-normalise a waveform.

    Args:
        x: Array of samples.
        always: If truthy, always scale so the largest absolute sample is 1;
            otherwise scale only when the peak exceeds 1.

    Returns:
        The scaled array, or ``x`` itself when no scaling is applied
        (peak within range, or an all-zero signal).
    """
    peak = np.amax(abs(x))
    # An all-zero signal has nothing to normalise; dividing by its zero peak
    # would fill the output with NaNs.
    if peak == 0:
        return x
    if always or peak > 1:
        x = x / peak
    return x


def genNotchCoeffs(nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,minG,maxG,fs):
    """Build the coefficients of a random cascade of FIR notch filters.

    For each of the ``nBands`` bands a centre frequency, a bandwidth and a tap
    count are drawn at random, a two-cutoff ``scipy.signal.firwin`` filter is
    designed for that band, and the per-band filters are cascaded by
    convolving their coefficients. The result is rescaled so that the peak of
    its magnitude response equals a randomly drawn gain ``G`` in dB.

    Args:
        nBands: Number of notch bands to cascade.
        minF, maxF: Range of the centre frequency in Hz.
        minBW, maxBW: Range of the band width in Hz.
        minCoeff, maxCoeff: Range of the number of filter taps.
        minG, maxG: Range of the overall gain in dB.
        fs: Sampling rate in Hz.

    Returns:
        The cascaded, gain-scaled FIR coefficients.
    """
    b = 1
    for _ in range(nBands):
        # randRange hands back 1-element arrays for non-integer draws; unwrap
        # them once here so no implicit (deprecated) array-to-scalar
        # conversion is needed further down.
        fc = float(np.ravel(randRange(minF, maxF, 0))[0])
        bw = float(np.ravel(randRange(minBW, maxBW, 0))[0])
        c = randRange(minCoeff, maxCoeff, 1)

        # Force an odd number of taps for the firwin design below.
        if c % 2 == 0:
            c = c + 1
        f1 = fc - bw/2
        f2 = fc + bw/2
        # Keep both band edges strictly inside (0, fs/2).
        if f1 <= 0:
            f1 = 1/1000
        if f2 >= fs/2:
            f2 = fs/2-1/1000
        b = np.convolve(signal.firwin(c, [f1, f2], window='hamming', fs=fs),b)

    G = randRange(minG,maxG,0)
    _, h = signal.freqz(b, 1, fs=fs)
    # Normalise the peak response to 1, then apply the gain G (dB -> linear).
    b = pow(10, G/20)*b/np.amax(abs(h))
    return b


def filterFIR(x,b):
    """Apply the FIR filter ``b`` to ``x`` and return an output as long as ``x``.

    The input is zero-padded at the end by ``len(b) + 1`` samples, filtered,
    and then trimmed by about half that amount on each side.

    Args:
        x: 1-D array of input samples.
        b: 1-D array of FIR coefficients.

    Returns:
        The filtered, trimmed signal.
    """
    tail = b.shape[0] + 1
    padded = np.pad(x, (0, tail), 'constant')
    filtered = signal.lfilter(b, 1, padded)
    # Drop floor(tail/2) samples in front and ceil(tail/2) at the back.
    start = tail // 2
    stop = filtered.shape[0] - (tail + 1) // 2
    return filtered[start:stop]

# Linear and non-linear convolutive noise
def LnL_convolutive_noise(x,N_f,nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,minG,maxG,minBiasLinNonLin,maxBiasLinNonLin,fs):
    """Distort ``x`` with linear and non-linear convolutive noise.

    Sums, for every order 1..``N_f``, the signal raised to that power and
    passed through a freshly drawn random notch-filter cascade. From the
    second order onwards the gain range is lowered once by the given biases.
    The sum is mean-removed and rescaled only if its peak exceeds 1.

    Args:
        x: 1-D waveform.
        N_f: Highest power of ``x`` to include (1 keeps only the linear term).
        nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff: Notch-filter
            parameters forwarded to ``genNotchCoeffs``.
        minG, maxG: Gain range (dB) for the linear term.
        minBiasLinNonLin, maxBiasLinNonLin: Amounts subtracted from
            ``minG``/``maxG`` for the higher-order terms.
        fs: Sampling rate in Hz.

    Returns:
        The distorted waveform.
    """
    gain_lo, gain_hi = minG, maxG
    mixed = np.zeros(x.shape[0])
    for order in range(1, N_f + 1):
        if order == 2:
            # Applied once; every later order keeps the lowered gain range.
            gain_lo = gain_lo - minBiasLinNonLin
            gain_hi = gain_hi - maxBiasLinNonLin
        taps = genNotchCoeffs(nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,gain_lo,gain_hi,fs)
        mixed = mixed + filterFIR(np.power(x, order), taps)
    mixed = mixed - np.mean(mixed)
    return normWav(mixed, 0)


# Impulsive signal dependent noise
def ISD_additive_noise(x, P, g_sd):
    """Add impulsive, signal-dependent noise to a random subset of samples.

    A random percentage ``beta`` in ``[0, P)`` of the samples is selected; each
    selected sample gets ``g_sd * sample * f`` added, where ``f`` is the
    product of two independent uniform draws from ``[-1, 1)``. The result is
    rescaled only if its peak exceeds 1.

    Args:
        x: 1-D waveform.
        P: Upper bound, in percent, on the share of samples to perturb.
        g_sd: Gain applied to the perturbation.

    Returns:
        A perturbed copy of ``x``; the input array is not modified.
    """
    # randRange returns a 1-element array for float draws; unwrap it so the
    # int() below does not depend on the deprecated implicit array-to-scalar
    # conversion.
    beta = float(np.ravel(randRange(0, P, 0))[0])

    y = copy.deepcopy(x)
    x_len = x.shape[0]
    n = int(x_len*(beta/100))
    p = np.random.permutation(x_len)[:n]
    f_r = np.multiply(((2*np.random.rand(p.shape[0]))-1),((2*np.random.rand(p.shape[0]))-1))
    r = g_sd * x[p] * f_r
    y[p] = x[p] + r
    y = normWav(y,0)
    return y


# Stationary signal independent noise

def SSI_additive_noise(x,SNRmin,SNRmax,nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,minG,maxG,fs):
    """Add stationary, signal-independent coloured noise at a random SNR.

    White Gaussian noise is shaped by a random notch-filter cascade,
    peak-normalised, scaled relative to the L2 norm of ``x`` for an SNR drawn
    from ``[SNRmin, SNRmax)`` dB, and added to ``x``.

    Args:
        x: 1-D waveform.
        SNRmin, SNRmax: Range of the signal-to-noise ratio in dB.
        nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG:
            Notch-filter parameters forwarded to ``genNotchCoeffs``.
        fs: Sampling rate in Hz.

    Returns:
        ``x`` plus the scaled coloured noise.
    """
    white = np.random.normal(0, 1, x.shape[0])
    taps = genNotchCoeffs(nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,minG,maxG,fs)
    coloured = normWav(filterFIR(white, taps), 1)
    snr_db = randRange(SNRmin, SNRmax, 0)
    # Unit-norm noise, scaled to the signal norm, attenuated by the SNR.
    unit_noise = coloured / np.linalg.norm(coloured,2)
    scaled_noise = unit_noise * np.linalg.norm(x,2)
    scaled_noise = scaled_noise / 10.0**(0.05 * snr_db)
    return x + scaled_noise

# Data Utils

In [5]:
def pad(x, max_len=80000):
    """Crop or repeat-pad a 1-D signal to exactly ``max_len`` samples.

    Args:
        x: 1-D array of samples.
        max_len: Target length in samples.

    Returns:
        The first ``max_len`` samples of ``x`` when it is long enough;
        otherwise ``x`` tiled end-to-end and cut at ``max_len``. An empty
        input yields ``max_len`` zeros of the same dtype.
    """
    x_len = x.shape[0]
    if x_len >= max_len:
        return x[:max_len]
    if x_len == 0:
        # Nothing to repeat: return silence rather than dividing by zero below.
        return np.zeros(max_len, dtype=x.dtype)
    # need to pad
    num_repeats = int(max_len / x_len)+1
    padded_x = np.tile(x, (1, num_repeats))[:, :max_len][0]
    return padded_x

class Dataset_ASVspoof_train(Dataset):
    """Training set that mixes every utterance with a randomly chosen second one.

    Each item is the sum of two fixed-length clips passed through the selected
    RawBoost augmentation, together with a two-element target built from the
    labels of both clips.
    """

    def __init__(self,args,list_IDs, labels,algo):
        """Store the configuration.

        Args:
            args: Namespace with the RawBoost parameters.
            list_IDs: List of utterance keys (file names without extension).
            labels: Integer labels indexable by dataset position, aligned
                with ``list_IDs``.
            algo: RawBoost algorithm id forwarded to
                ``process_Rawboost_feature``.
        """
        self.args = args
        self.algo = algo
        self.list_IDs = list_IDs
        self.labels = labels
        # Samples kept per clip: 5 s at the 16 kHz rate used when loading.
        self.cut = 80000

    def get_labels(self):
        """Return the label container given at construction."""
        return self.labels

    def __len__(self):
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Return ``(waveform, target)`` for the clip at ``index`` mixed with a random partner."""
        first_wav, fs = librosa.load('./train16000/'+self.list_IDs[index]+'.wav', sr=16000)
        first_clip = pad(first_wav, self.cut)

        # Partner clip, drawn uniformly over the whole dataset (may equal index).
        partner = np.random.randint(0, len(self.list_IDs))
        second_wav, fs = librosa.load('./train16000/'+self.list_IDs[partner]+'.wav', sr=16000)
        second_clip = pad(second_wav, self.cut)

        mixture = process_Rawboost_feature(first_clip + second_clip, fs, self.args, self.algo)

        label_a = self.labels[index]
        label_b = self.labels[partner]
        # target[0] is 1 unless both labels are 1; target[1] is 1 if either is 1.
        target = [(label_a and label_b) ^ 1, label_a or label_b]
        return Tensor(mixture), Tensor(target)



class Dataset_ASVspoof_dev(Dataset):
    """Unlabelled set: RawBoost-augmented, fixed-length waveforms without targets."""

    def __init__(self, list_IDs,args,algo):
        """Store the configuration.

        Args:
            list_IDs: List of utterance keys (file names without extension).
            args: Namespace with the RawBoost parameters.
            algo: RawBoost algorithm id forwarded to
                ``process_Rawboost_feature``.
        """
        self.list_IDs = list_IDs
        self.args = args
        self.algo = algo
        # Samples kept per clip: 5 s at the 16 kHz rate used when loading.
        self.cut = 80000

    def __len__(self):
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load, augment and crop/pad the utterance at ``index``."""
        key = self.list_IDs[index]
        wav, fs = librosa.load('./unlabeled_data16000/'+key+'.wav', sr=16000)
        # Augmentation runs on the full-length signal, before cropping/padding.
        augmented = process_Rawboost_feature(wav, fs, self.args, self.algo)
        return Tensor(pad(augmented, self.cut))
        

class Dataset_ASVspoof_eval(Dataset):
    """Test set: fixed-length waveforms, no augmentation and no targets."""

    def __init__(self, list_IDs):
        """Store the utterance keys.

        Args:
            list_IDs: List of utterance keys (file names without extension).
        """
        self.list_IDs = list_IDs
        # Samples kept per clip: 5 s at the 16 kHz rate used when loading.
        self.cut = 80000

    def __len__(self):
        return len(self.list_IDs)

    def __getitem__(self, index):
        """Load the utterance at ``index`` and crop/pad it to ``self.cut`` samples."""
        key = self.list_IDs[index]
        wav, fs = librosa.load('./test16000/'+key+'.wav', sr=16000)
        return Tensor(pad(wav, self.cut))

#--------------RawBoost data augmentation algorithms---------------------------##

def process_Rawboost_feature(feature, sr,args,algo):
    """Apply the RawBoost augmentation selected by ``algo``.

    Algorithm ids:
        1: convolutive noise, 2: impulsive noise, 3: coloured additive noise,
        4: 1+2+3 in series, 5: 1+2 in series, 6: 1+3 in series,
        7: 2+3 in series, 8: 1 and 2 in parallel (summed, then normalised).
        Any other value returns ``feature`` unchanged.

    Args:
        feature: 1-D waveform.
        sr: Sampling rate in Hz.
        args: Namespace carrying the RawBoost parameters; only read when an
            augmentation is actually applied.
        algo: Algorithm id.

    Returns:
        The augmented (or untouched) waveform.
    """
    def convolutive(wave):
        return LnL_convolutive_noise(wave,args.N_f,args.nBands,args.minF,args.maxF,args.minBW,args.maxBW,
                 args.minCoeff,args.maxCoeff,args.minG,args.maxG,args.minBiasLinNonLin,args.maxBiasLinNonLin,sr)

    def impulsive(wave):
        return ISD_additive_noise(wave, args.P, args.g_sd)

    def coloured(wave):
        return SSI_additive_noise(wave,args.SNRmin,args.SNRmax,args.nBands,args.minF,
                args.maxF,args.minBW,args.maxBW,args.minCoeff,args.maxCoeff,args.minG,args.maxG,sr)

    # Series pipelines: each stage consumes the output of the previous one.
    series = (
        (1, (convolutive,)),
        (2, (impulsive,)),
        (3, (coloured,)),
        (4, (convolutive, impulsive, coloured)),
        (5, (convolutive, impulsive)),
        (6, (convolutive, coloured)),
        (7, (impulsive, coloured)),
    )
    for code, stages in series:
        if algo == code:
            for stage in stages:
                feature = stage(feature)
            return feature

    if algo == 8:
        # Parallel: both branches see the original signal; the sum is
        # rescaled only if its peak exceeds 1.
        return normWav(convolutive(feature) + impulsive(feature), 0)

    # original data without Rawboost processing
    return feature

In [6]:

# Generic type variables for annotations; T_co is declared covariant.
T_co = TypeVar('T_co', covariant=True)
T = TypeVar('T')


def send_to_device(tensor, device):
    """
    Move every tensor inside a nested list/tuple/dict structure to ``device``.

    Args:
        tensor (nested list/tuple/dictionary of :obj:`torch.Tensor`):
            The data to move. Leaves without a ``to`` method are returned as they are.
        device (:obj:`torch.device`):
            The target device.

    Returns:
        A structure of the same container types as :obj:`tensor`, with every
        leaf that has a ``to`` method replaced by ``leaf.to(device)``.
    """
    if isinstance(tensor, dict):
        moved = {key: send_to_device(value, device) for key, value in tensor.items()}
        return type(tensor)(moved)
    if isinstance(tensor, (list, tuple)):
        return type(tensor)(send_to_device(item, device) for item in tensor)
    if hasattr(tensor, "to"):
        return tensor.to(device)
    return tensor


class ForeverDataIterator:
    r"""Endless iterator over a data loader: restarts it whenever it is exhausted."""

    def __init__(self, data_loader: DataLoader, device=None):
        self.data_loader = data_loader
        self.device = device
        self.iter = iter(self.data_loader)

    def __next__(self):
        """Return the next batch, moved to ``self.device`` when one is set."""
        try:
            batch = next(self.iter)
        except StopIteration:
            # Exhausted: start a fresh pass over the loader.
            self.iter = iter(self.data_loader)
            batch = next(self.iter)
        if self.device is None:
            return batch
        return send_to_device(batch, self.device)

    def __len__(self):
        """Length of the wrapped loader (one pass), not of the endless stream."""
        return len(self.data_loader)


def entropy(predictions: torch.Tensor, reduction='none') -> torch.Tensor:
    r"""Entropy of each row of class probabilities.

    .. math::
        entropy(p) = - \sum_{c=1}^C p_c \log p_c

    where C is the number of classes. A small constant (1e-5) is added inside
    the logarithm to avoid ``log(0)``.

    Args:
        predictions (tensor): Class probabilities per sample (e.g. softmax output).
        reduction (str, optional): ``'none'`` | ``'mean'``. With ``'mean'`` the
          per-sample entropies are averaged; any other value leaves them
          unreduced. Default: ``'none'``

    Shape:
        - predictions: :math:`(minibatch, C)` where C means the number of classes.
        - Output: :math:`(minibatch, )` by default. If :attr:`reduction` is ``'mean'``, then scalar.
    """
    eps = 1e-5
    per_sample = (-predictions * torch.log(predictions + eps)).sum(dim=1)
    return per_sample.mean() if reduction == 'mean' else per_sample



class MinimumClassConfusionLoss(nn.Module):
    r"""
    Minimum Class Confusion (MCC) loss: penalises confusion between classes in
    the predictions on the target domain.

    See `Minimum Class Confusion for Versatile Domain Adaptation (ECCV 2020) <https://arxiv.org/abs/1912.03699>`_

    Args:
        temperature (float) : Divisor applied to the logits before the softmax;
          with 1.0 the predictions are the plain softmax.

    .. note::
        Make sure that temperature is larger than 0.

    Inputs: g_t
        - g_t (tensor): unnormalized classifier predictions on target domain, :math:`g^t`

    Shape:
        - g_t: :math:`(minibatch, C)` where C means the number of classes.
        - Output: scalar.

    Examples::
        >>> temperature = 2.0
        >>> loss = MinimumClassConfusionLoss(temperature)
        >>> # logits output from target domain
        >>> g_t = torch.randn(batch_size, num_classes)
        >>> output = loss(g_t)

    MCC can also be added as a regulariser to another adaptation loss, e.g.::
        >>> mcc_loss = MinimumClassConfusionLoss(temperature=2.0)
        >>> total_loss = cdan_loss(g_s, f_s, g_t, f_t) + mcc_loss(g_t)
    """

    def __init__(self, temperature: float):
        super().__init__()
        self.temperature = temperature

    def forward(self, logits: torch.Tensor) -> torch.Tensor:
        n_samples, n_classes = logits.shape
        probs = F.softmax(logits / self.temperature, dim=1)  # n_samples x n_classes

        # Per-sample weights from prediction entropy (lower entropy -> larger
        # weight); detached so no gradient flows through the weights, and
        # rescaled to sum to n_samples.
        certainty = 1 + torch.exp(-entropy(probs).detach())
        sample_weight = (n_samples * certainty / torch.sum(certainty)).unsqueeze(dim=1)  # n_samples x 1

        # Weighted class-correlation matrix, then normalised by its sums along dim 1.
        confusion = torch.mm((probs * sample_weight).transpose(1, 0), probs)  # n_classes x n_classes
        confusion = confusion / torch.sum(confusion, dim=1)

        # Everything off the diagonal counts as confusion between classes.
        off_diagonal = torch.sum(confusion) - torch.trace(confusion)
        return off_diagonal / n_classes

# Seed

In [7]:
def set_random_seed(random_seed, args=None):
    """ set_random_seed(random_seed, args=None)

    Seed torch, python's ``random`` and numpy, export PYTHONHASHSEED, and
    configure the cudnn flags when CUDA is available.

    input
    -----
      random_seed: integer random seed
      args: parsed arguments providing ``cudnn_deterministic_toggle`` and
            ``cudnn_benchmark_toggle``; when None, cudnn is set to
            deterministic=True and benchmark=False
    """
    torch.manual_seed(random_seed)
    random.seed(random_seed)
    np.random.seed(random_seed)
    os.environ['PYTHONHASHSEED'] = str(random_seed)

    # Note: the deterministic default may result in RuntimeError for some ops,
    # see https://pytorch.org/docs/stable/notes/randomness.html
    if args is not None:
        cudnn_deterministic = args.cudnn_deterministic_toggle
        cudnn_benchmark = args.cudnn_benchmark_toggle

        # Only report deviations from the reproducible defaults.
        if not cudnn_deterministic:
            print("cudnn_deterministic set to False")
        if cudnn_benchmark:
            print("cudnn_benchmark set to True")
    else:
        cudnn_deterministic = True
        cudnn_benchmark = False

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(random_seed)
    torch.backends.cudnn.deterministic = cudnn_deterministic
    torch.backends.cudnn.benchmark = cudnn_benchmark
    return

# Metrics

In [8]:
def expected_calibration_error(y_true, y_prob, n_bins=10):
    """Expected calibration error over ``n_bins`` equal-width probability bins.

    Args:
        y_true: Binary ground-truth labels.
        y_prob: Predicted probabilities in [0, 1].
        n_bins: Number of equal-width bins.

    Returns:
        Sum over non-empty bins of (share of samples in the bin) times
        |observed positive rate - mean predicted probability|.
    """
    frac_positive, mean_predicted = calibration_curve(y_true, y_prob, n_bins=n_bins, strategy='uniform')

    # Bin occupancy, used as weights; empty bins are dropped.
    edges = np.linspace(0, 1, n_bins + 1)
    counts, _ = np.histogram(y_prob, bins=edges, density=False)
    weights = (counts / len(y_prob))[counts > 0]

    # Align the curve arrays with the number of non-empty bins.
    kept = len(weights)
    gap = np.abs(frac_positive[:kept] - mean_predicted[:kept])
    return np.sum(weights * gap)
    
def auc_brier_ece(answer_df, submission_df):
    """Combined score ``0.5 * (1 - AUC) + 0.25 * Brier + 0.25 * ECE`` (lower is better).

    The first column of both frames is treated as the id column; every other
    column is scored as one class and the three metrics are averaged over
    the classes.

    Args:
        answer_df: Ground-truth frame.
        submission_df: Prediction frame with the same columns as ``answer_df``.

    Returns:
        The combined score.

    Raises:
        ValueError: If the submission contains missing values or its columns
            differ from those of ``answer_df``.
    """
    if submission_df.isnull().values.any():
        raise ValueError("The submission dataframe contains missing values.")

    same_width = len(answer_df.columns) == len(submission_df.columns)
    if not same_width or not all(answer_df.columns == submission_df.columns):
        raise ValueError("The columns of the answer and submission dataframes do not match.")

    # Keep only submitted rows whose id appears in the answers, renumbered from 0.
    known_ids = submission_df.iloc[:, 0].isin(answer_df.iloc[:, 0])
    submission_df = submission_df[known_ids]
    submission_df.index = range(submission_df.shape[0])

    class_columns = answer_df.columns[1:]

    # Discrimination: ROC AUC per class.
    auc_scores = [roc_auc_score(answer_df[col], submission_df[col]) for col in class_columns]
    mean_auc = np.mean(auc_scores)

    # Calibration: Brier score and ECE per class.
    brier_scores = []
    ece_scores = []
    for col in class_columns:
        y_true = answer_df[col].values
        y_prob = submission_df[col].values
        brier_scores.append(mean_squared_error(y_true, y_prob))
        ece_scores.append(expected_calibration_error(y_true, y_prob))
    mean_brier = np.mean(brier_scores)
    mean_ece = np.mean(ece_scores)

    return 0.5 * (1 - mean_auc) + 0.25 * mean_brier + 0.25 * mean_ece

# Main

In [9]:
def produce_evaluation_file(
    data_loader: DataLoader,
    model,
    device: torch.device,
    id):
    """Score ``model`` on a labelled loader and return the combined metric.

    Args:
        data_loader: Yields ``(batch_x, batch_y)`` pairs; ``batch_y`` has two
            columns, used here as the 'fake' and 'real' targets.
        model: Network whose first output holds the logits.
        device: Device the inputs are moved to.
        id: Sample ids, in loader order, used as the 'id' column.

    Returns:
        The value of ``auc_brier_ece`` for the collected predictions.
    """
    model.eval()

    predictions, trues = [], []
    with torch.no_grad():
        for batch_x, batch_y in tqdm(data_loader, leave=False):
            logits = model(batch_x.to(device))[0]
            probs = torch.sigmoid(logits).cpu().detach().numpy()
            predictions.extend(probs.tolist())
            trues.extend(batch_y.numpy().tolist())
    predictions = np.array(predictions)
    trues = np.array(trues)

    # Column 0 is scored as 'fake', column 1 as 'real'.
    sub = pd.DataFrame({'id':id,
                  'fake':predictions[:,0],
                  'real':predictions[:,1]})
    ans = pd.DataFrame({'id':id,
                  'fake':trues[:,0],
                  'real':trues[:,1]})

    return auc_brier_ece(ans, sub)

def inference(data_loader, model, device, save_path):
    """Predict on an unlabelled loader and write a submission CSV.

    Args:
        data_loader: Yields batches of input features.
        model: Network whose first output holds the logits.
        device: Device the model and inputs are moved to.
        save_path: Destination of the CSV file.

    The predictions replace every column after the first of
    './sample_submission.csv', which is then saved to ``save_path``.
    """
    model.to(device)
    model.eval()

    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(data_loader)):
            logits = model(features.to(device))[0]
            batch_probs = torch.sigmoid(logits).cpu().detach().numpy()
            predictions.extend(batch_probs.tolist())

    submit = pd.read_csv('./sample_submission.csv')
    submit.iloc[:, 1:] = predictions
    submit.to_csv(save_path, index=False)
    
def label_smoothing(labels, smoothing=0.1):
    """Smooth binary targets towards 0.5.

    Args:
        labels: Tensor of 0/1 targets.
        smoothing: Smoothing strength in ``[0, 1)``.

    Returns:
        ``labels * (1 - smoothing) + smoothing / 2``, computed without
        tracking gradients.
    """
    assert 0 <= smoothing < 1
    offset = smoothing / 2
    with torch.no_grad():
        return labels * (1 - smoothing) + offset

def train(train_source_iter: ForeverDataIterator, train_target_iter: ForeverDataIterator, model,
          domain_adv: ConditionalDomainAdversarialLoss,optimizer, ad_optimizer,
          lr_scheduler: LambdaLR, lr_scheduler_ad,
          epoch: int, args: argparse.Namespace):
    """Run one epoch of adversarial domain-adaptation training.

    Every iteration draws one labelled source batch and one unlabelled target
    batch and performs a two-step sharpness-aware update of the model
    (``optimizer.first_step`` / ``optimizer.second_step``) plus one ordinary
    step of the domain-discriminator optimizer. Inputs are moved to the
    module-level ``device``.

    Args:
        train_source_iter: Endless iterator yielding ``(inputs, labels)``.
        train_target_iter: Endless iterator yielding unlabelled inputs.
        model: Network returning ``(logits, features)``.
        domain_adv: Adversarial loss, called as ``domain_adv(y_s, f_s, y_t, f_t)``.
        optimizer: Model optimizer exposing ``first_step`` and ``second_step``.
        ad_optimizer: Optimizer of the domain discriminator.
        lr_scheduler: Scheduler stepped with the epoch's classification loss.
        lr_scheduler_ad: Scheduler stepped with the epoch's transfer loss.
        epoch: Current epoch index (not used inside this function).
        args: Namespace providing ``iters_per_epoch``.

    Returns:
        ``(total_cls_loss, total_transfer_loss, total_loss)`` summed over the
        epoch, as tensors detached from the autograd graph.
    """
    # switch to train mode
    model.train()
    domain_adv.train()
    criterion = nn.BCEWithLogitsLoss()
    total_cls_loss = 0
    total_transfer_loss = 0
    total_loss = 0
    for _ in tqdm(range(args.iters_per_epoch)):
        x_s, labels_s = next(train_source_iter)
        x_t = next(train_target_iter)

        x_s = x_s.to(device)
        x_t = x_t.to(device)
        labels_s = label_smoothing(labels_s).to(device)

        optimizer.zero_grad()
        ad_optimizer.zero_grad()

        # compute task loss for first step
        # (chunk(2) splits source from target; assumes both batches have the
        # same size -- TODO confirm against the loaders)
        x = torch.cat((x_s, x_t), dim=0)
        y, _ = model(x)
        y_s, _ = y.chunk(2, dim=0)
        cls_loss = criterion(y_s, labels_s)
        loss = cls_loss
        loss.backward()

        # Calculate ϵ̂ (w) and add it to the weights
        optimizer.first_step(zero_grad=True)

        # Calculate task loss and domain loss
        y, f = model(x)
        y_s, y_t = y.chunk(2, dim=0)
        f_s, f_t = f.chunk(2, dim=0)

        cls_loss = criterion(y_s, labels_s)
        transfer_loss = domain_adv(y_s, f_s, y_t, f_t)
        loss = cls_loss + transfer_loss * 1.

        # Accumulate detached values: summing the live loss tensors would keep
        # every iteration's autograd history referenced until the epoch ends.
        total_cls_loss += cls_loss.detach()
        total_transfer_loss += transfer_loss.detach()
        total_loss += loss.detach()

        loss.backward()
        # Update parameters of domain classifier
        ad_optimizer.step()
        # Update parameters (Sharpness-Aware update)
        optimizer.second_step(zero_grad=True)

    # Both schedulers receive the epoch loss as their metric (presumably
    # ReduceLROnPlateau-style schedulers despite the LambdaLR annotation --
    # verify against the caller).
    lr_scheduler.step(total_cls_loss)
    lr_scheduler_ad.step(total_transfer_loss)

    return total_cls_loss, total_transfer_loss, total_loss

In [10]:
"""
@author: Junguang Jiang
@contact: JiangJunguang1123@outlook.com
"""

# Names exported by a star-import of this module.
__all__ = ['DomainDiscriminator']


class DomainDiscriminator(nn.Sequential):
    r"""Domain discriminator from
    `Domain-Adversarial Training of Neural Networks (ICML 2015) <https://arxiv.org/abs/1505.07818>`_

    A three-layer MLP ending in a sigmoid that predicts whether a feature
    vector comes from the source domain or the target domain.
    The source domain label is 1 and the target domain label is 0.

    Args:
        in_feature (int): dimension of the input feature
        hidden_size (int): dimension of the hidden features
        batch_norm (bool): whether use :class:`~torch.nn.BatchNorm1d`.
            Use :class:`~torch.nn.Dropout` if ``batch_norm`` is False. Default: True.

    Shape:
        - Inputs: (minibatch, `in_feature`)
        - Outputs: :math:`(minibatch, 1)`
    """

    def __init__(self, in_feature: int, hidden_size: int, batch_norm=True):
        def hidden_block(n_in):
            # Linear followed by either BatchNorm+ReLU or ReLU+Dropout.
            if batch_norm:
                return [
                    nn.Linear(n_in, hidden_size),
                    nn.BatchNorm1d(hidden_size),
                    nn.ReLU(),
                ]
            return [
                nn.Linear(n_in, hidden_size),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
            ]

        layers = hidden_block(in_feature) + hidden_block(hidden_size)
        layers += [nn.Linear(hidden_size, 1), nn.Sigmoid()]
        super().__init__(*layers)

    def get_parameters(self) -> List[Dict]:
        """Return one optimizer parameter group holding all parameters, with ``lr`` set to 1."""
        return [{"params": self.parameters(), "lr": 1.}]

In [11]:
# Command-line definition of every hyper-parameter used by this notebook.
parser = argparse.ArgumentParser(description='baseline')
# Hyperparameters
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--num_epochs', type=int, default=100)
# store_true combined with default=True: the flag is always on.
parser.add_argument('--is_scheduler', action='store_true', default=True,help='use scheduler (default: True)')

# model
parser.add_argument('--seed', type=int, default=42, 
                    help='random seed (default: 42)')


##modify
parser.add_argument('--lr', '--learning-rate', default=1e-2, type=float,
                        metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--lr-gamma', default=0.001,
                    type=float, help='parameter for lr scheduler')
parser.add_argument('--lr-decay', default=0.75,
                    type=float, help='parameter for lr scheduler')
parser.add_argument('--momentum', default=0.9,
                    type=float, metavar='M', help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-3, type=float,
                    metavar='W', help='weight decay (default: 1e-3)',
                    dest='weight_decay')

###
parser.add_argument('--model_path', type=str,
                    default=None, help='Model checkpoint')
parser.add_argument('--comment', type=str, default=None,
                    help='Comment to describe the saved model')

# Auxiliary arguments
parser.add_argument('--eval_output', type=str, default='epoch7.csv',
                    help='Path to save the evaluation result')
parser.add_argument('--eval', action='store_true', default=False,
                    help='eval mode')
parser.add_argument('--is_eval', action='store_true', default=False,help='eval database')
parser.add_argument('--eval_part', type=int, default=0)

# backend options
parser.add_argument('--cudnn-deterministic-toggle', action='store_false',
                    default=True, 
                    help='use cudnn-deterministic? (default true)')    

parser.add_argument('--cudnn-benchmark-toggle', action='store_true',
                    default=False, 
                    help='use cudnn-benchmark? (default false)') 

parser.add_argument('--temperature', default=2.0,
                        type=float, help='parameter temperature scaling')
# NOTE(review): the help text used to read "GPU ID", which cannot describe a
# float defaulting to 0.05; confirm that rho is consumed by the SAM optimizer.
parser.add_argument('--rho', type=float, default=0.05,
                    help='neighbourhood size rho of the sharpness-aware (SAM) optimizer')
##===================================================Rawboost data augmentation ======================================================================#

parser.add_argument('--algo', type=int, default=0, 
                help='Rawboost algos discriptions. 0: No augmentation 1: LnL_convolutive_noise, 2: ISD_additive_noise, 3: SSI_additive_noise, 4: series algo (1+2+3), \
                5: series algo (1+2), 6: series algo (1+3), 7: series algo(2+3), 8: parallel algo(1,2) .default=0]')

# LnL_convolutive_noise parameters
parser.add_argument('--nBands', type=int, default=5, 
                help='number of notch filters.The higher the number of bands, the more aggresive the distortions is.[default=5]')
parser.add_argument('--minF', type=int, default=20, 
                help='minimum centre frequency [Hz] of notch filter.[default=20] ')
parser.add_argument('--maxF', type=int, default=8000, 
                help='maximum centre frequency [Hz] (<sr/2)  of notch filter.[default=8000]')
parser.add_argument('--minBW', type=int, default=100, 
                help='minimum width [Hz] of filter.[default=100] ')
parser.add_argument('--maxBW', type=int, default=1000, 
                help='maximum width [Hz] of filter.[default=1000] ')
parser.add_argument('--minCoeff', type=int, default=10, 
                help='minimum filter coefficients. More the filter coefficients more ideal the filter slope.[default=10]')
parser.add_argument('--maxCoeff', type=int, default=100, 
                help='maximum filter coefficients. More the filter coefficients more ideal the filter slope.[default=100]')
parser.add_argument('--minG', type=int, default=0, 
                help='minimum gain factor of linear component.[default=0]')
parser.add_argument('--maxG', type=int, default=0, 
                help='maximum gain factor of linear component.[default=0]')
parser.add_argument('--minBiasLinNonLin', type=int, default=5, 
                help=' minimum gain difference between linear and non-linear components.[default=5]')
parser.add_argument('--maxBiasLinNonLin', type=int, default=20, 
                help=' maximum gain difference between linear and non-linear components.[default=20]')
parser.add_argument('--N_f', type=int, default=5, 
                help='order of the (non-)linearity where N_f=1 refers only to linear components.[default=5]')

# ISD_additive_noise parameters
# argparse %-formats help strings, so a literal percent sign must be written
# as %%; a bare % makes help rendering fail.
parser.add_argument('--P', type=int, default=10, 
                help='Maximum number of uniformly distributed samples in [%%].[defaul=10]')
parser.add_argument('--g_sd', type=int, default=2, 
                help='gain parameters > 0. [default=2]')
parser.add_argument('-i', '--iters-per-epoch', default=1000, type=int,
                        help='Number of iterations per epoch')
# SSI_additive_noise parameters
parser.add_argument('--SNRmin', type=int, default=10, 
                help='Minimum SNR value for coloured additive noise.[defaul=10]')
parser.add_argument('--SNRmax', type=int, default=40, 
                help='Maximum SNR value for coloured additive noise.[defaul=40]')

##===================================================Rawboost data augmentation ======================================================================#


# Root directory for checkpoints. exist_ok=True replaces the separate
# "exists?" check, which could race with another process creating it.
os.makedirs('models', exist_ok=True)
# An explicit empty argv keeps argparse from reading the notebook kernel's own
# command line, so every option takes its default.
args = parser.parse_args(args=[])

#make experiment reproducible
set_random_seed(args.seed, args)

# track = args.track

# assert track in ['LA', 'PA','DF'], 'Invalid track given'

# #database
# prefix_2021 = 'ASVspoof2021.{}'.format(track)

#define model saving path
# NOTE(review): the tag starts with a space, so the directory name does too;
# left as is because existing checkpoints may already live under that name.
model_tag = ' adam_fold0_{}_{}_{}'.format(
         args.num_epochs, args.batch_size, args.lr)
if args.comment:
    model_tag = model_tag + '_{}'.format(args.comment)
model_save_path = os.path.join('models', model_tag)

#set model save directory
os.makedirs(model_save_path, exist_ok=True)

#GPU device
device = 'cuda' if torch.cuda.is_available() else 'cpu'                  
print('Device: {}'.format(device))

# Configuration dictionary handed to the Model constructor.
for_model = {
    "architecture": "AASIST",
    "nb_samp": 80000,
    "first_conv": 128,
    "filts": [70, [1, 32], [32, 32], [32, 64], [64, 64]],
    "gat_dims": [64, 32],
    "pool_ratios": [0.5, 0.7, 0.5, 0.5],
    "temperatures": [2.0, 2.0, 100.0, 100.0],
}

classifier = Model(for_model)

# Total number of scalar parameters. numel() counts elements directly, without
# reshaping each tensor (view(-1) fails on non-contiguous parameters) and
# without materialising an intermediate list.
nb_params = sum(param.numel() for param in classifier.parameters())
print('nb_params:', nb_params)
classifier.to(device)

# Optionally start from a saved state dict, remapped onto the active device.
if args.model_path:
    classifier.load_state_dict(torch.load(args.model_path, map_location=device))
    print('Model loaded : {}'.format(args.model_path))

# Evaluation-only mode: run inference on the ids listed in test.csv, write the
# results to args.eval_output, and stop before any training code runs.
if args.eval:
    print('eval')
    file_eval = pd.read_csv('test.csv')
    print('no. of eval trials', len(file_eval))
    eval_set = Dataset_ASVspoof_eval(file_eval['id'].to_list())
    eval_loader = DataLoader(eval_set, batch_size=args.batch_size, num_workers=8, shuffle=False)
    inference(eval_loader, classifier, device, args.eval_output)
    # Inference finished normally, so exit with the success status (0); a
    # non-zero status would tell calling scripts that the run failed.
    sys.exit(0)

# ---- Labelled source data and unlabelled target data -----------------------
df = pd.read_csv('train.csv')
# Binary target: 'real' -> 1, every other label value -> 0.
df['label'] = np.where(df['label'] == 'real', 1, 0)
target_df = pd.read_csv('unlabeled.csv')
X = df['id'].to_list()
y = df['label'].to_list()

# Stratified K-Fold configuration
k = 5
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)

# Materialise every (train_indices, val_indices) pair
folds = list(skf.split(X, y))

# Pick the fold used for this run (here: fold_index = 0)
fold_index = 0
train_indices, val_indices = folds[fold_index]

# Split ids and labels with the selected fold's index arrays.
X_train = [X[idx] for idx in train_indices]
X_test = [X[idx] for idx in val_indices]
y_train = [y[idx] for idx in train_indices]
y_test = [y[idx] for idx in val_indices]

# ---- Training datasets: labelled source fold + unlabelled target ids -------
train_source_dataset = Dataset_ASVspoof_train(
    args, list_IDs=X_train, labels=y_train, algo=args.algo)
train_target_dataset = Dataset_ASVspoof_dev(
    list_IDs=target_df['0'].to_list(), args=args, algo=args.algo)

train_source_loader = DataLoader(
    train_source_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=8,
    drop_last=True,
)
train_target_loader = DataLoader(
    train_target_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=8,
    drop_last=True,
)

# Unshuffled loader over the same unlabelled ids.
eval_set = Dataset_ASVspoof_dev(
    list_IDs=target_df['0'].to_list(), args=args, algo=args.algo)
eval_loader = DataLoader(
    eval_set, batch_size=args.batch_size, num_workers=8, shuffle=False)

# Wrap both training loaders in ForeverDataIterator (source first, then target).
train_source_iter = ForeverDataIterator(train_source_loader)
train_target_iter = ForeverDataIterator(train_target_loader)

# Held-out fold, built with algo=0 instead of args.algo.
dev_set = Dataset_ASVspoof_train(
    args, list_IDs=X_test, labels=y_test, algo=0)
dev_loader = DataLoader(
    dev_set, batch_size=args.batch_size, num_workers=8, shuffle=False)

# Domain discriminator with input width 160 * 2 and hidden size 1024.
domain_discri = DomainDiscriminator(160 * 2, hidden_size=1024).to(device)

# Plain Adam drives the discriminator; the classifier is optimised by SAM,
# which receives Adam as its base optimizer class.
base_optimizer = torch.optim.Adam
ad_optimizer = torch.optim.Adam(
    domain_discri.get_parameters(),
    args.lr,
    weight_decay=args.weight_decay,
)
optimizer = SAM(
    classifier.parameters(),
    base_optimizer,
    rho=args.rho,
    adaptive=False,
    lr=args.lr,
    weight_decay=args.weight_decay,
)


def _lr_decay_factor(step):
    """Return args.lr * (1 + lr_gamma * step) ** (-lr_decay) for scheduler step *step*."""
    return args.lr * (1. + args.lr_gamma * float(step)) ** (-args.lr_decay)


# NOTE(review): LambdaLR multiplies each param group's initial lr by the value
# returned here, and that value already contains args.lr. `optimizer` is built
# with lr=args.lr, so its effective rate looks like args.lr ** 2 times the
# decay term -- confirm this is intended (it depends on what lr the param
# groups actually carry).
lr_scheduler = LambdaLR(optimizer, _lr_decay_factor)
lr_scheduler_ad = LambdaLR(ad_optimizer, _lr_decay_factor)

# Conditional adversarial loss built around the discriminator above.
domain_adv = ConditionalDomainAdversarialLoss(
    domain_discri,
    entropy_conditioning=False,
    num_classes=2,
    features_dim=160,
    randomized=False,
    randomized_dim=1024,
).to(device)


# Start the wandb run; the parsed arguments are stored as the run config.
wandb.init(
    project="FOLD_ADAM_BEST",
    name='fold0',
    config=args,
)

for epoch in range(args.num_epochs):
    # train for one epoch
    total_cls_loss, total_transfer_loss, total_loss = train(
        train_source_iter, train_target_iter, classifier, domain_adv,
        optimizer, ad_optimizer, lr_scheduler, lr_scheduler_ad,
        epoch, args)

    # Keep one checkpoint per epoch under the run's save directory.
    torch.save(classifier.state_dict(), os.path.join(
        model_save_path, 'epoch_{}.pth'.format(epoch)))

    # evaluate on validation set
    combine_score = produce_evaluation_file(dev_loader, classifier, device, X_test)

    # Sanity probe: print sigmoid scores for the first batch of eval_loader.
    data_iter = iter(eval_loader)
    wav = next(data_iter)

    # The scores are only printed, so skip autograd bookkeeping; otherwise a
    # full computation graph is built (and held by the result) every epoch.
    with torch.no_grad():
        print(torch.sigmoid(classifier(wav.to(device))[0]))
    print("Epoch:{}, combine_score:{:.6f}, cls_loss:{:.6f}, transfer_loss:{:.6f}, total_loss:{:.6f}\n".format(
        epoch, combine_score, total_cls_loss, total_transfer_loss, total_loss))

    # Log the epoch's metrics and both current learning rates to wandb.
    wandb.log({
        'combine': combine_score,
        'lr': lr_scheduler.get_last_lr()[0],
        'ad_lr': lr_scheduler_ad.get_last_lr()[0],
        'total_cls_loss': total_cls_loss,
        'total_transfer_loss': total_transfer_loss,
        'total_loss': total_loss,
    })

Device: cuda
nb_params: 297866


[34m[1mwandb[0m: Currently logged in as: [33mbeok[0m ([33mbeokay[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2105, 0.9299],
        [0.3643, 0.8791],
        [0.8311, 0.3683],
        [0.5121, 0.6753],
        [0.8009, 0.4767],
        [0.5401, 0.7637],
        [0.4443, 0.8340],
        [0.7395, 0.5710]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:0, combine_score:0.147711, cls_loss:640.689636, transfer_loss:477.222931, total_loss:1117.912476



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3218, 0.9054],
        [0.4024, 0.8980],
        [0.9032, 0.3751],
        [0.7326, 0.6078],
        [0.7090, 0.6536],
        [0.6909, 0.6822],
        [0.7951, 0.4876],
        [0.6634, 0.6947]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:1, combine_score:0.121804, cls_loss:577.031433, transfer_loss:612.625488, total_loss:1189.657349



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2841, 0.9198],
        [0.5423, 0.7641],
        [0.9601, 0.2076],
        [0.6964, 0.6742],
        [0.7268, 0.6890],
        [0.6069, 0.7176],
        [0.9290, 0.3188],
        [0.4665, 0.8617]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:2, combine_score:0.095252, cls_loss:544.355469, transfer_loss:669.910034, total_loss:1214.266602



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3297, 0.8991],
        [0.7972, 0.6137],
        [0.9748, 0.1695],
        [0.8184, 0.6005],
        [0.7935, 0.6183],
        [0.7588, 0.6345],
        [0.9522, 0.2754],
        [0.4839, 0.8780]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:3, combine_score:0.092990, cls_loss:514.410522, transfer_loss:678.934570, total_loss:1193.345825



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2860, 0.8951],
        [0.7376, 0.6690],
        [0.9706, 0.1730],
        [0.7866, 0.6641],
        [0.7987, 0.5956],
        [0.6680, 0.7145],
        [0.9560, 0.2466],
        [0.3479, 0.8724]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:4, combine_score:0.085190, cls_loss:495.657135, transfer_loss:677.773865, total_loss:1173.432373



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1624, 0.9428],
        [0.6610, 0.7356],
        [0.9635, 0.1797],
        [0.6231, 0.7857],
        [0.5914, 0.7859],
        [0.6040, 0.7646],
        [0.9153, 0.3460],
        [0.1609, 0.9402]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:5, combine_score:0.080354, cls_loss:478.408203, transfer_loss:680.823792, total_loss:1159.232422



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3033, 0.9162],
        [0.7974, 0.7173],
        [0.9681, 0.2131],
        [0.8228, 0.7153],
        [0.8476, 0.6321],
        [0.5673, 0.8498],
        [0.9676, 0.2118],
        [0.3059, 0.9171]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:6, combine_score:0.075496, cls_loss:462.111328, transfer_loss:685.746948, total_loss:1147.857422



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1954, 0.9209],
        [0.8912, 0.5626],
        [0.9675, 0.1939],
        [0.7597, 0.7870],
        [0.7751, 0.7130],
        [0.5891, 0.8281],
        [0.9598, 0.2371],
        [0.1614, 0.9239]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:7, combine_score:0.067980, cls_loss:446.768036, transfer_loss:682.747498, total_loss:1129.514771



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2932, 0.9110],
        [0.9024, 0.5405],
        [0.9569, 0.2033],
        [0.8247, 0.7638],
        [0.8644, 0.6680],
        [0.7460, 0.7750],
        [0.9524, 0.2321],
        [0.3131, 0.9149]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:8, combine_score:0.063241, cls_loss:438.641510, transfer_loss:684.977661, total_loss:1123.619141



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5719, 0.8780],
        [0.9257, 0.4527],
        [0.9575, 0.1417],
        [0.8574, 0.7388],
        [0.8874, 0.5624],
        [0.8606, 0.6856],
        [0.9575, 0.1872],
        [0.2536, 0.9022]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:9, combine_score:0.075709, cls_loss:426.450287, transfer_loss:682.915466, total_loss:1109.364624



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4060, 0.8944],
        [0.8647, 0.6441],
        [0.9631, 0.1244],
        [0.8654, 0.6860],
        [0.8991, 0.4902],
        [0.8329, 0.6936],
        [0.9547, 0.1683],
        [0.2231, 0.8933]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:10, combine_score:0.066268, cls_loss:419.027252, transfer_loss:684.706238, total_loss:1103.732788



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5397, 0.8823],
        [0.9199, 0.6119],
        [0.9573, 0.1721],
        [0.7942, 0.8493],
        [0.7936, 0.7801],
        [0.6851, 0.8633],
        [0.9535, 0.1837],
        [0.0405, 0.9462]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:11, combine_score:0.055747, cls_loss:409.842621, transfer_loss:685.468872, total_loss:1095.310913



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4762, 0.8753],
        [0.9338, 0.5191],
        [0.9490, 0.2020],
        [0.8400, 0.8254],
        [0.8909, 0.6780],
        [0.8463, 0.8057],
        [0.9503, 0.1804],
        [0.0630, 0.9283]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:12, combine_score:0.055191, cls_loss:404.329041, transfer_loss:686.883118, total_loss:1091.211914



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5219, 0.8878],
        [0.9318, 0.5324],
        [0.9498, 0.1550],
        [0.8165, 0.8588],
        [0.8435, 0.7695],
        [0.8558, 0.7956],
        [0.9490, 0.1767],
        [0.0391, 0.9353]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:13, combine_score:0.049566, cls_loss:399.180481, transfer_loss:687.651245, total_loss:1086.831299



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3997, 0.8927],
        [0.9214, 0.6596],
        [0.9302, 0.1622],
        [0.8063, 0.8630],
        [0.8734, 0.7345],
        [0.8836, 0.7947],
        [0.9354, 0.1897],
        [0.0459, 0.9196]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:14, combine_score:0.051559, cls_loss:398.354462, transfer_loss:686.528931, total_loss:1084.883667



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2389, 0.9023],
        [0.9160, 0.7838],
        [0.9361, 0.1210],
        [0.5473, 0.8875],
        [0.8294, 0.7971],
        [0.8425, 0.8449],
        [0.9387, 0.1754],
        [0.0243, 0.9344]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:15, combine_score:0.054272, cls_loss:389.910553, transfer_loss:689.476135, total_loss:1079.386719



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7098, 0.8861],
        [0.9318, 0.5448],
        [0.9354, 0.1209],
        [0.8607, 0.8167],
        [0.9085, 0.6149],
        [0.9139, 0.7137],
        [0.9371, 0.1627],
        [0.0661, 0.8931]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:16, combine_score:0.047306, cls_loss:384.152283, transfer_loss:688.589966, total_loss:1072.742310



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5927, 0.8916],
        [0.9112, 0.7551],
        [0.9337, 0.1592],
        [0.8620, 0.8276],
        [0.9107, 0.6242],
        [0.8158, 0.8351],
        [0.9411, 0.1821],
        [0.0476, 0.9042]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:17, combine_score:0.044499, cls_loss:376.490967, transfer_loss:690.317139, total_loss:1066.808716



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5834, 0.9113],
        [0.9198, 0.7346],
        [0.9222, 0.1014],
        [0.8152, 0.8651],
        [0.9122, 0.6439],
        [0.8593, 0.8355],
        [0.9297, 0.1425],
        [0.0464, 0.9062]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:18, combine_score:0.043756, cls_loss:378.621857, transfer_loss:689.484680, total_loss:1068.106689



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7627, 0.8917],
        [0.9338, 0.6323],
        [0.9258, 0.1054],
        [0.8911, 0.8500],
        [0.9194, 0.6054],
        [0.9314, 0.6642],
        [0.9294, 0.1791],
        [0.0708, 0.8846]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:19, combine_score:0.043503, cls_loss:373.307190, transfer_loss:689.721802, total_loss:1063.029541



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5704, 0.9102],
        [0.9349, 0.7433],
        [0.9294, 0.1437],
        [0.8956, 0.9015],
        [0.9188, 0.6802],
        [0.8529, 0.8676],
        [0.9313, 0.1498],
        [0.0208, 0.9289]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:20, combine_score:0.042378, cls_loss:370.361725, transfer_loss:687.977417, total_loss:1058.339478



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4766, 0.9073],
        [0.9351, 0.8026],
        [0.9328, 0.2354],
        [0.8719, 0.8956],
        [0.8614, 0.8232],
        [0.8399, 0.8943],
        [0.9396, 0.1635],
        [0.0228, 0.9168]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:21, combine_score:0.043555, cls_loss:370.205444, transfer_loss:690.111938, total_loss:1060.317749



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7280, 0.9003],
        [0.9444, 0.5093],
        [0.9375, 0.0791],
        [0.9109, 0.7593],
        [0.9140, 0.6126],
        [0.9355, 0.6415],
        [0.9400, 0.0902],
        [0.0249, 0.9219]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:22, combine_score:0.042103, cls_loss:364.065277, transfer_loss:689.759888, total_loss:1053.825317



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8523, 0.9012],
        [0.9389, 0.7097],
        [0.9322, 0.0991],
        [0.9121, 0.8646],
        [0.9155, 0.6702],
        [0.9170, 0.8735],
        [0.9356, 0.1062],
        [0.0798, 0.9034]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:23, combine_score:0.043429, cls_loss:364.541779, transfer_loss:688.965271, total_loss:1053.507568



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5277, 0.8899],
        [0.9236, 0.8136],
        [0.9203, 0.1114],
        [0.8513, 0.8670],
        [0.9076, 0.6782],
        [0.8160, 0.8706],
        [0.9254, 0.1067],
        [0.0321, 0.9118]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:24, combine_score:0.041427, cls_loss:361.015137, transfer_loss:692.199402, total_loss:1053.214233



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7898, 0.8989],
        [0.9370, 0.6691],
        [0.9234, 0.0735],
        [0.9055, 0.8030],
        [0.9250, 0.4796],
        [0.9275, 0.7919],
        [0.9309, 0.0913],
        [0.0831, 0.9012]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:25, combine_score:0.043044, cls_loss:359.919098, transfer_loss:690.793762, total_loss:1050.712524



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1362, 0.9142],
        [0.8895, 0.8747],
        [0.9237, 0.1234],
        [0.7939, 0.8671],
        [0.9246, 0.6677],
        [0.7035, 0.8934],
        [0.9302, 0.1540],
        [0.0393, 0.9066]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:26, combine_score:0.041267, cls_loss:358.254852, transfer_loss:689.729004, total_loss:1047.982544



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2516, 0.9134],
        [0.9218, 0.8093],
        [0.9319, 0.0887],
        [0.8878, 0.8570],
        [0.9123, 0.7375],
        [0.8662, 0.8832],
        [0.9393, 0.1002],
        [0.0260, 0.9111]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:27, combine_score:0.038132, cls_loss:356.960205, transfer_loss:689.739563, total_loss:1046.699341



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6467, 0.9064],
        [0.9368, 0.8156],
        [0.9380, 0.0820],
        [0.8916, 0.8231],
        [0.9329, 0.5385],
        [0.9068, 0.8930],
        [0.9378, 0.1069],
        [0.0470, 0.9026]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:28, combine_score:0.037778, cls_loss:349.383240, transfer_loss:691.663513, total_loss:1041.046753



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4215, 0.9089],
        [0.9223, 0.8289],
        [0.9307, 0.1089],
        [0.9131, 0.8082],
        [0.9080, 0.7838],
        [0.9198, 0.8608],
        [0.9326, 0.1291],
        [0.0592, 0.8967]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:29, combine_score:0.036949, cls_loss:349.458923, transfer_loss:691.290100, total_loss:1040.748779



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6991, 0.9108],
        [0.9421, 0.8276],
        [0.9525, 0.0706],
        [0.9179, 0.7992],
        [0.9344, 0.7320],
        [0.9077, 0.9064],
        [0.9489, 0.0886],
        [0.1011, 0.8868]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:30, combine_score:0.039141, cls_loss:349.156189, transfer_loss:690.090271, total_loss:1039.246704



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5274, 0.9059],
        [0.9004, 0.8641],
        [0.9291, 0.1630],
        [0.9134, 0.8409],
        [0.9160, 0.7769],
        [0.9117, 0.8856],
        [0.9292, 0.1802],
        [0.0869, 0.8919]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:31, combine_score:0.037323, cls_loss:351.864227, transfer_loss:691.446411, total_loss:1043.311157



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5002, 0.9090],
        [0.9255, 0.8334],
        [0.9372, 0.0716],
        [0.8900, 0.8774],
        [0.9228, 0.7108],
        [0.8683, 0.9019],
        [0.9374, 0.1010],
        [0.0821, 0.8927]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:32, combine_score:0.037185, cls_loss:343.553894, transfer_loss:691.715820, total_loss:1035.270142



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3429, 0.9222],
        [0.9153, 0.8825],
        [0.9327, 0.1181],
        [0.9005, 0.8894],
        [0.9055, 0.8383],
        [0.8640, 0.9150],
        [0.9376, 0.1042],
        [0.0248, 0.9152]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:33, combine_score:0.033700, cls_loss:346.299805, transfer_loss:691.694153, total_loss:1037.994629



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4433, 0.9083],
        [0.9031, 0.8759],
        [0.9376, 0.0808],
        [0.8640, 0.8763],
        [0.9268, 0.7142],
        [0.8888, 0.8895],
        [0.9346, 0.1481],
        [0.1556, 0.9016]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:34, combine_score:0.035473, cls_loss:347.283508, transfer_loss:690.830200, total_loss:1038.113281



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1848, 0.9092],
        [0.9178, 0.8592],
        [0.9208, 0.1495],
        [0.9053, 0.8745],
        [0.8372, 0.8860],
        [0.8009, 0.9066],
        [0.9312, 0.1450],
        [0.0148, 0.9277]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:35, combine_score:0.042446, cls_loss:340.420837, transfer_loss:690.918030, total_loss:1031.337891



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2190, 0.9012],
        [0.8524, 0.8998],
        [0.9336, 0.0898],
        [0.8957, 0.8780],
        [0.9180, 0.8212],
        [0.7580, 0.9085],
        [0.9354, 0.1409],
        [0.0629, 0.8980]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:36, combine_score:0.032731, cls_loss:335.245819, transfer_loss:691.036804, total_loss:1026.282593



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7682, 0.9244],
        [0.9124, 0.9061],
        [0.9160, 0.0943],
        [0.9097, 0.8566],
        [0.9198, 0.7542],
        [0.9042, 0.9042],
        [0.9250, 0.0924],
        [0.0734, 0.8967]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:37, combine_score:0.036025, cls_loss:341.009491, transfer_loss:688.452820, total_loss:1029.461914



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5589, 0.9092],
        [0.8731, 0.8618],
        [0.9339, 0.0722],
        [0.8587, 0.8590],
        [0.9288, 0.6264],
        [0.8856, 0.8973],
        [0.9350, 0.1518],
        [0.0591, 0.9024]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:38, combine_score:0.034361, cls_loss:341.229828, transfer_loss:691.026917, total_loss:1032.256348



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5861, 0.9101],
        [0.9189, 0.8108],
        [0.9383, 0.0718],
        [0.8765, 0.8561],
        [0.9054, 0.7971],
        [0.9159, 0.8382],
        [0.9407, 0.0830],
        [0.0472, 0.9078]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:39, combine_score:0.034312, cls_loss:343.661560, transfer_loss:691.742188, total_loss:1035.403931



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8069, 0.9256],
        [0.9115, 0.8924],
        [0.9408, 0.0936],
        [0.9038, 0.8277],
        [0.9350, 0.7495],
        [0.9446, 0.8032],
        [0.9407, 0.0843],
        [0.0852, 0.9071]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:40, combine_score:0.038434, cls_loss:338.074829, transfer_loss:692.774475, total_loss:1030.849365



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8456, 0.9235],
        [0.9320, 0.7118],
        [0.9281, 0.0785],
        [0.9229, 0.7239],
        [0.9307, 0.6236],
        [0.9286, 0.8333],
        [0.9347, 0.0733],
        [0.0601, 0.8944]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:41, combine_score:0.033845, cls_loss:332.270020, transfer_loss:691.229919, total_loss:1023.499878



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6340, 0.9129],
        [0.9244, 0.8553],
        [0.9405, 0.0814],
        [0.9289, 0.8135],
        [0.9258, 0.7681],
        [0.9162, 0.8999],
        [0.9428, 0.0761],
        [0.0621, 0.8970]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:42, combine_score:0.031909, cls_loss:332.950317, transfer_loss:692.011475, total_loss:1024.961670



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6092, 0.9136],
        [0.9065, 0.8881],
        [0.9358, 0.0936],
        [0.9308, 0.7841],
        [0.9338, 0.7164],
        [0.9374, 0.8640],
        [0.9378, 0.0991],
        [0.0229, 0.9087]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:43, combine_score:0.032670, cls_loss:332.185638, transfer_loss:689.677246, total_loss:1021.862610



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6632, 0.9121],
        [0.8450, 0.8867],
        [0.9385, 0.1149],
        [0.8894, 0.8612],
        [0.9189, 0.8269],
        [0.8926, 0.9179],
        [0.9405, 0.0756],
        [0.0575, 0.9160]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:44, combine_score:0.033203, cls_loss:333.581055, transfer_loss:689.073792, total_loss:1022.654663



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2036, 0.9053],
        [0.8528, 0.8916],
        [0.9259, 0.1498],
        [0.8478, 0.8742],
        [0.8987, 0.8502],
        [0.7754, 0.9160],
        [0.9326, 0.1140],
        [0.0290, 0.9179]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:45, combine_score:0.037418, cls_loss:332.049072, transfer_loss:689.123901, total_loss:1021.172363



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5415, 0.9090],
        [0.9283, 0.8264],
        [0.9336, 0.0615],
        [0.8316, 0.8870],
        [0.9003, 0.8265],
        [0.9226, 0.8768],
        [0.9357, 0.0662],
        [0.0481, 0.9091]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:46, combine_score:0.034510, cls_loss:328.471710, transfer_loss:690.402222, total_loss:1018.874207



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5733, 0.9147],
        [0.9364, 0.8788],
        [0.9352, 0.2107],
        [0.9016, 0.8596],
        [0.9143, 0.8426],
        [0.8927, 0.9268],
        [0.9352, 0.1027],
        [0.0664, 0.9041]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:47, combine_score:0.035114, cls_loss:331.561554, transfer_loss:691.024780, total_loss:1022.586121



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.9180, 0.9211],
        [0.9523, 0.7123],
        [0.9333, 0.0399],
        [0.9125, 0.8002],
        [0.9393, 0.3653],
        [0.9468, 0.8192],
        [0.9272, 0.0657],
        [0.2097, 0.9049]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:48, combine_score:0.035465, cls_loss:334.207397, transfer_loss:690.513306, total_loss:1024.720825



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5131, 0.9147],
        [0.9198, 0.8825],
        [0.9310, 0.1358],
        [0.8470, 0.8826],
        [0.9382, 0.7244],
        [0.8114, 0.9161],
        [0.9337, 0.1514],
        [0.1330, 0.9032]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:49, combine_score:0.034085, cls_loss:332.570404, transfer_loss:691.337463, total_loss:1023.908020



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6301, 0.9168],
        [0.9256, 0.8272],
        [0.9358, 0.0756],
        [0.8460, 0.8394],
        [0.9355, 0.6602],
        [0.9274, 0.8647],
        [0.9322, 0.0946],
        [0.0808, 0.9089]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:50, combine_score:0.033839, cls_loss:327.524506, transfer_loss:691.980835, total_loss:1019.505737



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7870, 0.9231],
        [0.9446, 0.8248],
        [0.9444, 0.1249],
        [0.9201, 0.7649],
        [0.9143, 0.8567],
        [0.8885, 0.9280],
        [0.9409, 0.0704],
        [0.0588, 0.9019]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:51, combine_score:0.034802, cls_loss:329.524078, transfer_loss:687.898926, total_loss:1017.422668



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6679, 0.9121],
        [0.9262, 0.7861],
        [0.9351, 0.1059],
        [0.9220, 0.7939],
        [0.9175, 0.7155],
        [0.8773, 0.9081],
        [0.9381, 0.0879],
        [0.0476, 0.8998]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:52, combine_score:0.031060, cls_loss:324.099304, transfer_loss:689.430176, total_loss:1013.528076



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7031, 0.9228],
        [0.9182, 0.7424],
        [0.9271, 0.0979],
        [0.8991, 0.8085],
        [0.8896, 0.8284],
        [0.8945, 0.9148],
        [0.9345, 0.0661],
        [0.0560, 0.9150]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:53, combine_score:0.034451, cls_loss:328.227448, transfer_loss:690.597961, total_loss:1018.825073



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3289, 0.9158],
        [0.9125, 0.8883],
        [0.9129, 0.1555],
        [0.9006, 0.8725],
        [0.9097, 0.8150],
        [0.8750, 0.9181],
        [0.9251, 0.1504],
        [0.0805, 0.9138]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:54, combine_score:0.032375, cls_loss:327.543182, transfer_loss:689.578247, total_loss:1017.120544



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3059, 0.9066],
        [0.9303, 0.7966],
        [0.9287, 0.0569],
        [0.8949, 0.8739],
        [0.8593, 0.8828],
        [0.8893, 0.9110],
        [0.9311, 0.0705],
        [0.0355, 0.9193]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:55, combine_score:0.032168, cls_loss:321.765442, transfer_loss:691.625366, total_loss:1013.390625



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1043, 0.9183],
        [0.9014, 0.8965],
        [0.9478, 0.0879],
        [0.9044, 0.8677],
        [0.9194, 0.7983],
        [0.8669, 0.9198],
        [0.9425, 0.1615],
        [0.1200, 0.9140]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:56, combine_score:0.031303, cls_loss:323.947510, transfer_loss:691.948914, total_loss:1015.896301



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2472, 0.9075],
        [0.9279, 0.8445],
        [0.9383, 0.0740],
        [0.9054, 0.8099],
        [0.9011, 0.8334],
        [0.9190, 0.9063],
        [0.9393, 0.0595],
        [0.0809, 0.9012]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:57, combine_score:0.031916, cls_loss:321.899719, transfer_loss:691.892029, total_loss:1013.791077



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3757, 0.9130],
        [0.9446, 0.7425],
        [0.9380, 0.0643],
        [0.9131, 0.8376],
        [0.9277, 0.7671],
        [0.9409, 0.8914],
        [0.9364, 0.0836],
        [0.0892, 0.9129]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:58, combine_score:0.031454, cls_loss:322.285492, transfer_loss:691.758179, total_loss:1014.043457



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3019, 0.9107],
        [0.9358, 0.8360],
        [0.9438, 0.0476],
        [0.8636, 0.8768],
        [0.9389, 0.6966],
        [0.9235, 0.9126],
        [0.9338, 0.0971],
        [0.3101, 0.8947]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:59, combine_score:0.032716, cls_loss:321.085114, transfer_loss:690.538635, total_loss:1011.624023



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4134, 0.9225],
        [0.9426, 0.8357],
        [0.9305, 0.0535],
        [0.9017, 0.8654],
        [0.9284, 0.7954],
        [0.9339, 0.8936],
        [0.9272, 0.0864],
        [0.0632, 0.9137]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:60, combine_score:0.032475, cls_loss:318.065002, transfer_loss:689.550476, total_loss:1007.614624



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4826, 0.9203],
        [0.9473, 0.7345],
        [0.9378, 0.0370],
        [0.8829, 0.8860],
        [0.9264, 0.8313],
        [0.9350, 0.8992],
        [0.9317, 0.0783],
        [0.1331, 0.9096]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:61, combine_score:0.030996, cls_loss:316.984985, transfer_loss:691.341492, total_loss:1008.326172



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3209, 0.9140],
        [0.9248, 0.8470],
        [0.9429, 0.0488],
        [0.8905, 0.8599],
        [0.9093, 0.7610],
        [0.9039, 0.9121],
        [0.9346, 0.0788],
        [0.1079, 0.9016]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:62, combine_score:0.032972, cls_loss:323.066345, transfer_loss:689.587952, total_loss:1012.655640



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1160, 0.9125],
        [0.9388, 0.8269],
        [0.9351, 0.0509],
        [0.8971, 0.8755],
        [0.7505, 0.8974],
        [0.8969, 0.9255],
        [0.9310, 0.0533],
        [0.0171, 0.9146]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:63, combine_score:0.031889, cls_loss:316.678345, transfer_loss:691.907654, total_loss:1008.586182



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3970, 0.9097],
        [0.9440, 0.7620],
        [0.9420, 0.1197],
        [0.8773, 0.8396],
        [0.8542, 0.8859],
        [0.9339, 0.9002],
        [0.9415, 0.0929],
        [0.0341, 0.9017]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:64, combine_score:0.034285, cls_loss:321.386688, transfer_loss:691.503906, total_loss:1012.890320



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6147, 0.9186],
        [0.9380, 0.6522],
        [0.9368, 0.0557],
        [0.9225, 0.7147],
        [0.9360, 0.6723],
        [0.9438, 0.8762],
        [0.9362, 0.0521],
        [0.1766, 0.8966]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:65, combine_score:0.032778, cls_loss:319.695038, transfer_loss:691.529724, total_loss:1011.224792



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3724, 0.9177],
        [0.9131, 0.8268],
        [0.9454, 0.0381],
        [0.8786, 0.8713],
        [0.8975, 0.8630],
        [0.8650, 0.9162],
        [0.9355, 0.0787],
        [0.0532, 0.9065]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:66, combine_score:0.030181, cls_loss:317.307190, transfer_loss:691.103210, total_loss:1008.410461



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7857, 0.9294],
        [0.9445, 0.7206],
        [0.9476, 0.0281],
        [0.8817, 0.8844],
        [0.9315, 0.5764],
        [0.9445, 0.8875],
        [0.9368, 0.0668],
        [0.1135, 0.9049]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:67, combine_score:0.030249, cls_loss:315.206787, transfer_loss:692.403931, total_loss:1007.610962



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6845, 0.9243],
        [0.9378, 0.7679],
        [0.9423, 0.0448],
        [0.8828, 0.8810],
        [0.9254, 0.8144],
        [0.9424, 0.8931],
        [0.9370, 0.0679],
        [0.1335, 0.9014]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:68, combine_score:0.029776, cls_loss:309.256622, transfer_loss:692.218628, total_loss:1001.474976



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4672, 0.9185],
        [0.9443, 0.7779],
        [0.9520, 0.0552],
        [0.9161, 0.7261],
        [0.9038, 0.8644],
        [0.9014, 0.9219],
        [0.9519, 0.0378],
        [0.0764, 0.8968]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:69, combine_score:0.029726, cls_loss:310.655975, transfer_loss:691.492859, total_loss:1002.148621



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4925, 0.9272],
        [0.9444, 0.8104],
        [0.9432, 0.0480],
        [0.8626, 0.8523],
        [0.9219, 0.8295],
        [0.9122, 0.9295],
        [0.9407, 0.0507],
        [0.0826, 0.9100]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:70, combine_score:0.030535, cls_loss:319.111328, transfer_loss:690.822388, total_loss:1009.934082



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4716, 0.9176],
        [0.9504, 0.7817],
        [0.9440, 0.0781],
        [0.9111, 0.8357],
        [0.8866, 0.8949],
        [0.9245, 0.9151],
        [0.9371, 0.0750],
        [0.0899, 0.9079]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:71, combine_score:0.033652, cls_loss:314.263000, transfer_loss:691.398743, total_loss:1005.661926



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4623, 0.9298],
        [0.9563, 0.8328],
        [0.9513, 0.0537],
        [0.9037, 0.8949],
        [0.9125, 0.8949],
        [0.8851, 0.9371],
        [0.9457, 0.0500],
        [0.0346, 0.9130]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:72, combine_score:0.029068, cls_loss:311.462769, transfer_loss:691.516846, total_loss:1002.979980



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8024, 0.9343],
        [0.9512, 0.6186],
        [0.9459, 0.0521],
        [0.9272, 0.7374],
        [0.9350, 0.8418],
        [0.9468, 0.9160],
        [0.9370, 0.0632],
        [0.0948, 0.9090]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:73, combine_score:0.034687, cls_loss:311.539062, transfer_loss:691.849609, total_loss:1003.388245



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4632, 0.9219],
        [0.9296, 0.8402],
        [0.9354, 0.0717],
        [0.9013, 0.8508],
        [0.9214, 0.7541],
        [0.9362, 0.8898],
        [0.9232, 0.1145],
        [0.3056, 0.8992]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:74, combine_score:0.029479, cls_loss:311.878143, transfer_loss:691.721375, total_loss:1003.599854



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2275, 0.9195],
        [0.8659, 0.9038],
        [0.9392, 0.1319],
        [0.8960, 0.8769],
        [0.8571, 0.9179],
        [0.7836, 0.9214],
        [0.9389, 0.1378],
        [0.0853, 0.9122]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:75, combine_score:0.030048, cls_loss:312.619659, transfer_loss:691.954285, total_loss:1004.573120



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8033, 0.9287],
        [0.9416, 0.6993],
        [0.9378, 0.0558],
        [0.8994, 0.8177],
        [0.9135, 0.8662],
        [0.9284, 0.9129],
        [0.9431, 0.0441],
        [0.0700, 0.9080]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:76, combine_score:0.031308, cls_loss:309.003571, transfer_loss:692.186218, total_loss:1001.189697



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.9405, 0.9373],
        [0.9464, 0.2663],
        [0.9576, 0.0137],
        [0.9015, 0.7642],
        [0.9352, 0.3938],
        [0.9471, 0.8049],
        [0.9502, 0.0223],
        [0.2592, 0.9059]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:77, combine_score:0.030203, cls_loss:307.581085, transfer_loss:691.705750, total_loss:999.287842



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.1367, 0.9225],
        [0.9093, 0.9369],
        [0.9526, 0.0665],
        [0.8169, 0.8996],
        [0.9008, 0.8987],
        [0.8869, 0.9401],
        [0.9340, 0.1352],
        [0.3550, 0.9140]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:78, combine_score:0.041709, cls_loss:311.705170, transfer_loss:690.556885, total_loss:1002.262634



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8016, 0.9274],
        [0.9312, 0.5908],
        [0.9484, 0.0261],
        [0.9254, 0.5933],
        [0.9194, 0.5875],
        [0.9406, 0.9000],
        [0.9392, 0.0400],
        [0.3259, 0.8951]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:79, combine_score:0.032982, cls_loss:317.510590, transfer_loss:688.937988, total_loss:1006.449036



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2212, 0.9205],
        [0.9238, 0.8557],
        [0.9456, 0.0606],
        [0.8659, 0.8474],
        [0.9228, 0.8110],
        [0.9313, 0.9284],
        [0.9451, 0.0735],
        [0.2434, 0.9134]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:80, combine_score:0.031468, cls_loss:313.177521, transfer_loss:691.849365, total_loss:1005.026550



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2316, 0.9238],
        [0.9304, 0.7697],
        [0.9408, 0.0502],
        [0.8342, 0.8591],
        [0.8909, 0.9154],
        [0.9068, 0.9260],
        [0.9430, 0.0569],
        [0.0713, 0.9146]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:81, combine_score:0.028521, cls_loss:314.131805, transfer_loss:691.203125, total_loss:1005.334229



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7001, 0.9284],
        [0.9285, 0.7701],
        [0.9454, 0.0326],
        [0.9020, 0.7886],
        [0.9273, 0.6380],
        [0.9003, 0.9048],
        [0.9402, 0.0529],
        [0.4025, 0.8996]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:82, combine_score:0.031437, cls_loss:310.894714, transfer_loss:691.907715, total_loss:1002.802673



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7929, 0.9404],
        [0.9434, 0.4623],
        [0.9409, 0.0343],
        [0.8959, 0.8202],
        [0.9197, 0.8054],
        [0.9323, 0.8710],
        [0.9395, 0.0329],
        [0.1034, 0.9057]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:83, combine_score:0.029792, cls_loss:309.235077, transfer_loss:689.839111, total_loss:999.074585



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7667, 0.9330],
        [0.9413, 0.5883],
        [0.9406, 0.0374],
        [0.8903, 0.7371],
        [0.9257, 0.7497],
        [0.9424, 0.7179],
        [0.9377, 0.0398],
        [0.2560, 0.9018]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:84, combine_score:0.032317, cls_loss:309.071899, transfer_loss:691.564941, total_loss:1000.638489



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6717, 0.9358],
        [0.9400, 0.8420],
        [0.9344, 0.0635],
        [0.8894, 0.7732],
        [0.9277, 0.7008],
        [0.9268, 0.9011],
        [0.9462, 0.0573],
        [0.0510, 0.9089]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:85, combine_score:0.027602, cls_loss:309.694916, transfer_loss:691.855469, total_loss:1001.549622



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8299, 0.9299],
        [0.9401, 0.6493],
        [0.9459, 0.0341],
        [0.9087, 0.7840],
        [0.9309, 0.6942],
        [0.9409, 0.8628],
        [0.9469, 0.0525],
        [0.0739, 0.9015]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:86, combine_score:0.028069, cls_loss:308.908630, transfer_loss:691.000183, total_loss:999.909607



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5637, 0.9327],
        [0.9077, 0.9068],
        [0.9335, 0.0757],
        [0.8361, 0.8797],
        [0.8778, 0.9006],
        [0.9349, 0.9098],
        [0.9400, 0.0775],
        [0.0576, 0.9081]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:87, combine_score:0.029910, cls_loss:310.309784, transfer_loss:692.343140, total_loss:1002.652344



  0%|          | 0/1000 [00:00<?, ?it/s]

wandb: Network error (ReadTimeout), entering retry loop.


  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4714, 0.9306],
        [0.9231, 0.8080],
        [0.9590, 0.0182],
        [0.8753, 0.8277],
        [0.9120, 0.8254],
        [0.9459, 0.8431],
        [0.9510, 0.0522],
        [0.0829, 0.9106]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:88, combine_score:0.030184, cls_loss:308.884918, transfer_loss:691.737061, total_loss:1000.621582



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6572, 0.9340],
        [0.9318, 0.8335],
        [0.9475, 0.0496],
        [0.8765, 0.7703],
        [0.9121, 0.8781],
        [0.9295, 0.9253],
        [0.9449, 0.0887],
        [0.1270, 0.9132]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:89, combine_score:0.030779, cls_loss:309.460724, transfer_loss:691.598511, total_loss:1001.059204



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.8519, 0.9456],
        [0.9354, 0.7758],
        [0.9356, 0.0527],
        [0.8900, 0.8375],
        [0.9296, 0.8769],
        [0.9394, 0.8943],
        [0.9360, 0.1116],
        [0.2824, 0.9192]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:90, combine_score:0.028257, cls_loss:304.354309, transfer_loss:690.944824, total_loss:995.299072



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5776, 0.9333],
        [0.9243, 0.8744],
        [0.9366, 0.0709],
        [0.8642, 0.8918],
        [0.9113, 0.9087],
        [0.9236, 0.9287],
        [0.9399, 0.1233],
        [0.0562, 0.9163]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:91, combine_score:0.027985, cls_loss:306.876556, transfer_loss:690.800415, total_loss:997.676819



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3675, 0.9174],
        [0.9009, 0.9106],
        [0.9412, 0.1114],
        [0.8284, 0.8783],
        [0.8860, 0.8960],
        [0.8822, 0.9267],
        [0.9390, 0.2190],
        [0.0529, 0.9008]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:92, combine_score:0.028436, cls_loss:306.195557, transfer_loss:690.102600, total_loss:996.297791



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.7259, 0.9401],
        [0.9458, 0.4754],
        [0.9411, 0.0439],
        [0.8984, 0.7955],
        [0.8968, 0.9038],
        [0.9384, 0.9074],
        [0.9513, 0.0686],
        [0.0663, 0.9082]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:93, combine_score:0.029949, cls_loss:306.985657, transfer_loss:689.574524, total_loss:996.559998



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.3470, 0.9220],
        [0.9467, 0.8507],
        [0.9282, 0.1021],
        [0.8665, 0.8965],
        [0.8228, 0.9227],
        [0.8651, 0.9365],
        [0.9382, 0.1780],
        [0.0428, 0.9103]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:94, combine_score:0.027880, cls_loss:306.452240, transfer_loss:691.523987, total_loss:997.976013



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.9037, 0.9392],
        [0.9469, 0.5625],
        [0.9486, 0.0208],
        [0.9144, 0.8185],
        [0.9292, 0.7670],
        [0.9453, 0.9026],
        [0.9416, 0.0656],
        [0.0975, 0.9076]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:95, combine_score:0.028798, cls_loss:309.510773, transfer_loss:690.839233, total_loss:1000.349182



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.2474, 0.9134],
        [0.9232, 0.8998],
        [0.9440, 0.0379],
        [0.8638, 0.8486],
        [0.9076, 0.8606],
        [0.7533, 0.9179],
        [0.9455, 0.1543],
        [0.0565, 0.9066]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:96, combine_score:0.029339, cls_loss:304.121613, transfer_loss:691.153809, total_loss:995.275940



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.4254, 0.9247],
        [0.9528, 0.8859],
        [0.9424, 0.2514],
        [0.8422, 0.8510],
        [0.7011, 0.9242],
        [0.8239, 0.9335],
        [0.9442, 0.2707],
        [0.0265, 0.9192]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:97, combine_score:0.031743, cls_loss:303.732819, transfer_loss:691.530273, total_loss:995.263184



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.5465, 0.9289],
        [0.9491, 0.7834],
        [0.9375, 0.0450],
        [0.8773, 0.8362],
        [0.8937, 0.9033],
        [0.9095, 0.9291],
        [0.9406, 0.0403],
        [0.0446, 0.9154]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:98, combine_score:0.029766, cls_loss:305.292755, transfer_loss:690.817444, total_loss:996.110352



  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1386 [00:00<?, ?it/s]

tensor([[0.6175, 0.9288],
        [0.9395, 0.7519],
        [0.9346, 0.0307],
        [0.8987, 0.8047],
        [0.8901, 0.8763],
        [0.9212, 0.9168],
        [0.9356, 0.0804],
        [0.1516, 0.8994]], device='cuda:0', grad_fn=<SigmoidBackward0>)
Epoch:99, combine_score:0.028643, cls_loss:304.758636, transfer_loss:691.311340, total_loss:996.070129



In [13]:
# Evaluation cell: load the ids listed in test.csv, wrap them in the eval
# dataset, and hand the resulting loader to inference().
file_eval = pd.read_csv('test.csv')
eval_ids = file_eval['id'].to_list()  # plain Python list taken from the 'id' column
eval_set = Dataset_ASVspoof_eval(eval_ids)

# shuffle=False so batches are drawn in the same order as the rows of test.csv.
eval_loader = DataLoader(
    eval_set,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=8,
)

# NOTE(review): the last argument looks like the output CSV name for the
# predictions -- confirm against the definition of inference().
inference(eval_loader, classifier, device, 'asdf_fold0.csv')

  0%|          | 0/6250 [00:00<?, ?it/s]