In [1]:
import librosa
import torch.nn as nn

audio_path = 'G3/data/corrected_split/test/source/airport-barcelona-203-6132-a.wav'
y, sr = librosa.load(audio_path)
audio=y
print(f"audio: {y}")
print(f"Shape of audio data: {y.shape}")
print(f"Sample rate: {sr}")


audio: [-0.005696    0.00483759  0.01827622 ...  0.02230001  0.01699919
  0.01644135]
Shape of audio data: (220500,)
Sample rate: 22050


In [2]:
import torch
from hear21passt.base import get_basic_model

In [3]:


class PaSSTFeatureExtractor(torch.nn.Module):
    def __init__(self, device=None):
        super(PaSSTFeatureExtractor, self).__init__()
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model = get_basic_model(mode="embed_only") 
        self.model.to(self.device)
       

    def forward(self, audio_waveform, sample_rate=32000):
     
        if audio_waveform.dim() == 1:
            audio_waveform = audio_waveform.unsqueeze(0)  # -e 
        # print("PaSST extractor got shape:", audio_waveform.shape)    
        audio_waveform = audio_waveform.to(self.device)        
        features = self.model(audio_waveform)
             
        return features
  

In [4]:
extractor=PaSSTFeatureExtractor()
audio_tensor=torch.tensor(y,dtype=torch.float32)
features=extractor(audio_tensor)
# print(features)
print(features.shape)



 Loading PASST TRAINED ON AUDISET 


PaSST(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((768,), ep

Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at /pytorch/aten/src/ATen/native/SpectralOps.cpp:875.)
  return _VF.stft(  # type: ignore[attr-defined]
  with torch.cuda.amp.autocast(enabled=False):


## GRL Code 

In [5]:
def calc_coeff(iter_num, high=1.0, low=0.0, alpha=10.0, max_iter=10000.0):
    return np.float32(2.0 * (high - low) / (1.0 + np.exp(-alpha*iter_num / max_iter)) - (high - low) + low)

In [6]:
def init_weights(m):
    classname = m.__class__.__name__
    if classname.find('Conv2d') != -1 or classname.find('ConvTranspose2d') != -1:
        nn.init.kaiming_uniform_(m.weight)
        nn.init.zeros_(m.bias)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight, 1.0, 0.02)
        nn.init.zeros_(m.bias)
    elif classname.find('Linear') != -1:
        nn.init.kaiming_normal_(m.weight)
        nn.init.zeros_(m.bias)

In [7]:
def grl_hook(coeff):
    def fun1(grad):
        return -coeff*grad.clone()
    return fun1

# network part


In [8]:
class generator(nn.Module):
  def __init__(self,passt_extractor,use_bottleneck=True, bottleneck_dim=256, new_cls=False, class_num=1000):
    super(generator,self).__init__()
    self.feature_extractor=passt_extractor
    self.use_bottleneck = use_bottleneck
    self.sigmoid = nn.Sigmoid()
    self.new_cls = new_cls
      
    if new_cls:
        if self.use_bottleneck:
            self.bottleneck = nn.Linear(768, bottleneck_dim)
            self.fc = nn.Linear(bottleneck_dim, class_num)#g2
            self.gvbg = nn.Linear(bottleneck_dim, class_num)#g3
            self.focal1 = nn.Linear( class_num,class_num)#g3 weighting , like regularization , helps to learn how to raw class logits intract with eachother
            self.focal2 = nn.Linear( class_num,1)#g3 weitghting
            self.bottleneck.apply(init_weights)#dont know whaat kaiming is 
            self.fc.apply(init_weights)
            self.gvbg.apply(init_weights)
            self.__in_features = bottleneck_dim
        else:
            self.fc = nn.Linear(768, class_num)
            self.fc.apply(init_weights)
            self.gvbg = nn.Linear(768, class_num)
            self.gvbg.apply(init_weights)
            self.__in_features = 768
    else:
        self.fc = nn.Identity()
        self.__in_features = 768

  def forward(self, x, gvbg=True):
    # print("input to ")  
    x = self.feature_extractor(x)
    x = x.view(x.size(0), -1)
      
    if self.use_bottleneck and self.new_cls:
        x = self.bottleneck(x)
    bridge = self.gvbg(x)
    y = self.fc(x)
      
    if gvbg:
        y = y - bridge
        
    return x, y, bridge

  def output_num(self):
    return self.__in_features

  def get_parameters(self):
    if self.new_cls:
        if self.use_bottleneck:
            parameter_list = [{"params":self.feature_extractor.parameters(), "lr_mult":1, 'decay_mult':2}, \
                            {"params":self.bottleneck.parameters(), "lr_mult":10, 'decay_mult':2}, \
                            {"params":self.fc.parameters(), "lr_mult":10, 'decay_mult':2}]
        else:
            parameter_list = [{"params":self.feature_extractor.parameters(), "lr_mult":1, 'decay_mult':2}, \
                            {"params":self.fc.parameters(), "lr_mult":10, 'decay_mult':2},
                            {"params":self.gvbg.parameters(), "lr_mult":10, 'decay_mult':2}]
    else:
        parameter_list = [{"params":self.parameters(), "lr_mult":1, 'decay_mult':2}]
    return parameter_list

In [9]:

device = "cuda" if torch.cuda.is_available() else "cpu"
# --- Run test ---

model = generator(extractor,class_num=10,bottleneck_dim=256,new_cls=True)
model.to(device)
audio_tensor.to(device)
features, y, bridge = model(audio_tensor)
print("Feature:", features.shape)
print("y:", y.shape)
print("bridge:", bridge.shape)


Feature: torch.Size([1, 256])
y: torch.Size([1, 10])
bridge: torch.Size([1, 10])


In [10]:
class AdversarialNetwork(nn.Module):
  def __init__(self, in_feature, hidden_size):
    super(AdversarialNetwork, self).__init__()
    self.ad_layer1 = nn.Linear(in_feature, hidden_size)
    self.ad_layer2 = nn.Linear(hidden_size, hidden_size)
    self.ad_layer3 = nn.Linear(hidden_size, 1)
    self.gvbd = nn.Linear(hidden_size, 1)
    self.relu1 = nn.ReLU()
    self.relu2 = nn.ReLU()
    self.dropout1 = nn.Dropout(0.5)
    self.dropout2 = nn.Dropout(0.5)
    self.dropout3 = nn.Dropout(0.5)
    self.sigmoid = nn.Sigmoid()
    self.apply(init_weights)
    self.iter_num = 0

  def forward(self, x):
    if self.training:
        self.iter_num += 1
    coeff = calc_coeff(self.iter_num)
    x = x * 1.0
    x.register_hook(grl_hook(coeff))
    x = self.ad_layer1(x)
    x = self.relu1(x)
    x = self.dropout1(x)
    x = self.ad_layer2(x)
    x = self.relu2(x)
    x = self.dropout2(x)
    y = self.ad_layer3(x)
    z = self.gvbd(x)
    return y,z

  def output_num(self):
    return 1
  def get_parameters(self):
    return [{"params":self.parameters(), "lr_mult":10, 'decay_mult':2}]


In [11]:
import numpy as np
discriminator = AdversarialNetwork(256,1024).to(device)
discriminator.train()
d1_output, d2_output = discriminator(features)
print("domain logits",d1_output,"\nsize",d1_output.shape)
print("gvb logits",d2_output,"\nsize",d2_output.shape)


domain logits tensor([[0.9948]], device='cuda:0', grad_fn=<AddmmBackward0>) 
size torch.Size([1, 1])
gvb logits tensor([[-2.9424]], device='cuda:0', grad_fn=<AddmmBackward0>) 
size torch.Size([1, 1])


<h2>learning rate scheduler<h2>

In [12]:

def inv_lr_scheduler(optimizer, iter_num, gamma, power, lr=0.001, weight_decay=0.0005):
    """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
    lr = lr * (1 + gamma * iter_num) ** (-power)
    i=0
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr * param_group['lr_mult']
        param_group['weight_decay'] = weight_decay * param_group['decay_mult']
        i+=1

    return optimizer


schedule_dict = {"inv":inv_lr_scheduler}


<h2>loss<h2>

In [13]:
#dont know much about it 
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import math
import torch.nn.functional as F
import pdb

# domain discriminator loss 
class Myloss(nn.Module):
    def __init__(self,epsilon=1e-8):
        super(Myloss,self).__init__()
        self.epsilon = epsilon
        return
    def forward(self,input_, label, weight):
        entropy = - label * torch.log(input_ + self.epsilon) -(1 - label) * torch.log(1 - input_ + self.epsilon)
        return torch.sum(entropy * weight)/2 
    
def Entropy(input_):
    bs = input_.size(0)
    epsilon = 1e-5
    entropy = -input_ *torch.log(input_ + epsilon)
    entropy = torch.sum(entropy, dim=1)
    return entropy 
    
def grl_hook(coeff):
    def fun1(grad):
        return -coeff*grad.clone()
    return fun1
    
def GVB(input_list, ad_net, coeff=None, myloss=Myloss(),GVBD=False):
    softmax_output = input_list[0]
    focals = input_list[1].reshape(-1)
    ad_out,fc_out = ad_net(softmax_output)
    if GVBD==1:
        ad_out = nn.Sigmoid()(ad_out - fc_out)
    else:
        ad_out = nn.Sigmoid()(ad_out)
    batch_size = softmax_output.size(0) // 2
    dc_target = torch.from_numpy(np.array([[1]] * batch_size + [[0]] * batch_size)).float().cuda()

    x = softmax_output
    entropy = Entropy(x)
    entropy.register_hook(grl_hook(coeff))
    entropy = torch.exp(-entropy)
    mean_entropy = torch.mean(entropy)
    gvbg = torch.mean(torch.abs(focals))
    gvbd = torch.mean(torch.abs(fc_out))

    source_mask = torch.ones_like(entropy)
    source_mask[softmax_output.size(0)//2:] = 0
    source_weight = entropy*source_mask
    target_mask = torch.ones_like(entropy)
    target_mask[0:softmax_output.size(0)//2] = 0
    target_weight = entropy*target_mask
    weight = source_weight / torch.sum(source_weight).detach().item() + \
             target_weight / torch.sum(target_weight).detach().item()
    return myloss(ad_out,dc_target,weight.view(-1, 1)), mean_entropy, gvbg, gvbd 



<h2>data_list<h2>

In [14]:
#from __future__ import print_function, division

import torch
import numpy as np

import torchaudio
from torch.utils.data import Dataset

def make_dataset(audio_list, labels):#was image_list 
    if labels:
      len_ = len(audio_list)
      waves = [(audio_list[i].strip(), labels[i, :]) for i in range(len_)]  ### sure about this ? 
    else:
      if len(audio_list[0].split()) > 2:
        waves = [(val.split()[0], np.array([int(la) for la in val.split()[1:]])) for val in audio_list]
      else:
        waves = [(val.split()[0], int(val.split()[1])) for val in audio_list]
    return waves # waves was images 


def audio_loader(path, sr=16000):
    waveform, sample_rate = torchaudio.load(path)
    # Resample if needed
    if sample_rate != sr:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=sr)
        waveform = resampler(waveform)
    return waveform

class AudioList(Dataset):
    def __init__(self, audio_list, labels=None, transform=None, target_transform=None,sample_rate=16000):
        self.wavs = make_dataset(audio_list, labels) #wavs was imgs
        if len(self.wavs) == 0:
            raise(RuntimeError("Found 0 audio .wav files in the provides lists."))
        self.transform = transform
        self.target_transform = target_transform
        self.sample_rate=sample_rate


       
    def __getitem__(self, index):
        path, target = self.wavs[index]
        wav = audio_loader(path,sr=self.sample_rate)
        if self.transform is not None:
            wav = self.transform(wav)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return wav, target

    def __len__(self):
        return len(self.wavs)




In [15]:
with open('train_source_list_labels.txt') as f:
    audio_list= f.readlines()
dataset=AudioList(audio_list)



In [16]:
from torch.utils.data import DataLoader
loader =DataLoader(dataset,shuffle=True,batch_size=4)
for w , l in loader :
    print(w.shape,l)
    break

torch.Size([4, 1, 160000]) tensor([6, 5, 5, 0])


In [17]:
scene_label_map = {
    "airport": 0,
    "bus": 1,
    "metro": 2,
    "metro_station": 3,
    "park": 4,
    "public_square": 5,
    "shopping_mall": 6,
    "street_pedestrian": 7,
    "street_traffic": 8,
    "tram": 9
}

<h2>train<h2>

In [31]:
import argparse
import os
import os.path as osp

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


from torch.utils.data import DataLoader


from torch.autograd import Variable
import random
import pdb
import math
import ipdb 

from packaging.version import Version


## look later 
def audio_classification_test(loader, model, gvbg=False,key="test_source"):
    start_test = True
    with torch.no_grad():
       iter_test = iter(loader[key])
       for i in range(len(loader[key])):
           data = next(iter_test)
           
           inputs = data[0]
           labels = data[1]
           inputs = inputs.cuda()
           labels = labels.cuda()
           
           #doing resizeing so it can be fead into passt to extract features
           inputs_batch_size = inputs.shape[0]
           inputs_data = inputs.shape[-1]
           inputs = inputs.reshape(inputs_batch_size , inputs_data)
           
           _, outputs ,_  = model(inputs,gvbg=gvbg) 
           if start_test:
               all_output = outputs.float()
               all_label = labels.float()
               start_test = False
           else:
               all_output = torch.cat((all_output, outputs.float()), 0)
               all_label = torch.cat((all_label, labels.float()), 0)
    _, predict = torch.max(all_output, 1)
    accuracy = torch.sum(torch.squeeze(predict).float() == all_label).item() / float(all_label.size()[0]) ## squeeze
    return accuracy

def correct_path(file_list):
    rm_string = '/DATA/disk1/hassassin/dataset/domain/OfficeHomeDataset/'
    for i in range(len(file_list)): 
        file_list[i] = file_list[i].replace(rm_string , "../data/office-home/")
    return file_list

In [34]:

        



def train(config):
    
    ## set pre-process
    #prep_dict = {
    dsets = {}
    dset_loaders = {}
    data_config = config["data"]
    #prep_config = config["prep"]
    # prep_dict["source_train"] = prep.image_target(**config["prep"]['params'])
    # prep_dict["target_train"] = prep.image_target(**config["prep"]['params'])
    # prep_dict["source_test"] = prep.image_test(**config["prep"]['params'])
    # prep_dict["target_test"] = prep.image_test(**config["prep"]['params'])

    
    ## prepare data
    train_bs = data_config["source"]["batch_size"]
    test_bs = data_config["test"]["batch_size"]
    
    ################## this is explictly used becasue i am lazy to remove the path in the text file####    
    source_list = open(data_config["source"]["list_path"]).readlines()
    source_list = correct_path(source_list)
    
    tgt_list = open(data_config["target"]["list_path"]).readlines()
    tgt_list = correct_path(tgt_list)
    
    test_s_list = open(data_config["test_source"]["list_path"]).readlines()
    test_s_list = correct_path(test_s_list)
    test_t_list = open(data_config["test_target"]["list_path"]).readlines()
    test_t_list = correct_path(test_t_list)
    
    
    ##################################################################################################
    dsets["source"] = AudioList(source_list)
    dset_loaders["source"] = DataLoader(dsets["source"], batch_size=train_bs, shuffle=True, num_workers=4, drop_last=True)
    
    dsets["target"] = AudioList(tgt_list)
    dset_loaders["target"] = DataLoader(dsets["target"], batch_size=train_bs, shuffle=True, num_workers=4, drop_last=True)

    dsets["test_source"] = AudioList(test_s_list)
    dset_loaders["test_source"] = DataLoader(dsets["test_source"], batch_size=test_bs, shuffle=False, num_workers=4)
    
    dsets["test_target"] = AudioList(test_t_list)
    dset_loaders["test_target"] = DataLoader(dsets["test_target"], batch_size=test_bs, shuffle=False, num_workers=4)

    ## set base network
    class_num = config["network"]["params"]["class_num"]
    net_config = config["network"]
    base_network = net_config["name"](**net_config["params"])
    base_network = base_network.cuda()

    ## add additional network for some methods
    ad_net = AdversarialNetwork( 10, 1024)
    ad_net = ad_net.cuda()
 
    ## set optimizer
    parameter_list = base_network.get_parameters() + ad_net.get_parameters()
    optimizer_config = config["optimizer"]
    optimizer = optimizer_config["type"](parameter_list, \
                    **(optimizer_config["optim_params"]))
    param_lr = []
    for param_group in optimizer.param_groups:
        param_lr.append(param_group["lr"])
    schedule_param = optimizer_config["lr_param"]
    lr_scheduler = schedule_dict[optimizer_config["lr_type"]] #######################

    #multi gpu
    gpus = config['gpu'].split(',') 
    if len(gpus) > 1:
        ad_net = nn.DataParallel(ad_net, device_ids=[int(i) for i,k in enumerate(gpus)]) # dont know what this does 
        base_network = nn.DataParallel(base_network, device_ids=[int(i) for i,k in enumerate(gpus)])
    
    ## train   
    len_train_source = len(dset_loaders["source"])
    len_train_target = len(dset_loaders["target"])
    transfer_loss_value = classifier_loss_value = total_loss_value = 0.0
    best_acc = 0.0
    for i in range(config["num_iterations"]):
        #test
        if i % config["test_interval"] == config["test_interval"] - 1:
            base_network.train(False)
            
            acc_s = audio_classification_test(dset_loaders, base_network, gvbg=config["GVBG"], key="test_source")#was temp_acc
            acc_t = audio_classification_test(dset_loaders, base_network, gvbg=config["GVBG"], key="test_target")
            
            temp_model = nn.Sequential(base_network)
            if acc_t > best_acc:
                best_acc = acc_t
                best_model = temp_model
            log_str = "iter: {:05d}, source_acc: {:.5f}, target_acc: {:.5f}".format(i, acc_s, acc_t)#was log_str = "iter: {:05d}, precision: {:.5f}".format(i, temp_acc)
            config["out_file"].write(log_str+"\n")
            config["out_file"].flush()
            print(log_str)
        #save model
        if i % config["snapshot_interval"] == 0:
            torch.save(base_network.state_dict(), osp.join(config["output_path"], \
                "iter_{:05d}_model.pth.tar".format(i)))

        ## train one iter
        base_network.train(True)
        ad_net.train(True)
        loss_params = config["loss"]                  
        optimizer = lr_scheduler(optimizer, i, **schedule_param)
        optimizer.zero_grad()

        #dataloader
        if i % len_train_source == 0:
            iter_source = iter(dset_loaders["source"])
        if i % len_train_target == 0:
            iter_target = iter(dset_loaders["target"])
            
        #network
        inputs_source, labels_source = next(iter_source)
        inputs_target, _ = next(iter_target)
        # print("input source",inputs_source.shape)
        # print("input target",inputs_target.shape)
        
        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        # print("after cuda shape",inputs_source.shape)

        
        source_batch_size_dataloader = inputs_source.shape[0]
        tgt_batch_size_dataloader = inputs_target.shape[0]
        audio_size_dataloader = inputs_source.shape[-1]
        inputs_source = inputs_source.reshape(source_batch_size_dataloader , audio_size_dataloader)
        inputs_target = inputs_target.reshape(tgt_batch_size_dataloader , audio_size_dataloader) 
        features_source, outputs_source, focal_source = base_network(inputs_source,gvbg=config["GVBG"])
        features_target, outputs_target, focal_target = base_network(inputs_target,gvbg=config["GVBG"])
        features = torch.cat((features_source, features_target), dim=0)
        outputs = torch.cat((outputs_source, outputs_target), dim=0)
        focals = torch.cat((focal_source,focal_target),dim=0)
        softmax_out = nn.Softmax(dim=1)(outputs)

        #loss calculation
        
        transfer_loss, mean_entropy, gvbg, gvbd = GVB([softmax_out,focals], ad_net, calc_coeff(i), GVBD=config['GVBD'])
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        total_loss = loss_params["trade_off"] * transfer_loss + classifier_loss + config["GVBG"] * gvbg + abs(config['GVBD']) * gvbd

        if i % config["print_num"] == 0:
            log_str = "iter: {:05d}, transferloss: {:.5f}, classifier_loss: {:.5f}, mean entropy:{:.5f}, gvbg:{:.5f}, gvbd:{:.5f}".format(i, transfer_loss, classifier_loss, mean_entropy, gvbg, gvbd)
            config["out_file"].write(log_str+"\n")
            config["out_file"].flush()
            print(log_str)

        total_loss.backward()
        optimizer.step()
    torch.save(best_model, osp.join(config["output_path"], "best_model.pth.tar"))
    return best_acc


In [None]:
__name__ = True
if __name__ == True:

    assert Version(torch.__version__) >= Version('1.0.0'), 'PyTorch>=1.0.0 is required'
    ###### Update the parameters ###########
    class Args:
        gpu_id ='0'
        net = 'passt'
        dset = 'dcase'
        s_train_path = 'train_source_list_labels.txt'
        t_train_path = 'train_target_list_labels.txt'
        s_test_path = 'test_source_list_labels.txt'
        t_test_path = 'test_target_list_labels.txt'
        test_interval = 500
        snapshot_interval = 5000
        print_num = 100
        num_iterations = 30002
        output_dir = 'output'
        lr = 0.001
        trade_off = 1
        batch_size = 4
        GVBG = 1
        GVBD = 1
        CDAN = False         
    args = Args()
  
    
     
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id

    # train config
    config = {}
    config["GVBG"] = args.GVBG
    config["GVBD"] = args.GVBD 
    config["CDAN"] = args.CDAN
    config["gpu"] = args.gpu_id
    config["num_iterations"] = args.num_iterations 
    config["print_num"] = args.print_num
    config["test_interval"] = args.test_interval
    config["snapshot_interval"] = args.snapshot_interval
    config["output_for_test"] = True
    config["output_path"] = args.dset + "/" + args.output_dir

    if not osp.exists(config["output_path"]):
        os.system('mkdir -p '+config["output_path"])
    config["out_file"] = open(osp.join(config["output_path"], "log.txt"), "w")
    if not osp.exists(config["output_path"]):
        os.mkdir(config["output_path"])


    #config["prep"] = {'params':{"resize_size":256, "crop_size":224, 'alexnet':False}}
    config["loss"] = {"trade_off":args.trade_off}
    config["network"] = {
    "name": generator,
    "params": {
        "passt_extractor": extractor,
        "use_bottleneck": True,
        "bottleneck_dim": 256,
        "new_cls": True,
        "class_num": 10
            }
        }

    config["optimizer"] = {"type":optim.SGD, "optim_params":{'lr':args.lr, "momentum":0.9, \
                           "weight_decay":0.0005, "nesterov":True}, "lr_type":"inv", \
                           "lr_param":{"lr":args.lr, "gamma":0.001, "power":0.75} }

    config["dataset"] = args.dset
    config["data"] = {
        "source": {"list_path": args.s_train_path, "batch_size": args.batch_size},
        "target": {"list_path": args.t_train_path, "batch_size": args.batch_size},
        "test_source": {"list_path": args.s_test_path, "batch_size": args.batch_size},
        "test_target": {"list_path": args.t_test_path, "batch_size": args.batch_size},
        "test": {"batch_size": args.batch_size}
                     }
    seed=2025

    # if config["dataset"] == "office-home":
    #     seed = 2019
    #     config["optimizer"]["lr_param"]["lr"] = 0.001 # optimal parameters
    #     config["network"]["params"]["class_num"] = 65
    # elif config["dataset"] == "office":
    #     seed = 2019
    #     if   ("webcam" in args.s_dset_path and "amazon" in args.t_dset_path) or \
    #          ("dslr" in args.s_dset_path and "amazon" in args.t_dset_path):
    #          config["optimizer"]["lr_param"]["lr"] = 0.001 # optimal parameters
    #     elif ("amazon" in args.s_dset_path and "webcam" in args.t_dset_path) or \
    #          ("amazon" in args.s_dset_path and "dslr" in args.t_dset_path) or \
    #          ("webcam" in args.s_dset_path and "dslr" in args.t_dset_path) or \
    #          ("dslr" in args.s_dset_path and "webcam" in args.t_dset_path):
    #          config["optimizer"]["lr_param"]["lr"] = 0.0003 # optimal parameters
    #     config["network"]["params"]["class_num"] = 31
    # elif config["dataset"] == "visda":
    #     seed = 9297
    #     config["optimizer"]["lr_param"]["lr"] = 0.0003 # optimal parameters
    #     config["network"]["params"]["class_num"] = 12
    # else:
    #     raise ValueError('Dataset cannot be recognized. Please define your own dataset here.')
    print(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    config["out_file"].write(str(config))
    config["out_file"].flush()
    train(config)


2025
iter: 00000, transferloss: 0.67508, classifier_loss: 0.39530, mean entropy:0.51333, gvbg:0.49823, gvbd:0.82561
iter: 00100, transferloss: 1.30132, classifier_loss: 0.52701, mean entropy:0.60603, gvbg:0.19104, gvbd:0.60151
iter: 00200, transferloss: 0.53631, classifier_loss: 0.12656, mean entropy:0.72923, gvbg:0.09774, gvbd:0.32336
iter: 00300, transferloss: 0.61623, classifier_loss: 0.40660, mean entropy:0.52012, gvbg:0.09854, gvbd:0.09855
iter: 00400, transferloss: 0.72418, classifier_loss: 0.06869, mean entropy:0.76811, gvbg:0.08434, gvbd:0.13143
iter: 00499, source_acc: 0.76061, target_acc: 0.47005
iter: 00500, transferloss: 0.69916, classifier_loss: 0.74263, mean entropy:0.61767, gvbg:0.07199, gvbd:0.06846
iter: 00600, transferloss: 0.70352, classifier_loss: 0.94860, mean entropy:0.69468, gvbg:0.08576, gvbd:0.03904
iter: 00700, transferloss: 0.72695, classifier_loss: 0.06642, mean entropy:0.69821, gvbg:0.08429, gvbd:0.04155
iter: 00800, transferloss: 0.52392, classifier_loss: 

In [None]:
class AudioList(torch.utils.data.Dataset):
    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform
        self.data = [(line.split()[0], int(line.split()[1])) for line in file_list]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        path, label = self.data[index]
        waveform, sr = torchaudio.load(path)  
        if self.transform:
            waveform = self.transform(waveform)
        return waveform, label


<h2>text file creation script<h2>

In [None]:
import os
os.getcwd()


In [None]:
def save_wav_filenames(input_dir, output_txt_path, full_path=True):
    with open(output_txt_path, 'w') as f:
        for root, _, files in os.walk(input_dir):
            for file in files:
                if file.lower().endswith('.wav'):
                    path = os.path.join(root, file) if full_path else file
                    f.write(path + '\n')

# Example usage
input_directory = './G3/data/corrected_split/test/target'           # Change this to your .wav folder
output_file = 'test_target_list.txt'         # Desired output text file
save_wav_filenames(input_directory, output_file, full_path=True)


In [None]:
with open('test_target_list_labels.txt') as f:
    audio_list= f.readlines()
audio_list    

In [None]:
def get_dcase_label(path):
    for scene, label in scene_label_map.items():
        if scene in path:
            return label
    return -1 

with open("test_target_list.txt") as fin, open("test_target_list_labels.txt", "w") as fout:
    for line in fin:
        path = line.strip()
        label = get_dcase_label(path)
        if label == -1:
            print(f"Warning: Scene not found in {path}")
            continue
        fout.write(f"{path} {label}\n")