In [1]:
import torchaudio.functional as F
import torch
import numpy as np
from models import CustomVGGish2

deconv = np.load('../Acoustic-IA-Data-Hack-2024/data/LivingRoom_preprocessed_hack/Human1/deconvoled_trim.npy')
centroid = np.load('../Acoustic-IA-Data-Hack-2024/data/LivingRoom_preprocessed_hack/Human1/centroid.npy')

train_xy = centroid[200:]
valid_xy = centroid[100:200]
test_xy = centroid[:100]

train_mean = np.mean(train_xy, axis=0)
train_std = np.std(train_xy, axis=0)

train_xy = (train_xy - train_mean) / (train_std + 1e-8)
valid_xy = (valid_xy - train_mean) / (train_std + 1e-8)
test_xy =  (test_xy - train_mean) / (train_std + 1e-8)

train_waves = deconv[200:, :]
# 30950 seems to be the rough cutoff after which vggish treats the input as two examples.
valid_waves = deconv[100:200, :]
#Test Waves
test_waves = deconv[:100, :]

precutoff = 92850

train_waves = train_waves[..., :precutoff]
valid_waves = valid_waves[..., :precutoff]
test_waves = test_waves[..., :precutoff]

train_waves = torch.Tensor(train_waves).cuda()
train_xy = torch.Tensor(train_xy).cuda()

valid_waves = torch.Tensor(valid_waves).cuda()
valid_xy = torch.Tensor(valid_xy).cuda()

test_waves = torch.Tensor(test_waves).cuda()
test_xy = torch.Tensor(test_xy).cuda()

def resample(audio, ir=48000, tr=16000):
    resampled_waveform = F.resample(
        audio,
        ir,
        tr,
        lowpass_filter_width=64,
        rolloff=0.9475937167399596,
        resampling_method="kaiser_window",
        beta=14.769656459379492,
    )
    return resampled_waveform

print("Resampling")
train_waves = resample(train_waves)
valid_waves = resample(valid_waves)
test_waves = resample(test_waves)

vggish_cutoff = 15475

train_waves = train_waves[..., :vggish_cutoff]
valid_waves = valid_waves[..., :vggish_cutoff]
test_waves = test_waves[..., :vggish_cutoff]

out_channels = 2

# Instantiate the CustomVGGish2 model
net = CustomVGGish2(in_channels=4, out_channels=10)  # Adjust in_channels and out_channels according to your requirements

total_params = sum(p.numel() for p in net.parameters())
trainable_params = sum(p.numel() for p in net.parameters() if p.requires_grad)

print('Total parameters: %i'%total_params)
print('Trainable parameters: %i'%trainable_params)
    
xy_loss_fn = torch.nn.MSELoss(reduction='mean')

lr = 0.00001

optimizer = torch.optim.Adam(params=net.parameters(), lr=lr, betas=(0.9, 0.999))
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.999995)

batch_size = 32

N_iter = int(train_waves.shape[0] / batch_size)
train_losses = []
train_xy_losses = []
valid_losses = []
valid_xy_losses = []
step_count = 0

num_epochs = 10

for n in range(num_epochs):
    print('Reshuffling for Epoch %i'%n, flush=True)
    rand_idx = np.random.permutation(train_waves.shape[0])
    net.train()
    optimizer.zero_grad()
    for i in range(N_iter):
        curr_idx = rand_idx[i*batch_size:(i+1)*batch_size]
        net_out = net(train_waves[curr_idx, :])
        results = postprocess_net_output(net_out)
        xy_loss = xy_loss_fn(results[:, :2], train_xy[curr_idx, :2])
        loss = xy_loss
        optimizer.zero_grad()
        loss.backward()
        train_loss = loss.item()
        train_losses.append((step_count, train_loss))
        train_xy_losses.append((step_count, xy_loss.item()))
        step_count+=1
        optimizer.step()
        scheduler.step()

    net.eval()
    valid_loss_xy_arr = np.zeros(valid_waves.shape[0], dtype=np.float32)
    valid_loss_arr = np.zeros(valid_waves.shape[0], dtype=np.float32)
    for i in range(valid_waves.shape[0]):
        with torch.no_grad():
            results = torch.squeeze(postprocess_net_output(net(torch.unsqueeze(valid_waves[i, :], axis=0)).view(-1, 1)))
        xy_loss = xy_loss_fn(results[:2], valid_xy[i, :2])
        valid_loss_xy_arr[i] = xy_loss.item()
        loss = xy_loss
        valid_loss_arr[i] = loss.item()
    valid_xy_loss = np.mean(valid_loss_xy_arr)
    valid_loss = np.mean(valid_loss_arr)
    print('Validation XY Loss: %0.3f'%valid_xy_loss)
    print('Validation Loss: %0.3f'%valid_loss)
    valid_losses.append((step_count, valid_loss))
    valid_xy_losses.append((step_count, valid_xy_loss))

    
    np.save(os.path.join(args.error_path, 'train_losses.npy'), np.array(train_losses, dtype=np.float32))
    np.save(os.path.join(args.error_path, 'valid_losses.npy'), np.array(valid_losses, dtype=np.float32))

    #Iterate through test
    test_errors = np.zeros(test_waves.shape[0], dtype=np.float32)

    for i in range(test_waves.shape[0]):
        with torch.no_grad():
            results = torch.squeeze(postprocess_net_output(net(torch.unsqueeze(test_waves[i, :], axis=0)).view(-1, 1)))
            
            
        test_errors[i] = torch.norm(unnormalize(results[:2]) - unnormalize(test_xy[i, :2])).item()

    print("TEST ERROR")
    print(test_errors)
        
    print("MEAN TEST ERROR",flush=True)
    print(np.mean(test_errors))
    print("MED TEST ERROR")
    print(np.median(test_errors))
    print("STD TEST ERROR")
    print(np.std(test_errors))

    np.save(os.path.join(args.error_path, 'test_errors.npy'), np.array(test_errors, dtype=np.float32))        


    torch.save({
        'epoch': n,
        'model_state_dict': net.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_losses': train_losses,
        'train_xy_losses': train_xy_losses,
        'valid_losses': valid_losses,
        'valid_xy_losses': valid_xy_losses,
        'train_mean': train_mean,
        'train_std': train_std,
        'norm_val_min':norm_val_min,
        'norm_val_range':norm_val_range,
        'lr': args.lr,
    }, args.save_path)
    


# Prepare your input data (assuming signals_tensor is your processed input tensor)
# Assuming signals_tensor is already reshaped and processed as needed by CustomVGGish2
# Make sure signals_tensor is a PyTorch tensor and is on the correct device
signals_tensor = torch.tensor(signals, dtype=torch.float32)  # Assuming signals is your numpy array of signals
signals_tensor = signals_tensor.to(model.device)  # Ensure tensor is on the correct device

# Pass the input data through the model
output = model(signals_tensor)

# Now 'output' contains the output of the CustomVGGish2 model
# You can use this output for further processing or analysis


OSError: /opt/mamba/lib/python3.11/site-packages/torchaudio/lib/libtorchaudio.so: undefined symbol: _ZN3c104cuda9SetDeviceEi

In [None]:
train_waves.strides()

AttributeError: 'Tensor' object has no attribute 'strides'