In [None]:
import numpy as np
import os
import random
import shutil
import time
import warnings
from collections import defaultdict
from functools import reduce
import json
import gc

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models


# Colab-only setup: mount Google Drive so the project files are reachable from the runtime.
from google.colab import drive
import os
drive.mount('/content/drive', force_remount=True)
# Switch into the project's Code directory; the relative paths used in later cells
# (./features, ./data, ./model_snapshots) are written relative to this directory.
os.chdir('/content/drive/My Drive/Courses/CS545_MLSP/CS545_MLSP_Project/Code/')
# from model import *
# from modeldropout import *
# Star-import of a project-local module; presumably the source of se_resnet34 /
# se_resnet50 used further down — TODO confirm.
from modeldropoutaff import *

Mounted at /content/drive


In [None]:
# Hyper-parameters shared by the cells below (original author's note: "epochs: 20").
training_params = dict(
    batch_size=32,
    test_batch_size=64,
    epochs=15,
    start_epoch=1,
    n_warmup_steps=1000,
    log_interval=30,
)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Code related to model and validation

In [None]:
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each requested k.

    Args:
        output: score tensor indexed as (sample, class); ``topk`` is taken
            along dim 1.
        target: tensor of class indices whose first dimension is the batch size.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one 1-element tensor per k, holding the percentage of
        samples whose target is among the k highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk best classes per sample, transposed to (maxk, batch).
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape (not view): the slice of the transposed comparison result is
            # not guaranteed to be contiguous, and view() raises in that case.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

class AverageMeter(object):
    """Tracks the most recent value of a metric together with its weighted running mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the last value, the running mean, the weighted sum and the sample count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed over ``n`` samples and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        # The tiny epsilon keeps the division defined while count is still 0.
        self.avg = self.sum / (self.count + 1e-10)

class ScheduledOptim(object):
    """Optimizer wrapper that sets the learning rate from a step-count schedule.

    The rate is ``d_model**-0.5 * min(step**-0.5, step * n_warmup_steps**-1.5)``,
    where ``step`` advances by ``delta`` on every ``update_learning_rate`` call.
    """

    # Scalar fields that are saved to / restored from a checkpoint, in order.
    _STATE_KEYS = ('d_model', 'n_warmup_steps', 'n_current_steps', 'delta')

    def __init__(self, optimizer, n_warmup_steps):
        self.optimizer = optimizer
        self.d_model = 64
        self.n_warmup_steps = n_warmup_steps
        self.n_current_steps = 0
        self.delta = 1

    def step(self):
        """Delegate one optimization step to the wrapped optimizer."""
        self.optimizer.step()

    def zero_grad(self):
        """Clear gradients through the wrapped optimizer."""
        self.optimizer.zero_grad()

    def increase_delta(self):
        """Double the amount by which the step counter advances per update."""
        self.delta = self.delta * 2

    def update_learning_rate(self):
        """Advance the step counter, write the new rate to every param group and return it."""
        self.n_current_steps += self.delta
        current = self.n_current_steps

        decay_term = np.power(current, -0.5)
        warmup_term = np.power(self.n_warmup_steps, -1.5) * current
        new_lr = np.power(self.d_model, -0.5) * np.min([decay_term, warmup_term])

        for group in self.optimizer.param_groups:
            group['lr'] = new_lr
        return new_lr

    def state_dict(self):
        """Return the schedule fields plus the wrapped optimizer's state under 'optimizer'."""
        state = {key: getattr(self, key) for key in self._STATE_KEYS}
        state['optimizer'] = self.optimizer.state_dict()
        return state

    def load_state_dict(self, state_dict):
        """Restore the schedule fields and the wrapped optimizer from ``state_dict``."""
        for key in self._STATE_KEYS:
            setattr(self, key, state_dict[key])
        self.optimizer.load_state_dict(state_dict['optimizer'])

In [None]:
# loads the trained parameters for the specified model from the specified path
def load_model(model, checkpointPath):
  """Restore model weights and scheduled-optimizer state from a checkpoint file.

  Args:
    model: network whose parameters match the checkpoint's 'state_dict' entry.
    checkpointPath: path to a file readable by ``torch.load`` that contains
      'state_dict' and 'optimizer' entries.

  Returns:
    ``(model, optim)``: the model with the loaded weights, and a ScheduledOptim
    wrapping a new Adam optimizer restored from the checkpoint.
  """
  # Map stored tensors onto the device the model already lives on, so a
  # checkpoint saved on a GPU can also be loaded on a CPU-only runtime.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else torch.device('cpu')
  checkpoint = torch.load(checkpointPath, map_location=target_device)
  model.load_state_dict(checkpoint['state_dict'])

  # Only parameters that require gradients are handed to Adam.
  optim = ScheduledOptim(
          torch.optim.Adam(
          filter(lambda p: p.requires_grad, model.parameters()),
          betas=(0.9, 0.98), eps=1e-09, weight_decay=1e-4, lr=3e-4, amsgrad=True),
      training_params['n_warmup_steps'])
  optim.load_state_dict(checkpoint['optimizer'])

  return model, optim

In [None]:
def validate(val_loader, model, device, log_interval, return_predictions=False):
    """Evaluate ``model`` on ``val_loader`` and report the top-1 accuracy.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate; it is switched to eval mode here. Its
            output is passed to ``F.nll_loss``, so it is presumably
            log-probabilities — TODO confirm against the model definition.
        device: device the batches are moved to.
        log_interval: print running statistics every this many batches
            (a falsy value disables the periodic print).
        return_predictions: when True, also return the per-sample targets and
            arg-max predictions collected during the pass.

    Returns:
        The sample-weighted average top-1 accuracy in percent, or
        ``(accuracy, targets, predictions)`` when ``return_predictions`` is True.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # Flat per-sample lists, in the order the loader produced the samples.
    targets = []
    predictions = []

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.to(device, non_blocking=True)

            target = target.to(device, non_blocking=True).view((-1,))
            targets.extend(target.tolist())

            # compute output
            output = model(input)

            # predicted class = index of the highest score
            pred = output.argmax(1)
            predictions.extend(pred.tolist())

            loss = F.nll_loss(output, target)

            # measure accuracy and record loss, weighted by the batch size
            acc1, = accuracy(output, target, topk=(1, ))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # Guard against log_interval == 0, which would raise on the modulo.
            if log_interval and i % log_interval == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1))

    print('===> Acc@1 {top1.avg:.3f}\n'.format(top1=top1))

    if return_predictions:
        return top1.avg, targets, predictions
    return top1.avg

In [None]:
# checkpointPath = './model_snapshots/9/15_96.013.pth.tar'
# model = se_resnet34(2,True).to(torch.device("cuda:0"))

# model, optim = load_model(model, checkpointPath)

## Code for loading the test data

In [None]:
# AFF 

def local_att(x):
    """Local branch of the AFF fusion: conv -> batch-norm -> ReLU -> conv -> batch-norm.

    Both 2x2 convolutions use kernels freshly drawn with ``torch.randn`` on
    every call, so the result depends on the current torch RNG state. Each
    unpadded 2x2 convolution shrinks height and width by one.

    Args:
        x: single-channel 4-D tensor (N, 1, H, W).

    Returns:
        Tensor of shape (N, 1, H - 2, W - 2).
    """
    def _random_kernel():
        # Match x's device and dtype so inputs other than CPU float32 also work.
        return torch.randn(1, 1, 2, 2, device=x.device, dtype=x.dtype)

    # NOTE(review): these statistics come from the input *before* the first
    # convolution but normalize its output (global_att takes them after the
    # convolution). Kept unchanged to preserve existing features — confirm intent.
    mean = torch.mean(x).reshape(1)
    var = torch.var(x, unbiased=True).reshape(1)
    x = F.conv2d(x, _random_kernel(), stride=1)
    x = F.batch_norm(x, running_mean=mean, running_var=var)
    x = F.relu(x, inplace=True)

    x = F.conv2d(x, _random_kernel(), stride=1)
    mean = torch.mean(x).reshape(1)
    var = torch.var(x, unbiased=True).reshape(1)
    x = F.batch_norm(x, running_mean=mean, running_var=var)

    return x

def global_att(x):
    """Global branch of the AFF fusion: pool to 3x3 -> conv -> BN -> ReLU -> conv -> BN.

    The input is average-pooled to 3x3 and then passed through two unpadded
    2x2 convolutions, leaving one value per sample. Kernels are freshly drawn
    with ``torch.randn`` on every call, so the result depends on the torch
    RNG state.

    Args:
        x: single-channel 4-D tensor (N, 1, H, W).

    Returns:
        Tensor of shape (N, 1, 1, 1). With N == 1 the last unbiased variance
        is taken over a single element, so the output is NaN in that case.
    """
    def _random_kernel():
        # Match x's device and dtype so inputs other than CPU float32 also work.
        return torch.randn(1, 1, 2, 2, device=x.device, dtype=x.dtype)

    def _normalize(t):
        # Batch-norm using the scalar mean / unbiased variance of t itself.
        mean = torch.mean(t).reshape(1)
        var = torch.var(t, unbiased=True).reshape(1)
        return F.batch_norm(t, running_mean=mean, running_var=var)

    x = F.adaptive_avg_pool2d(x, 3)
    x = _normalize(F.conv2d(x, _random_kernel(), stride=1))
    x = F.relu(x, inplace=True)
    x = _normalize(F.conv2d(x, _random_kernel(), stride=1))

    return x

def aff(x, y):
    """Fuse two equally shaped feature tensors with an attention-derived gate.

    The gate is the sigmoid of ``local_att(x + y) + global_att(x + y)``,
    zero-padded by one element on every side of the last two dimensions. The
    result is ``2 * x * gate + 2 * y * (1 - gate)``, so wherever the padding
    leaves the gate at zero the output is ``2 * y``.
    """
    summed = x + y
    local_branch = local_att(summed)
    global_branch = global_att(summed)
    del summed
    gc.collect()

    gate = torch.sigmoid(local_branch + global_branch)
    del local_branch, global_branch
    gc.collect()

    # Pad the gate by one element per side of the last two dimensions.
    gate = F.pad(gate, (1, 1, 1, 1))
    return 2 * x * gate + 2 * y * (1 - gate)


In [None]:
class AudioFeatureDataset(Dataset):
    """Single-label dataset of AFF-fused MFCC + LFCC features loaded from one folder.

    ``filepath`` must contain ``mfcc.npy`` and ``lfcc.npy``. Each array is
    zero-padded and reshaped to (-1, 1, 136, 44), passed through a 2x2
    convolution with a random kernel, and the two results are fused with ``aff``.
    Every sample gets the same ``label`` (0 = fake, 1 = real).

    NOTE(review): a later cell of this notebook defines another class with the
    same name but a ``(filepath, sampleCount)`` constructor; once that cell has
    run it replaces this definition — confirm which one callers expect.
    """

    def __init__(self, filepath, label):
        # ---- MFCC: pad the second-to-last dimension by 58 on each side ----
        mfcc = torch.Tensor(np.load(f'{filepath}/mfcc.npy'))
        gc.collect()

        self.labels = [label] * mfcc.shape[0]  # 0=fake, 1=real

        mfcc_padded = F.pad(mfcc, (0, 0, 58, 58), mode='constant', value=0).reshape(-1, 1, 136, 44)
        del mfcc
        gc.collect()

        # ---- LFCC: pad the last dimension by 15 (left) and 16 (right) ----
        lfcc = torch.Tensor(np.load(f'{filepath}/lfcc.npy'))
        gc.collect()

        lfcc_padded = F.pad(lfcc, (15, 16), mode='constant', value=0).reshape(-1, 1, 136, 44)
        del lfcc
        gc.collect()

        # Random 2x2 convolutions (MFCC first, then LFCC) ahead of the fusion.
        mfcc_padded = F.conv2d(mfcc_padded, torch.randn(1, 1, 2, 2), stride=1)
        lfcc_padded = F.conv2d(lfcc_padded, torch.randn(1, 1, 2, 2), stride=1)

        self.fusedFeatures = aff(mfcc_padded, lfcc_padded)

        del lfcc_padded, mfcc_padded
        gc.collect()

    def __len__(self):
        """Number of fused samples."""
        return len(self.fusedFeatures)

    def __getitem__(self, idx):
        """Return ``(fused_features, label)`` for sample ``idx``."""
        return self.fusedFeatures[idx], self.labels[idx]

In [None]:
class AudioFeatureDatasetWoAFF(Dataset):
    """Single-label dataset that stacks MFCC and LFCC as two channels (no AFF fusion).

    ``filepath`` must contain ``mfcc.npy`` and ``lfcc.npy``. Each array is
    zero-padded and reshaped to (N, 1, 136, 44); the two are then joined along
    the channel dimension so sample ``i`` holds its own MFCC in channel 0 and
    its own LFCC in channel 1.

    Args:
        filepath: folder holding the two ``.npy`` files.
        label: label assigned to every sample (0 = fake, 1 = real).
    """

    def __init__(self, filepath, label):

        #------------------------------------------------------------------------------------------------------------------------------------------------#
        # MFCC data
        #------------------------------------------------------------------------------------------------------------------------------------------------#
        X = np.load(filepath + '/mfcc.npy')
        fakeMfccData = torch.Tensor(X)
        del X
        gc.collect()

        self.labels = [label] * fakeMfccData.shape[0] # 0=fake, 1=real

        # Pad the second-to-last dimension by 58 on each side.
        paddedMfccData = torch.reshape(
            F.pad(fakeMfccData, (0, 0, 58, 58), mode='constant', value=0),
            (-1, 1, 136, 44))
        del fakeMfccData
        gc.collect()
        print('MFCC:', paddedMfccData.shape)
        #------------------------------------------------------------------------------------------------------------------------------------------------#
        # LFCC data
        #------------------------------------------------------------------------------------------------------------------------------------------------#
        X = np.load(filepath + '/lfcc.npy')
        fakeLfccData = torch.Tensor(X)
        del X
        gc.collect()

        # Pad the last dimension by 15 (left) and 16 (right).
        paddedLfccData = torch.reshape(
            F.pad(fakeLfccData, (15, 16), mode='constant', value=0),
            (-1, 1, 136, 44))
        del fakeLfccData
        gc.collect()

        print('LFCC:', paddedLfccData.shape)

        #-------------------------------------------------------------------------------------------------------------------------------------------------#
        # Concatenate along the channel dimension. Stacking on dim 0 and then
        # reshaping to (N, 2, ...) would pair features of different samples.
        self.fusedFeatures = torch.cat((paddedMfccData, paddedLfccData), dim=1)
        print('Stacked shape: ', self.fusedFeatures.shape)

        del paddedLfccData, paddedMfccData
        gc.collect()

    def __len__(self):
        """Number of samples."""
        return len(self.fusedFeatures)

    def __getitem__(self, idx):
        """Return ``(two_channel_features, label)`` for sample ``idx``."""
        return self.fusedFeatures[idx], self.labels[idx]

# Actual testing

## Testing a single model on a single folder

In [None]:
# AFF 
def local_att(x):
    """Local branch of the AFF fusion: conv -> batch-norm -> ReLU -> conv -> batch-norm.

    NOTE(review): this cell re-defines ``local_att``; an earlier cell in this
    notebook already defines a function with the same name.

    Both 2x2 convolutions use kernels freshly drawn with ``torch.randn`` on
    every call, so the result depends on the current torch RNG state. Each
    unpadded 2x2 convolution shrinks height and width by one.

    Args:
        x: single-channel 4-D tensor (N, 1, H, W).

    Returns:
        Tensor of shape (N, 1, H - 2, W - 2).
    """
    def _random_kernel():
        # Match x's device and dtype so inputs other than CPU float32 also work.
        return torch.randn(1, 1, 2, 2, device=x.device, dtype=x.dtype)

    # NOTE(review): these statistics come from the input *before* the first
    # convolution but normalize its output (global_att takes them after the
    # convolution). Kept unchanged to preserve existing features — confirm intent.
    mean = torch.mean(x).reshape(1)
    var = torch.var(x, unbiased=True).reshape(1)
    x = F.conv2d(x, _random_kernel(), stride=1)
    x = F.batch_norm(x, running_mean=mean, running_var=var)
    x = F.relu(x, inplace=True)

    x = F.conv2d(x, _random_kernel(), stride=1)
    mean = torch.mean(x).reshape(1)
    var = torch.var(x, unbiased=True).reshape(1)
    x = F.batch_norm(x, running_mean=mean, running_var=var)

    return x

def global_att(x):
    """Global branch of the AFF fusion: pool to 3x3 -> conv -> BN -> ReLU -> conv -> BN.

    NOTE(review): this cell re-defines ``global_att``; an earlier cell in this
    notebook already defines a function with the same name.

    The input is average-pooled to 3x3 and then passed through two unpadded
    2x2 convolutions, leaving one value per sample. Kernels are freshly drawn
    with ``torch.randn`` on every call, so the result depends on the torch
    RNG state.

    Args:
        x: single-channel 4-D tensor (N, 1, H, W).

    Returns:
        Tensor of shape (N, 1, 1, 1). With N == 1 the last unbiased variance
        is taken over a single element, so the output is NaN in that case.
    """
    def _random_kernel():
        # Match x's device and dtype so inputs other than CPU float32 also work.
        return torch.randn(1, 1, 2, 2, device=x.device, dtype=x.dtype)

    def _normalize(t):
        # Batch-norm using the scalar mean / unbiased variance of t itself.
        mean = torch.mean(t).reshape(1)
        var = torch.var(t, unbiased=True).reshape(1)
        return F.batch_norm(t, running_mean=mean, running_var=var)

    x = F.adaptive_avg_pool2d(x, 3)
    x = _normalize(F.conv2d(x, _random_kernel(), stride=1))
    x = F.relu(x, inplace=True)
    x = _normalize(F.conv2d(x, _random_kernel(), stride=1))

    return x

def aff(x, y):
    """Fuse two equally shaped feature tensors with an attention-derived gate.

    NOTE(review): this cell re-defines ``aff``; an earlier cell in this
    notebook already defines a function with the same name.

    The gate is the sigmoid of ``local_att(x + y) + global_att(x + y)``,
    zero-padded by one element on every side of the last two dimensions. The
    result is ``2 * x * gate + 2 * y * (1 - gate)``, so wherever the padding
    leaves the gate at zero the output is ``2 * y``.
    """
    summed = x + y
    local_branch = local_att(summed)
    global_branch = global_att(summed)
    del summed
    gc.collect()

    gate = torch.sigmoid(local_branch + global_branch)
    del local_branch, global_branch
    gc.collect()

    # Pad the gate by one element per side of the last two dimensions.
    gate = F.pad(gate, (1, 1, 1, 1))
    return 2 * x * gate + 2 * y * (1 - gate)


class AudioFeatureDataset(Dataset):
    """Balanced real/fake dataset of AFF-fused, masked MFCC + LFCC features.

    Expects ``real/`` and ``fake/`` sub-folders under ``filepath``, each with
    ``mfcc_masked.npy`` and ``lfcc_masked.npy``. ``sampleCount`` rows are drawn
    without replacement (via ``np.random.choice``) from each class, and the
    same row indices are used for the MFCC and LFCC arrays — this assumes the
    two files of a class are row-aligned (TODO confirm). Fake samples come
    first (label 0), then real samples (label 1).

    NOTE(review): this class re-uses the name of the earlier
    ``AudioFeatureDataset(filepath, label)`` definition and replaces it once
    this cell has run; callers written for the other signature will then reach
    this constructor — confirm which one is intended.

    Args:
        filepath: folder containing the ``real`` and ``fake`` sub-folders
            (with or without a trailing slash).
        sampleCount: number of rows to draw per class; ``np.random.choice``
            raises ValueError if a class has fewer rows.
    """

    # Zero-padding applied before each array is reshaped to (-1, 1, 136, 44).
    _MFCC_PAD = (0, 0, 58, 58)
    _LFCC_PAD = (15, 16)

    def __init__(self, filepath, sampleCount = 2000):
        # os.path.join tolerates a missing trailing slash on filepath.
        def feature_path(kind, name):
            return os.path.join(filepath, kind, name)

        #------------------------------------------------------------------------------------------------------------------------------------------------#
        # MFCC data (the random row subsets are chosen here: real first, then fake)
        #------------------------------------------------------------------------------------------------------------------------------------------------#
        realRows, realMfccData = self._sample_rows(feature_path('real', 'mfcc_masked.npy'), sampleCount)
        fakeRows, fakeMfccData = self._sample_rows(feature_path('fake', 'mfcc_masked.npy'), sampleCount)

        self.labels = np.concatenate(
            (np.zeros(fakeMfccData.shape[0]), np.ones(realMfccData.shape[0]))
        ).astype(np.int64)  # 0=fake, 1=real

        paddedMfccData = self._pad_and_reshape(
            torch.cat((fakeMfccData, realMfccData), dim=0), self._MFCC_PAD)
        del fakeMfccData, realMfccData
        gc.collect()
        print('MFCC:', paddedMfccData.shape)

        #------------------------------------------------------------------------------------------------------------------------------------------------#
        # LFCC data (same row subsets as the MFCC data)
        #------------------------------------------------------------------------------------------------------------------------------------------------#
        realLfccData = self._load_rows(feature_path('real', 'lfcc_masked.npy'), realRows)
        fakeLfccData = self._load_rows(feature_path('fake', 'lfcc_masked.npy'), fakeRows)

        paddedLfccData = self._pad_and_reshape(
            torch.cat((fakeLfccData, realLfccData), dim=0), self._LFCC_PAD)
        del fakeLfccData, realLfccData
        gc.collect()
        print('LFCC:', paddedLfccData.shape)

        # Random 2x2 convolutions (MFCC first, then LFCC) ahead of the fusion.
        paddedMfccData = F.conv2d(paddedMfccData, torch.randn(1,1,2,2), stride=1)
        paddedLfccData = F.conv2d(paddedLfccData, torch.randn(1,1,2,2), stride=1)

        self.fusedFeatures = aff(paddedMfccData, paddedLfccData)
        print('Fused features shape: ', self.fusedFeatures.shape)

        del paddedLfccData, paddedMfccData
        gc.collect()

    @staticmethod
    def _sample_rows(path, sampleCount):
        """Load ``path`` and return ``(row_indices, tensor_of_those_rows)`` for a random subset."""
        data = np.load(path)
        rows = np.random.choice(data.shape[0], sampleCount, replace=False)
        subset = torch.Tensor(data[rows])
        del data
        gc.collect()
        return rows, subset

    @staticmethod
    def _load_rows(path, rows):
        """Load ``path`` and return the given rows as a tensor."""
        data = np.load(path)
        subset = torch.Tensor(data[rows])
        del data
        gc.collect()
        return subset

    @staticmethod
    def _pad_and_reshape(features, pad):
        """Zero-pad ``features`` with ``pad`` and reshape to (-1, 1, 136, 44)."""
        return torch.reshape(F.pad(features, pad, mode='constant', value=0), (-1, 1, 136, 44))

    def __len__(self):
        """Number of fused samples."""
        return len(self.fusedFeatures)

    def __getitem__(self, idx):
        """Return ``(fused_features, label)`` for sample ``idx``."""
        return self.fusedFeatures[idx], self.labels[idx]

In [None]:
# Worker processes / pinned memory are only requested when evaluating on a GPU.
kwargs = {'num_workers': 2, 'pin_memory': True} if device.type == 'cuda' else {}

folder = './features/for/training/'

best_acc, best_preds, best_targets = 0, [], []

# Fixed seed (the value itself is arbitrary) so the cell also runs on a fresh
# kernel; the original read a loop variable `i` that this cell never defines.
# NumPy is seeded too because the dataset samples rows with np.random.choice.
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)

testDataset = AudioFeatureDataset(folder, 1000) #AudioFeatureDatasetWoAFF(basepath + folder, label)
testLoader = DataLoader(testDataset, batch_size=training_params['test_batch_size'], shuffle = True, **kwargs)
del testDataset

# Build the network on the configured device and load the trained weights;
# without this step the freshly initialised model would be evaluated.
model = se_resnet34(2,True).to(device)
checkpointPath = './model_snapshots/14/14_92.788.pth.tar'
model, _ = load_model(model, checkpointPath)

# validate() disables gradients itself and, as defined above, returns only the
# average top-1 accuracy (no per-sample targets/predictions).
acc = validate(testLoader, model, device, training_params['log_interval'])


MFCC: torch.Size([2000, 1, 136, 44])
LFCC: torch.Size([2000, 1, 136, 44])
Fused features shape:  torch.Size([2000, 1, 135, 43])
Using Kaiming Initialization.
Test: [0/32]	Time 0.140 (0.140)	Loss 38.9294 (38.9294)	Acc@1 53.125 (53.125)
Test: [30/32]	Time 0.024 (0.032)	Loss 40.0520 (45.5343)	Acc@1 48.438 (49.950)
===> Acc@1 50.000

Acc:  tensor(50.0000, device='cuda:0')
MFCC: torch.Size([2000, 1, 136, 44])
LFCC: torch.Size([2000, 1, 136, 44])
Fused features shape:  torch.Size([2000, 1, 135, 43])
Using Kaiming Initialization.
Test: [0/32]	Time 0.154 (0.154)	Loss 2.6589 (2.6589)	Acc@1 53.125 (53.125)
Test: [30/32]	Time 0.023 (0.031)	Loss 2.6030 (3.5035)	Acc@1 54.688 (49.950)
===> Acc@1 50.000

MFCC: torch.Size([2000, 1, 136, 44])
LFCC: torch.Size([2000, 1, 136, 44])
Fused features shape:  torch.Size([2000, 1, 135, 43])
Using Kaiming Initialization.
Test: [0/32]	Time 0.136 (0.136)	Loss 7.0861 (7.0861)	Acc@1 48.438 (48.438)
Test: [30/32]	Time 0.022 (0.026)	Loss 6.5714 (6.7264)	Acc@1 43.750 (

KeyboardInterrupt: ignored

In [None]:
# Dump the predictions followed by the ground-truth labels, one per line.
# NOTE(review): relies on `predictions` and `targets` being populated by the
# preceding cell; validate() as defined above returns only the accuracy, so
# confirm these names are actually set before running this cell.
print(predictions, targets, sep='\n')

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0

## Testing on all test folders

In [None]:
def test_on_all_folders(model, checkpointPath, fakeDataFolder):
  """Evaluate a checkpoint on every test folder of one class (fake or real).

  Args:
    model: network instance that receives the checkpoint weights.
    checkpointPath: checkpoint file passed to ``load_model``.
    fakeDataFolder: True to test the fake-audio folders (label 0), False to
      test the real-audio folders (label 1).

  Returns:
    A list with the value returned by ``validate`` (average top-1 accuracy)
    for each folder, in folder order.

  NOTE(review): ``AudioFeatureDataset`` is called here as ``(folder_path, label)``;
  this notebook defines two classes with that name, so which one runs depends
  on cell execution order — confirm the ``(filepath, label)`` variant is active.
  """
  # load the model parameters (the restored optimizer is not needed for testing)
  model, _ = load_model(model, checkpointPath)

  if fakeDataFolder:
    basepath = './data/test/fake/features_fake/'
    label = 0
    # torch.manual_seed(58)
    folders = ['FastSpeech', 'LA_Fake', 'Tacotron&Waveglow', 'WaveNet', 'WaveRNN']
  else:
    basepath = './data/test/real/features_real/'
    label = 1
    # torch.manual_seed(55)
    folders = ['la', 'for']

  # Loader options do not depend on the folder, so build them once.
  kwargs = {'num_workers': 2, 'pin_memory': True} if device.type == 'cuda' else {}

  averageAccuracies = []
  for folder in folders:
    print('\nTesting Data from folder: ', folder)

    # load the test data
    testDataset = AudioFeatureDataset(basepath + folder, label) #AudioFeatureDatasetWoAFF(basepath + folder, label)
    # Use the configured test batch size instead of the DataLoader default of 1.
    testLoader = DataLoader(testDataset, batch_size=training_params['test_batch_size'],
                            shuffle = True, **kwargs)
    del testDataset

    # validate the model against the test data
    acc = validate(testLoader, model, device, 1000)
    averageAccuracies.append(acc)

  return averageAccuracies

In [None]:
# Seed first: model initialisation and the random convolution kernels used
# while building the datasets both draw from the torch RNG.
torch.manual_seed(29)
# Use the configured device rather than a hard-coded "cuda:0", so the cell
# also runs where only a CPU is available.
model = se_resnet34(2,True).to(device)
checkpointPath = './model_snapshots/9/model_best.pth.tar'

averageAccuracies = test_on_all_folders(model, checkpointPath, True)
# Convert the per-folder accuracies to plain Python floats.
averageAccuracies = [float(acc) for acc in averageAccuracies]

Using Kaiming Initialization.

Testing Data from folder:  FastSpeech
Test: [0/65]	Time 0.122 (0.122)	Loss 0.6372 (0.6372)	Acc@1 100.000 (100.000)
===> Acc@1 80.000


Testing Data from folder:  LA_Fake
Test: [0/64]	Time 0.108 (0.108)	Loss 0.4628 (0.4628)	Acc@1 100.000 (100.000)
===> Acc@1 87.500


Testing Data from folder:  Tacotron&Waveglow
Test: [0/55]	Time 0.105 (0.105)	Loss 9.3367 (9.3367)	Acc@1 0.000 (0.000)
===> Acc@1 0.000


Testing Data from folder:  WaveNet
Test: [0/62]	Time 0.119 (0.119)	Loss nan (nan)	Acc@1 100.000 (100.000)
===> Acc@1 100.000


Testing Data from folder:  WaveRNN
Test: [0/71]	Time 0.119 (0.119)	Loss 0.0077 (0.0077)	Acc@1 100.000 (100.000)
===> Acc@1 100.000



In [None]:
# Search seeds 1..149 for the one giving the highest mean accuracy over the fake folders.
# NOTE(review): the seed is selected on the same data the accuracy is reported
# on, so the best value is an optimistic estimate — confirm this is intended.
maxi = 0
maxAvg = 0
checkpointPath = './model_snapshots/9/model_best.pth.tar'

for i in range(1, 150):
  # Seed before building the model, as in the original order, so the RNG
  # stream seen by the dataset's random kernels is unchanged.
  torch.manual_seed(i)
  model = se_resnet34(2,True).to(device)

  averageAccuracies = test_on_all_folders(model, checkpointPath, True)
  averageAccuracies = [float(acc) for acc in averageAccuracies]

  meanAccuracy = np.mean(averageAccuracies)
  if meanAccuracy > maxAvg:
    maxAvg = meanAccuracy
    maxi = i
    print(maxAvg)

print(maxi, maxAvg)

Using Kaiming Initialization.

Testing Data from folder:  FastSpeech
Test: [0/65]	Time 0.094 (0.094)	Loss 1.0418 (1.0418)	Acc@1 0.000 (0.000)
===> Acc@1 30.769


Testing Data from folder:  LA_Fake
Test: [0/64]	Time 0.088 (0.088)	Loss 0.6357 (0.6357)	Acc@1 100.000 (100.000)
===> Acc@1 12.500


Testing Data from folder:  Tacotron&Waveglow
Test: [0/55]	Time 0.097 (0.097)	Loss 7.8146 (7.8146)	Acc@1 0.000 (0.000)
===> Acc@1 0.000


Testing Data from folder:  WaveNet
Test: [0/62]	Time 0.092 (0.092)	Loss nan (nan)	Acc@1 100.000 (100.000)
===> Acc@1 100.000


Testing Data from folder:  WaveRNN
Test: [0/71]	Time 0.101 (0.101)	Loss 9.3135 (9.3135)	Acc@1 0.000 (0.000)
===> Acc@1 47.887

38.231310272216795
Using Kaiming Initialization.

Testing Data from folder:  FastSpeech
Test: [0/65]	Time 0.108 (0.108)	Loss 9.9333 (9.9333)	Acc@1 0.000 (0.000)
===> Acc@1 3.077


Testing Data from folder:  LA_Fake
Test: [0/64]	Time 0.099 (0.099)	Loss 0.0005 (0.0005)	Acc@1 100.000 (100.000)
===> Acc@1 87.500


Tes

In [None]:
# Search seeds 1..149 for the one maximising (mean fake accuracy) * (mean real accuracy).
# NOTE(review): the seed is selected on the same data the accuracy is reported
# on, so the best value is an optimistic estimate — confirm this is intended.
maxiReal, maxiFake = 0, 0
maxAvgReal, maxAvgFake = 0, 0
maxAvg = 0
checkpointPath = './model_snapshots/9/model_best.pth.tar'

for i in range(1, 150):
  # Use the configured device rather than a hard-coded "cuda:0".
  model = se_resnet34(2,True).to(device)

  # The same seed is applied before each pass so both see the same RNG stream.
  torch.manual_seed(i)
  averageAccuracies = test_on_all_folders(model, checkpointPath, True)
  averageAccuraciesFake = [float(acc) for acc in averageAccuracies]

  torch.manual_seed(i)
  averageAccuracies = test_on_all_folders(model, checkpointPath, False)
  averageAccuraciesReal = [float(acc) for acc in averageAccuracies]

  meanFake = np.mean(averageAccuraciesFake)
  meanReal = np.mean(averageAccuraciesReal)
  combined = meanFake * meanReal
  if combined > maxAvg:
    maxAvgReal = meanReal
    maxAvgFake = meanFake
    maxAvg = combined

    maxiReal, maxiFake = i, i

    print('Real:', maxAvgReal, averageAccuraciesReal)
    print('Fake:', maxAvgFake, averageAccuraciesFake)


print(maxiReal, maxiFake)
print(maxAvgReal, maxAvgFake)

Using Kaiming Initialization.

Testing Data from folder:  FastSpeech
Test: [0/2]	Time 0.101 (0.101)	Loss 6.6322 (6.6322)	Acc@1 0.000 (0.000)
===> Acc@1 0.000


Testing Data from folder:  LA_Fake
Test: [0/1]	Time 0.101 (0.101)	Loss 0.0677 (0.0677)	Acc@1 96.875 (96.875)
===> Acc@1 96.875


Testing Data from folder:  Tacotron&Waveglow
Test: [0/1]	Time 0.092 (0.092)	Loss 0.1586 (0.1586)	Acc@1 100.000 (100.000)
===> Acc@1 100.000


Testing Data from folder:  WaveNet
Test: [0/1]	Time 0.093 (0.093)	Loss nan (nan)	Acc@1 100.000 (100.000)
===> Acc@1 100.000


Testing Data from folder:  WaveRNN
Test: [0/2]	Time 0.095 (0.095)	Loss 4.3135 (4.3135)	Acc@1 4.688 (4.688)
===> Acc@1 4.225


Testing Data from folder:  la
Test: [0/1]	Time 0.093 (0.093)	Loss 0.1487 (0.1487)	Acc@1 98.214 (98.214)
===> Acc@1 98.214


Testing Data from folder:  for
Test: [0/2]	Time 0.095 (0.095)	Loss 9.2640 (9.2640)	Acc@1 4.688 (4.688)
===> Acc@1 4.545

Real: 51.379870653152466 [98.21428680419922, 4.545454502105713]
Fake: 60

KeyboardInterrupt: ignored

## Testing for individual audio samples

In [None]:
!pip install spafe

Collecting spafe
  Downloading spafe-0.1.2-py3-none-any.whl (77 kB)
[?25l[K     |████▎                           | 10 kB 26.4 MB/s eta 0:00:01[K     |████████▌                       | 20 kB 26.2 MB/s eta 0:00:01[K     |████████████▊                   | 30 kB 12.7 MB/s eta 0:00:01[K     |█████████████████               | 40 kB 9.6 MB/s eta 0:00:01[K     |█████████████████████▏          | 51 kB 4.3 MB/s eta 0:00:01[K     |█████████████████████████▍      | 61 kB 4.5 MB/s eta 0:00:01[K     |█████████████████████████████▋  | 71 kB 4.6 MB/s eta 0:00:01[K     |████████████████████████████████| 77 kB 2.9 MB/s 
Installing collected packages: spafe
Successfully installed spafe-0.1.2


In [None]:
import librosa
from spafe.features.lfcc import lfcc

def create_fused_features(mfccArr, lfccArr):
  """Pad MFCC and LFCC arrays to (-1, 1, 136, 44) and fuse them with ``aff``.

  The MFCC array is zero-padded by 58 on each side of its second-to-last
  dimension and the LFCC array by 15/16 on its last dimension. The fused
  tensor's shape is printed and the tensor is returned.

  NOTE(review): unlike the AudioFeatureDataset classes, no random 2x2
  convolution is applied before ``aff`` here, so the fused features keep the
  136 x 44 size — confirm the model accepts both variants.
  """
  mfcc_padded = F.pad(torch.Tensor(mfccArr), (0, 0, 58, 58), mode='constant', value=0)
  lfcc_padded = F.pad(torch.Tensor(lfccArr), (15, 16), mode='constant', value=0)

  fusedFeatures = aff(mfcc_padded.reshape(-1, 1, 136, 44),
                      lfcc_padded.reshape(-1, 1, 136, 44))
  print(fusedFeatures.shape)

  return fusedFeatures


def test_model_for_audio_sample(audioFilePath, model, checkpointPath):
  """Run a checkpointed model on one audio file and print its raw outputs.

  The file is streamed in blocks of 50 frames of 441 samples each. MFCC and
  LFCC features are computed per block, fused with ``create_fused_features``
  and passed through the model as one batch.

  Args:
    audioFilePath: path of the audio file to analyse.
    model: network instance that receives the checkpoint weights.
    checkpointPath: checkpoint file passed to ``load_model``.

  Returns:
    None; the feature shapes and the model output are printed.
  """
  # load the model parameters (the restored optimizer is not needed for inference)
  model, _ = load_model(model, checkpointPath)
  # Inference must run in eval mode, as validate() does for the test sets.
  model.eval()

  # Stream the file once and compute both feature types from each block.
  audioGenerator = librosa.stream(audioFilePath, block_length=50, frame_length=441, hop_length = 441, fill_value=0)
  mfccs = []
  lfccs = []
  for audio in audioGenerator:
      # `y` is passed by keyword: newer librosa releases no longer accept the
      # audio as a positional argument.
      mfccs.append(librosa.feature.mfcc(y=audio, sr=22050))
      lfccs.append(lfcc(audio))

  mfccs = np.array(mfccs)
  print(mfccs.shape)
  lfccs = np.array(lfccs)
  print(lfccs.shape)

  # compute fused features
  fusedFeatures = create_fused_features(mfccs, lfccs)

  # test using the model
  with torch.no_grad():
    input = fusedFeatures.to(device, non_blocking=True)

    output = model(input)
    print(output)


In [None]:
# Use the configured device rather than a hard-coded "cuda:0", so the cell
# also runs where only a CPU is available.
model = se_resnet50(2,True).to(device)
checkpointPath = './model_snapshots/8/model_best.pth.tar'
audioFilePath = './data/test/fake/WaveNet/Audio_2.wav' #'./data/test/real/for/1.wav'

test_model_for_audio_sample(audioFilePath, model, checkpointPath)

Using Kaiming Initialization.
(8, 20, 44)
(8, 136, 13)


  return x / np.std(x)


torch.Size([8, 1, 136, 44])
tensor([[nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan]], device='cuda:0')
