In [None]:
#%load_ext autoreload
#%autoreload 2
import torch
torch.set_grad_enabled(False)
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import pyrubberband as pyrb
import sys
import gc
from scipy.signal import fftconvolve, correlate
from scipy.io import wavfile

def hann(win):
    """
    Hann window of a given length, using win (not win-1) as the
    denominator of the cosine argument

    Parameters
    ----------
    win: int
        Window length in samples

    Returns
    -------
    ndarray(win)
        Samples 0.5*(1 - cos(2*pi*n/win)) for n = 0, ..., win-1
    """
    n = np.arange(win)
    return 0.5*(1 - np.cos(2*np.pi*n/win))

def get_cepstrum(x):
    """
    Compute the cepstrum of an entire chunk of audio

    The audio is multiplied by a Hann window of the same length, and the
    cepstrum is the inverse real FFT of the log magnitude of its real FFT

    Parameters
    ----------
    x: ndarray(N)
        Audio samples
    
    Returns
    -------
    ndarray(N)
        Cepstrum
    """
    N = x.size
    windowed = x*hann(N)
    mag = np.abs(np.fft.rfft(windowed))
    # The small offset keeps the log finite at bins with zero magnitude
    log_mag = np.log(mag+1e-8)
    # Pass the length explicitly: without it, irfft assumes an even-length
    # signal and would return N-1 samples whenever N is odd
    return np.fft.irfft(log_mag, n=N)


def get_z_score(c, delta, buff=0, start_buff=0):
    """
    Compute a z-score for a correlation vector or cepstrum
    at a particular offset
    The mean/std are computed ignoring the offset location, and
    there is the option to ignore locations from the beginning or
    slightly to the left / slightly to the right of the location

    Parameters
    ----------
    c: ndarray(N) or sequence of numbers
        Correlation vector/cepstrum
    delta: int
        Delay at which to check for the pseudorandom sequence
    buff: int
        Buffer on either side of delta to ignore when computing mu/std
        for z-score
    start_buff: int
        Ignore this many from the start when computing mu/std 
        for z-score

    Returns
    -------
    float
        (c[delta] - mu)/std, where mu/std are taken over the entries
        of c that were not ignored
    """
    # Work on a float copy so that NaN can be used as the "ignore" marker
    # even when c holds integers, and so that the caller's data is untouched
    cmu = np.array(c, dtype=float)
    # Read the value at delta before that location is masked out
    peak = cmu[delta]
    if start_buff > 0:
        cmu[0:start_buff] = np.nan
    # Clamp the left edge at 0: a negative slice start would wrap around
    # to the end of the array and mask the wrong region (or nothing at all)
    left = max(delta-buff, 0)
    cmu[left:delta+buff+1] = np.nan
    mu = np.nanmean(cmu)
    std = np.nanstd(cmu)
    return (peak-mu)/std

## Single Echo Examples

In [None]:
# Input clip that every model below is run on, requested at 44100 samples/sec
x, sr = librosa.load("../Writeup/supplementary/prince.mp3", sr=44100)

# Inclusive range of lags (in samples) over which cepstra are plotted;
# lag_start is also passed as start_buff when computing z-scores
lag_start, lag_end = 25, 150
rg = np.arange(lag_start, lag_end+1)

In [None]:
# For each instrument, run the input clip through the four Rave model
# variants (clean, 50, 75, 100), save each variant's output, and plot the
# cepstra of the outputs over the lag range rg
for instrument in ["groove", "guitarset", "vocalset"]:
    outputs = {}
    for tag in ["clean", "50", "75", "100"]:
        model = torch.jit.load(f"../ArtistProtectModels/SingleEchoes/Rave/{instrument}_{tag}.ts").eval()
        z = model.encode(torch.from_numpy(x).reshape(1,1,-1))
        outputs[tag] = model.decode(z).numpy().reshape(-1)
        # Each file gets the output of the model named in its filename
        wavfile.write(f"../Writeup/supplementary/rave_prince_{instrument}_{tag}.wav", sr, outputs[tag])
    yclean, y50, y75, y100 = outputs["clean"], outputs["50"], outputs["75"], outputs["100"]


    fac = 0.8
    plt.figure(figsize=(fac*10, fac*5))

    # Clean model: plotted as the reference curve, no z-score
    cep = get_cepstrum(yclean)
    cep = cep[rg[0]:rg[-1]+1]
    plt.plot(rg, cep)

    # Echo models: z-score of the cepstrum at the lag matching the model's
    # name, ignoring the first lag_start cepstrum values in the mean/std
    zscores = {}
    for echo in [50, 75, 100]:
        cep = get_cepstrum(outputs[str(echo)])
        zscores[echo] = get_z_score(cep[0:lag_end+1], echo, start_buff=lag_start)
        cep = cep[rg[0]:rg[-1]+1]
        plt.plot(rg, cep)
    z50, z75, z100 = zscores[50], zscores[75], zscores[100]

    plt.xlabel("Echo (Samples)")
    plt.ylabel("Cepstrum Value")
    plt.legend(["Clean", "50   (z[50] = {:.1f})".format(z50), "75   (z[75] = {:.1f})".format(z75), "100 (z[100]={:.1f})".format(z100)])
    name = {"guitarset":"Guitarset", "vocalset":"VocalSet", "groove":"Drums"}[instrument]
    plt.title(f"Prince Jazz Cepstra $c$ for {name} Rave Models")

    plt.savefig(f"../Writeup/supplementary/RaveCepstra_{instrument}.svg", bbox_inches='tight')

### Dance Diffusion Example

In [None]:
# Make the dance-diffusion helpers importable
sys.path.append("../dance-diffusion/audio_diffusion")
from utils import load_model_for_synthesis, do_style_transfer

# Settings handed to the dance-diffusion helpers
sample_size = 81920
sample_rate = 44100  
noise_level = 0.2
device = "cuda"

# For each instrument, style-transfer the input clip with the four Dance
# Diffusion checkpoints (clean, 50, 75, 100), save each output, and plot the
# cepstra of the outputs over the lag range rg
for instrument in ["groove", "guitarset", "vocalset"]:
    dd_outputs = {}
    for tag in ["clean", "50", "75", "100"]:
        model = load_model_for_synthesis(f"../ArtistProtectModels/SingleEchoes/DanceDiffusion/{instrument}_{tag}.ckpt", sample_size, sample_rate, device)
        torch.cuda.empty_cache()
        gc.collect()
        # Truncate the clip to a whole number of sample_size chunks and
        # shape it as (1, 1, n) on the target device
        n = sample_size*(x.size//sample_size)
        xi = x[0:n]
        xi = torch.from_numpy(xi[None, None, :]).to(device)
        y = do_style_transfer(model, xi, steps=100, noise_level=noise_level,device=device)
        dd_outputs[tag] = y.detach().cpu().numpy()[0, 0, :]
        wavfile.write(f"../Writeup/supplementary/dd_prince_{instrument}_{noise_level}_{tag}.wav", sr, dd_outputs[tag])
    yclean, y50, y75, y100 = (dd_outputs[tag] for tag in ["clean", "50", "75", "100"])


    fac = 0.8
    plt.figure(figsize=(fac*10, fac*5))

    # Clean model: plotted as the reference curve, no z-score
    cep = get_cepstrum(yclean)
    cep = cep[rg[0]:rg[-1]+1]
    plt.plot(rg, cep)

    # Echo models: z-score of the cepstrum at the lag matching the
    # checkpoint's name, ignoring the first lag_start values in the mean/std
    dd_zscores = {}
    for echo in [50, 75, 100]:
        cep = get_cepstrum(dd_outputs[str(echo)])
        dd_zscores[echo] = get_z_score(cep[0:lag_end+1], echo, start_buff=lag_start)
        cep = cep[rg[0]:rg[-1]+1]
        plt.plot(rg, cep)
    z50, z75, z100 = dd_zscores[50], dd_zscores[75], dd_zscores[100]

    plt.xlabel("Echo (Samples)")
    plt.ylabel("Cepstrum Value")
    plt.legend(["Clean", "50   (z[50] = {:.1f})".format(z50), "75   (z[75] = {:.1f})".format(z75), "100 (z[100]={:.1f})".format(z100)])
    name = {"guitarset":"Guitarset", "vocalset":"VocalSet", "groove":"Drums"}[instrument]
    plt.title(f"Prince Jazz Cepstra $c$ for {name} Dance Diffusion Models")

    plt.savefig(f"../Writeup/supplementary/DDCepstra_{instrument}.svg", bbox_inches='tight')

## Pseudorandom Echo Examples

In [None]:
# Index of the pseudorandom pattern whose model is loaded below
patt = 0

# Input clip shaped as (1, 1, num_samples) for the Rave encoders
audio = torch.from_numpy(x).reshape(1,1,-1)

# Reference output from the clean guitarset model
model = torch.jit.load("../ArtistProtectModels/SingleEchoes/Rave/guitarset_clean.ts").eval()
z = model.encode(audio)
yclean = model.decode(z).numpy().reshape(-1)

# Output from the guitarset model for pattern patt, also saved to disk
model = torch.jit.load(f"../ArtistProtectModels/PNEchoes/Rave/guitarset_pn{patt}.ts").eval()
z = model.encode(audio)
ypn = model.decode(z).numpy().reshape(-1)
wavfile.write(f"../Writeup/supplementary/prince_guitarset_pn{patt}.wav", sr, ypn)

In [None]:
import sys
sys.path.append("..")
from prepare_echo_dataset_pn import PN_PATTERNS_1024_8
from scipy import signal

alpha = 0.01  # not used anywhere in this cell
lag = 75      # offset at which the z-score is evaluated and the marker drawn
q = np.array(PN_PATTERNS_1024_8[patt])
L = q.size

# Cross-correlate the cepstrum of the PN model's output with the pattern and
# score the value at the expected offset
s = get_cepstrum(ypn)
c = signal.correlate(s, q, mode='valid', method='fft')
z = get_z_score(c[0:L+2*lag], lag)

# Same measurement on the clean model's output; get_cepstrum windows each
# signal with its own length rather than reusing the length of ypn
s = get_cepstrum(yclean)
c2 = signal.correlate(s, q, mode='valid', method='fft')
z2 = get_z_score(c2[0:L+2*lag], lag)

# Offsets shown in the plot; kept under its own name so the lag range rg
# read by the single-echo cells is not overwritten
pn_rg = np.arange(3, 100)


fac = 0.6
plt.figure(figsize=(fac*10, fac*5))
plt.plot(pn_rg, c2[pn_rg])
plt.plot(pn_rg, c[pn_rg])
plt.axvline(lag, linestyle='--', c='k', linewidth=1)
plt.title("Prince Jazz Cepstra Cross-Correlation $c^*$ for Guitar Rave Models")
plt.ylabel("Cross-Correlation")
plt.xlabel("Offset")
plt.legend(["Clean (z[{}]={:.1f})".format(lag, z2), "Correct PN Pattern (z[{}]={:.1f})".format(lag, z)])
plt.savefig("../Writeup/supplementary/RavePNCepstra.svg", bbox_inches='tight')

In [None]:
# Print pattern patt as a string of digits via the map v -> (1+v)/2
# (presumably the entries are -1/+1, giving 0/1 - confirm against
# prepare_echo_dataset_pn). The result gets its own name so the audio
# array x used by the earlier cells is not overwritten.
pn_bits = np.array(0.5*(1+np.array(PN_PATTERNS_1024_8[patt])), dtype=int)
s = "".join([str(i) for i in pn_bits])
print(s)