In [None]:
import qrandom
import torch 
from torch import tensor
import numpy as np
import torch.nn as nn
relu = nn.functional.relu
softmax = nn.functional.softmax
from torch.nn.functional import cross_entropy
from pathlib import Path
import os, shutil
from os import listdir
import torch.optim as optim
import pickle
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import librosa
import librosa.display
from IPython.display import Audio
import random
import gc

In [None]:
# Select the compute device first so the notebook also runs on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # These queries raise when no usable CUDA runtime is present,
    # so they are only issued once CUDA is known to be available.
    print(torch.cuda.current_device(),
          torch.cuda.device(0),
          torch.cuda.get_device_name(0))
else:
    print('CUDA not available; using CPU')

In [None]:
def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None):
    """Draw ``spec`` as a dB-scaled image with a colorbar.

    The input is converted with ``librosa.power_to_db`` before plotting.

    Args:
        spec: 2-D array-like spectrogram to display.
        title: Figure title; 'Spectrogram (db)' is used when falsy.
        ylabel: Label for the vertical axis.
        aspect: Aspect setting forwarded to ``imshow``.
        xmax: When truthy, the x axis is limited to ``(0, xmax)``.
    """
    figure, axis = plt.subplots(1, 1)
    spec_db = librosa.power_to_db(spec)
    image = axis.imshow(spec_db, origin='lower', aspect=aspect)
    axis.set_title(title if title else 'Spectrogram (db)')
    axis.set_xlabel('frame')
    axis.set_ylabel(ylabel)
    # The limit is applied after imshow so it overrides the image extent.
    if xmax:
        axis.set_xlim((0, xmax))
    figure.colorbar(image, ax=axis)
    plt.show(block=False)

In [None]:
# Load the training spectrograms, expected to be named spec1.npy, spec2.npy, ...
# The number of directory entries is used as the upper bound for the numeric suffix.
specs_dir_true = Path(r"C:\Users\g_bab\Downloads\botwTRAINING").expanduser()
true_specs = []
skipped_specs = []
for z in range(len(listdir(specs_dir_true))):
    spec_name = "spec" + str(z + 1) + ".npy"
    try:
        true_specs.append(np.load(os.path.join(specs_dir_true, spec_name)))
    except (OSError, ValueError, EOFError) as err:
        # Best effort: a missing or unreadable file is skipped, but only for
        # file/format errors, so interrupts and real bugs are no longer hidden.
        skipped_specs.append((spec_name, repr(err)))

if skipped_specs:
    print("Skipped {} spectrogram file(s), e.g. {}".format(len(skipped_specs), skipped_specs[0]))

In [None]:
# Number of spectrograms that were appended to true_specs by the loading cell.
print(len(true_specs))

In [None]:
#test if it loaded correctly
# Shallow copy of the first 100 melspecs (np arrays) for the audio round-trip check.
toaudio_list = list(true_specs[:100])

In [None]:
# Invert every selected mel spectrogram back to a waveform and join the pieces
# into one signal. The chunks are concatenated as arrays rather than being
# expanded into a Python list of individual samples, which is far slower.
audio_chunks = [
    librosa.feature.inverse.mel_to_audio(melspec, sr = 16000)
    for melspec in toaudio_list
]
# np.concatenate rejects an empty sequence, so fall back to an empty array.
playback = np.concatenate(audio_chunks) if audio_chunks else np.asarray([])

In [None]:
# Play the reconstructed waveform.
# NOTE(review): the rate here is 44100 while the waveform was synthesised with
# sr = 16000 by mel_to_audio; one of the two is presumably wrong -- confirm the
# sample rate the spectrograms were created with.
Audio(playback, rate=44100)  

In [None]:
# Visual sanity check on the second loaded spectrogram: print its shape, then
# plot it both as a numpy array and after conversion to a torch tensor.
sample_spec = true_specs[1]
print(np.shape(sample_spec))
plot_spectrogram(sample_spec)
torch_test = torch.from_numpy(sample_spec)
plot_spectrogram(torch_test)

In [None]:
# Split the second spectrogram into two halves along its second axis.
# Integer division is required: a float index makes the slices below raise TypeError.
# For an odd number of columns, half2 is one column longer than half1.
split = np.shape(true_specs[1])[1] // 2
print(split)
half1 = true_specs[1][:, :split]
half2 = true_specs[1][:, split:]

In [None]:
# Wrap every loaded numpy spectrogram as a torch tensor (from_numpy shares memory
# with the source array), then print both lengths so they can be compared.
listspecs_torch = [torch.from_numpy(spec) for spec in true_specs]
print(len(listspecs_torch))
print(len(true_specs))

In [None]:
# Training configuration.
#noise_shape = (100,)
height, length = 577, 123   # spectrogram dimensions; match the generator's output reshape
batch_size = 1              #change to 30 if possible
epochs = 50                 #change to 150
lr = 2e-4                   # learning rate passed to both Adam optimizers
loss = nn.BCELoss()         # binary cross-entropy on the discriminator's sigmoid output

In [None]:
class discriminator(nn.Module):
    """Fully connected real/fake classifier for flattened spectrograms.

    Args:
        height: Number of rows of each input spectrogram (default 577).
        length: Number of columns of each input spectrogram (default 123).
        hidden: Width of the hidden layer (default 512).

    With the defaults the layer sizes are 70971 -> 512 -> 1, so calling
    ``discriminator()`` builds the same network as before.
    """

    def __init__(self, height=577, length=123, hidden=512):
        super(discriminator, self).__init__()
        self.fc1 = nn.Linear(height * length, hidden)
        self.fc2 = nn.Linear(hidden, 1)
        self.activation = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Return the probability in (0, 1) that each input is real, shape (N, 1)."""
        # The flattened size is read from the first layer, so no separate
        # attribute has to be kept in sync with it.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.activation(self.fc1(x))
        # Functional sigmoid avoids constructing a new nn.Sigmoid module per call.
        return torch.sigmoid(self.fc2(x))


class generator(nn.Module):
    """Fully connected generator mapping a noise vector to one spectrogram.

    Args:
        noise_dim: Length of each input noise vector (default 123).
        height: Number of rows of each generated spectrogram (default 577).
        length: Number of columns of each generated spectrogram (default 123).

    With the defaults the layer sizes are 123 -> 1024 -> 2048 -> 70971, so
    calling ``generator()`` builds the same network as before.

    NOTE(review): the output is squashed by tanh into [-1, 1]; confirm that the
    training spectrograms are scaled to the same range.
    """

    # Class-level defaults so that instances unpickled from checkpoints written
    # before these attributes existed still reshape to the original 577 x 123.
    height = 577
    length = 123

    def __init__(self, noise_dim=123, height=577, length=123):
        super(generator, self).__init__()
        self.height = height
        self.length = length
        self.fc1 = nn.Linear(noise_dim, 1024)
        self.fc2 = nn.Linear(1024, 2048)
        self.fc3 = nn.Linear(2048, height * length)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Map noise of shape (N, noise_dim) to spectrograms of shape (N, height, length)."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.fc3(x)
        x = x.view(-1, self.height, self.length)
        # Functional tanh avoids constructing a new nn.Tanh module per call.
        return torch.tanh(x)

In [None]:
# Build both networks on the selected device (generator first, then discriminator).
G = generator().to(device)
D = discriminator().to(device)

# Each network gets its own Adam optimizer with the same learning rate and betas.
ADAM_BETAS = (0.5, 0.999)
G_optimizer = optim.Adam(G.parameters(), lr=lr, betas=ADAM_BETAS)
D_optimizer = optim.Adam(D.parameters(), lr=lr, betas=ADAM_BETAS)


In [None]:
# Run the Python garbage collector, then ask PyTorch to release its cached
# CUDA memory before training starts.
gc.collect()
torch.cuda.empty_cache()
# Last expression of the cell: displays the CUDA memory currently allocated by tensors.
torch.cuda.memory_allocated()

In [None]:
# Adversarial training: for every batch, take one discriminator step followed
# by one generator step. A trailing partial batch is dropped by the // below.
noise_dim = G.fc1.in_features  # length of the noise vector the generator consumes

for epoch in range(epochs):
    # Visit the spectrograms in a fresh random order every epoch.
    idxs = np.arange(len(listspecs_torch))
    np.random.shuffle(idxs)
    for batch_cnt in range(len(listspecs_torch)//batch_size):
        batch_indices = idxs[batch_cnt*batch_size : (batch_cnt + 1)*batch_size]
        batch = [listspecs_torch[index] for index in batch_indices]

        # Cast to float32 so spectrograms stored as float64 do not clash with
        # the float32 weights of the Linear layers.
        real_inputs = torch.stack(batch).float().to(device)
        n_samples = real_inputs.shape[0]

        # Training the discriminator
        # Real inputs are actual images of BOTW dataset
        # Fake inputs are from the generator
        # Real inputs should be classified as 1 and fake as 0
        real_outputs = D(real_inputs)
        real_label = torch.ones(n_samples, 1, device=device)

        # Uniform noise in [-1, 1).
        noise = ((torch.rand(n_samples, noise_dim) - 0.5) / 0.5).to(device)
        # Detach the fakes: the discriminator update does not need gradients
        # through the generator, so this saves a full backward pass through G.
        fake_inputs = G(noise).detach()
        fake_outputs = D(fake_inputs)
        fake_label = torch.zeros(n_samples, 1, device=device)

        outputs = torch.cat((real_outputs, fake_outputs), 0)
        targets = torch.cat((real_label, fake_label), 0)

        D_loss = loss(outputs, targets)
        D_optimizer.zero_grad()
        D_loss.backward()
        D_optimizer.step()

        # Training the generator
        # For generator, goal is to make the discriminator believe everything is 1
        noise = ((torch.rand(n_samples, noise_dim) - 0.5) / 0.5).to(device)
        fake_inputs = G(noise)
        fake_outputs = D(fake_inputs)
        fake_targets = torch.ones(n_samples, 1, device=device)
        G_loss = loss(fake_outputs, fake_targets)
        G_optimizer.zero_grad()
        G_loss.backward()
        G_optimizer.step()

        if batch_cnt % 500 == 0:
            print('Epoch {} Iteration {}: discriminator_loss {:.3f} generator_loss {:.3f}'.format(epoch, batch_cnt, D_loss.item(), G_loss.item()))

    # Checkpoint every 10th epoch; the file name carries the zero-based epoch index.
    if (epoch+1) % 10 == 0:
        torch.save(G, r"C:\Users\g_bab\Downloads\saved_musicGAN\generator_epoch_{}.pth".format(epoch))
        print('Model saved.')

In [None]:
# Save the generator once more after training; `epoch` still holds the last
# value assigned by the training loop, so it must have run in this kernel.
# torch.save on the module object pickles the whole model, so loading the file
# later requires the `generator` class definition to be available.
torch.save(G, r"C:\Users\g_bab\Downloads\saved_musicGAN\generator_epoch_{}.pth".format(epoch))

In [None]:
#credit to sbalian for qrandom

# Sample 100 batches of spectrograms from the trained generator.
# The generator consumes ONE noise vector per spectrogram, of the same length
# used during training (the input width of its first layer). Feeding it a full
# (577, 123) noise image per sample would instead make the Linear layers treat
# every row as a separate noise vector and emit 577 spectrograms per sample.
noise_dim = G.fc1.in_features
list_products = []
with torch.no_grad():  # inference only: no autograd graph is needed
    for _ in range(100):
        # random.random() can be swapped for qrandom.random() here.
        flat_noise = np.array(
            [random.random() for _ in range(batch_size * noise_dim)],
            dtype=np.float32,
        )
        # Rescale from [0, 1) to [-1, 1), the noise range used during training.
        flat_noise = (flat_noise - 0.5) / 0.5
        batch = torch.from_numpy(flat_noise.reshape(batch_size, noise_dim)).to(device)

        product = G(batch)  # shape (batch_size, 577, 123) for the default generator

        list_products.append(product.to("cpu").numpy())
   

In [None]:
# Discriminator and generator losses from the last batch the training loop processed.
print(D_loss.item(), G_loss.item())

In [None]:
# Shape of the second generated batch (index 1) as a sanity check.
print(list_products[1].shape)

In [None]:
# Invert every generated spectrogram exactly once and join the waveforms.
# Each entry of list_products is a 3-D array (n_specs, rows, cols), so the
# inner loop walks its first axis and hands one 2-D spectrogram at a time to
# mel_to_audio, rather than re-inverting the whole batch repeatedly.
# NOTE(review): the generator ends in tanh, so values may be negative, while
# mel_to_audio presumably expects a non-negative mel power spectrogram --
# confirm whether the output needs rescaling before inversion.
audio_chunks = []
for product in list_products:
    for melspec in product:
        audio_chunks.append(librosa.feature.inverse.mel_to_audio(melspec, sr = 16000))
# np.concatenate rejects an empty sequence, so fall back to an empty array.
playback = np.concatenate(audio_chunks) if audio_chunks else np.asarray([])

In [None]:
# Play the waveform reconstructed from the generated spectrograms.
# NOTE(review): the rate here is 44100 while the waveform was synthesised with
# sr = 16000 by mel_to_audio; one of the two is presumably wrong -- confirm the
# sample rate the training spectrograms were created with.
Audio(playback, rate=44100)   