In [1]:
import librosa
import tensorflow as tf
import os
from keras import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Reshape, UpSampling2D
from keras import layers
import keras
import numpy as np
import matplotlib.pyplot as plt
import time
import soundfile
from IPython.display import Audio as IAudio

timestep = 2**9
sr = 2**14

In [2]:
cross_entropy_artist = tf.keras.losses.BinaryCrossentropy(from_logits=True)
cross_entropy_critic = tf.keras.losses.BinaryCrossentropy(from_logits=False)

def critic_loss(real_output, fake_output):
    real_loss = cross_entropy_critic(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy_critic(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def artist_loss(fake_output):
    return cross_entropy_artist(tf.ones_like(fake_output),fake_output)

In [3]:
artist_optimizer = tf.keras.optimizers.Adam(1e-5)
critic_optimizer = tf.keras.optimizers.Adam(1e-5)

In [4]:
critic = Sequential()

critic.add(Conv2D(2**3, 2**3, activation='relu',padding='same',input_shape=(1025,1292,2)))
critic.add(MaxPooling2D(pool_size=(25,4*17)))
critic.add(Conv2D(2**4, 2**3, activation='relu',padding='same'))
critic.add(Conv2D(2, 2**4, activation='relu',padding='same'))
critic.add(Flatten())
critic.add(Dense(2**7,activation='relu'))
critic.add(Dense(2**7,activation='relu'))
critic.add(Dense(1,activation='sigmoid'))

artist = Sequential()
artist.add(Dense(2**7,activation='relu',input_shape=(41*19,)))
artist.add(Dense(2**7,activation='relu'))
artist.add(Dense(41*19,activation='relu'))
artist.add(Reshape((41,19,1)))
artist.add(Conv2D(2**2,kernel_size=(7,3),padding='same',activation='relu'))
artist.add(UpSampling2D((5,4)))
artist.add(Conv2D(2**2,kernel_size=(10,6),padding='same',activation='relu'))
artist.add(UpSampling2D((5,17)))
artist.add(Conv2D(2**2,kernel_size=(10,6),padding='same',activation='relu'))
artist.add(Conv2D(2**2,kernel_size=2**4,padding='same',activation='relu'))
artist.add(Conv2D(2,kernel_size=2**4,padding='same'))


In [6]:
critic.load_weights('spect-critic')
artist.load_weights('spect-artist')

NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for spect-critic

In [7]:
def spec_2_song(spectrogram):
    empty_spectrogram = np.zeros_like(spectrogram[:,:,0],dtype = np.complex64)
    empty_spectrogram.real = spectrogram[:,:,0]
    empty_spectrogram.imag = spectrogram[:,:,1]
    track = librosa.istft(empty_spectrogram)
    return track

def create_Spectrogram(track):
    spectrogram_complex = librosa.stft(track)
    real = spectrogram_complex.real.reshape(spectrogram_complex.shape+(1,))
    imaginary = spectrogram_complex.imag.reshape(spectrogram_complex.shape+(1,))
    spectrogram = np.concatenate((real,imaginary),axis = 2)
    return spectrogram

In [9]:
@tf.function
def train_step(batch_size=1):
    
    addresses = [os.path.join('spectrograms',trackid) for trackid in os.listdir('spectrograms/')]
    random_indexes = np.random.randint(0,len(addresses),batch_size)
    spectrograms = np.array([np.load(addresses[i]) for i in random_indexes])
#     spectrograms = tf.convert_to_tensor(spectrograms)
    
    noise = np.random.normal(size = (batch_size,41*19))

    with tf.GradientTape() as art_tape, tf.GradientTape() as crit_tape:
        
        fake_output = artist(noise)
        
        
        real_preds = critic(spectrograms)
        fake_preds = critic(fake_output)
        
        art_loss = artist_loss(fake_preds)
        crit_loss = critic_loss(real_preds,fake_preds)

    gradients_of_artist = art_tape.gradient(art_loss, artist.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    artist_optimizer.apply_gradients(zip(gradients_of_artist, artist.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))


In [10]:
artist.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 128)               99840     
_________________________________________________________________
dense_4 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_5 (Dense)              (None, 779)               100491    
_________________________________________________________________
reshape (Reshape)            (None, 41, 19, 1)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 41, 19, 4)         88        
_________________________________________________________________
up_sampling2d (UpSampling2D) (None, 205, 76, 4)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 205, 76, 4)       

In [13]:
for i in range(501):
    start_time = time.time()
    train_step(batch_size=2)
    print('Iteration '+ str(i) + ' - Time elapsed: ' + str(time.time()-start_time))
    
    if i%50 == 0:
        
        print('Current Iteration: ' + str(i))
        
        track_path = 'tracks/' + os.listdir('tracks')[np.random.randint(0,len(os.listdir('tracks')))]
        print(track_path)
        real_track = librosa.load(track_path)[0]
        real_spectrogram = create_Spectrogram(real_track)
                
        noise = np.random.randn(1,41*19)
        fake_spectrogram = artist.predict(noise)
        
        white_noise = np.random.randn(1,1025,1292,2)
        
        real_pred = critic.predict(real_spectrogram.reshape((1,)+ real_spectrogram.shape))
        fake_pred = critic.predict(fake_spectrogram)
        noise_pred = critic.predict(white_noise)

        artist.save_weights('spect-artist')
        critic.save_weights('spect-critic')     
        
        
        print('Real Track: ' + str(real_pred))
        print('Fake Track: ' + str(fake_pred))
        print('White Noise: ' + str(noise_pred))
        
        fake_track = spec_2_song(fake_spectrogram[0])
        
        soundfile.write('test-' + str(i) + '.wav',np.array(fake_track).reshape(-1),samplerate=22033,format='WAV')

        print(time.ctime())
        time.sleep(1)
        

Iteration 0 - Time elapsed: 0.023987531661987305
Current Iteration: 0
tracks/Reach out for the Light (Live) - Recorded @ Wacken & Masters of Rock.mp3
Real Track: [[0.9895454]]
Fake Track: [[0.49666876]]
White Noise: [[0.46391544]]
Mon Jan 31 22:58:54 2022
Iteration 1 - Time elapsed: 0.004998445510864258
Iteration 2 - Time elapsed: 0.005003213882446289
Iteration 3 - Time elapsed: 0.004997730255126953
Iteration 4 - Time elapsed: 0.005997419357299805
Iteration 5 - Time elapsed: 0.004998445510864258
Iteration 6 - Time elapsed: 0.004998445510864258
Iteration 7 - Time elapsed: 0.005998134613037109
Iteration 8 - Time elapsed: 0.004997968673706055
Iteration 9 - Time elapsed: 0.004998683929443359
Iteration 10 - Time elapsed: 0.004997968673706055
Iteration 11 - Time elapsed: 0.004998445510864258
Iteration 12 - Time elapsed: 0.40742039680480957
Iteration 13 - Time elapsed: 1.1443359851837158
Iteration 14 - Time elapsed: 1.158754825592041
Iteration 15 - Time elapsed: 1.139695644378662
Iteration 16

Iteration 162 - Time elapsed: 0.3990814685821533
Iteration 163 - Time elapsed: 1.1613619327545166
Iteration 164 - Time elapsed: 1.1573617458343506
Iteration 165 - Time elapsed: 1.1606919765472412
Iteration 166 - Time elapsed: 1.1570916175842285
Iteration 167 - Time elapsed: 1.143768548965454
Iteration 168 - Time elapsed: 1.171475887298584
Iteration 169 - Time elapsed: 1.1404058933258057
Iteration 170 - Time elapsed: 1.154973030090332
Iteration 171 - Time elapsed: 1.156597375869751
Iteration 172 - Time elapsed: 1.157489538192749
Iteration 173 - Time elapsed: 1.152726173400879
Iteration 174 - Time elapsed: 1.1680676937103271
Iteration 175 - Time elapsed: 1.1544272899627686
Iteration 176 - Time elapsed: 1.1530787944793701
Iteration 177 - Time elapsed: 1.1559042930603027
Iteration 178 - Time elapsed: 1.1546626091003418
Iteration 179 - Time elapsed: 1.1562139987945557
Iteration 180 - Time elapsed: 1.1513216495513916
Iteration 181 - Time elapsed: 1.1548478603363037
Iteration 182 - Time elaps

Iteration 319 - Time elapsed: 1.1639823913574219
Iteration 320 - Time elapsed: 1.150649070739746
Iteration 321 - Time elapsed: 1.160710334777832
Iteration 322 - Time elapsed: 1.1580512523651123
Iteration 323 - Time elapsed: 1.158827543258667
Iteration 324 - Time elapsed: 1.1642084121704102
Iteration 325 - Time elapsed: 1.1308088302612305
Iteration 326 - Time elapsed: 1.1824936866760254
Iteration 327 - Time elapsed: 3.78800630569458
Iteration 328 - Time elapsed: 2.6721887588500977
Iteration 329 - Time elapsed: 1.9923515319824219
Iteration 330 - Time elapsed: 1.1537392139434814
Iteration 331 - Time elapsed: 1.151578664779663
Iteration 332 - Time elapsed: 1.1521689891815186
Iteration 333 - Time elapsed: 1.1586496829986572
Iteration 334 - Time elapsed: 1.1566715240478516
Iteration 335 - Time elapsed: 1.157405138015747
Iteration 336 - Time elapsed: 1.1540014743804932
Iteration 337 - Time elapsed: 1.154811143875122
Iteration 338 - Time elapsed: 1.1486754417419434
Iteration 339 - Time elapsed

Iteration 475 - Time elapsed: 1.1540184020996094
Iteration 476 - Time elapsed: 1.1555864810943604
Iteration 477 - Time elapsed: 1.1293580532073975
Iteration 478 - Time elapsed: 1.185624361038208
Iteration 479 - Time elapsed: 1.1545250415802002
Iteration 480 - Time elapsed: 1.1424903869628906
Iteration 481 - Time elapsed: 1.1544551849365234
Iteration 482 - Time elapsed: 1.1548676490783691
Iteration 483 - Time elapsed: 1.1555633544921875
Iteration 484 - Time elapsed: 1.1548750400543213
Iteration 485 - Time elapsed: 1.1492044925689697
Iteration 486 - Time elapsed: 1.1614177227020264
Iteration 487 - Time elapsed: 1.156360149383545
Iteration 488 - Time elapsed: 1.1552882194519043
Iteration 489 - Time elapsed: 1.1561410427093506
Iteration 490 - Time elapsed: 1.1599390506744385
Iteration 491 - Time elapsed: 1.1549546718597412
Iteration 492 - Time elapsed: 1.1436936855316162
Iteration 493 - Time elapsed: 1.1586556434631348
Iteration 494 - Time elapsed: 1.1555073261260986
Iteration 495 - Time e

In [None]:
noise = np.random.randn(1,41*19)


In [None]:
noise.shape

In [None]:
fake_spec = artist.predict(noise)

In [None]:
fake_spec.shape

In [None]:
track = spec_2_song(fake_spec[0])

In [None]:
track.shape[0]/30

In [None]:
IAudio(track,rate=22033)

In [None]:
newtrack = continue_sample(real_track[0],num_seconds = 10)

In [None]:
newtrack = np.array(newtrack).reshape(-1)

In [None]:
np.where(np.isnan(newtrack))

In [None]:
faketrack_mean, faketrack_preds = decide_track(np.array(newtrack).reshape(-1))

In [None]:
faketrack_mean

In [None]:
soundfile.write('test-denseNN.wav',np.array(newtrack).reshape(-1),samplerate=2**14,format='WAV')

In [None]:
plt.plot(np.array(newtrack).reshape(-1))

In [None]:
IAudio(np.array(newtrack[0]).reshape(-1),rate=sr)