In [1]:
import librosa
import tensorflow as tf
import os
from keras import Sequential
from keras.layers import Dense, LSTM, Input, Dropout, concatenate, Conv1D
from keras import layers
import keras
import numpy as np
import matplotlib.pyplot as plt
import time
import soundfile
from IPython.display import Audio as IAudio

timestep = 2**12
sr = 2**14

In [2]:
def load_song(path,sr=2**14,timestep=timestep):
    track,_ = librosa.load(path,sr=sr)
    num_slices = track.shape[0]//timestep
    track = track[:num_slices*timestep]
    track = track.reshape((-1,timestep))
    return track

In [3]:
cross_entropy_artist = tf.keras.losses.BinaryCrossentropy(from_logits=True)
cross_entropy_critic = tf.keras.losses.BinaryCrossentropy(from_logits=False)

def critic_loss(real_output, fake_output):
    real_loss = cross_entropy_critic(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy_critic(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def artist_loss(fake_output):
    return cross_entropy_artist(tf.ones_like(fake_output),fake_output)

In [4]:
artist_optimizer = tf.keras.optimizers.Adam(1e-4)
critic_optimizer = tf.keras.optimizers.Adam(1e-4)

In [5]:
def song_iteration(num_seconds = 8,sr=2**14,timestep=2**11,batch_size=1):
    num_steps = int(num_seconds*sr/timestep)
    
    track_path = 'tracks/' + os.listdir('tracks')[np.random.randint(0,len(os.listdir('tracks')))]
    real_track = load_song(track_path,sr=sr,timestep=timestep)
    beginpts =  np.random.randint(1,real_track.shape[0]-num_steps,batch_size)
    
    initial_sound = tf.convert_to_tensor([real_track[beginpt-1] for beginpt in beginpts])
    initial_probability = tf.ones([batch_size,timestep])/2
    art_output, crit_output = train_step(music_sample = initial_sound, previous_artist_output = initial_sound, 
               previous_critic_chances = [initial_probability]*2,batch_size=batch_size)
    
    for i in range(num_steps):
        curr_slice = tf.convert_to_tensor([real_track[beginpt+i] for beginpt in beginpts])
        art_output, crit_output = train_step(music_sample = curr_slice,previous_artist_output=art_output,
                                            previous_critic_chances = crit_output,batch_size=batch_size)
    
    


In [6]:
@tf.function
def train_step( music_sample, previous_artist_output, previous_critic_chances,batch_size = 1):
    #Me aseguro de que las entradas tengan 2 dimensiones
    music_sample = tf.reshape( tf.convert_to_tensor(music_sample),[batch_size,-1])
    previous_artist_output = tf.reshape( tf.convert_to_tensor(previous_artist_output) ,[batch_size,-1])
    previous_critic_chances = [tf.reshape( tf.convert_to_tensor(chances) ,[batch_size,-1]) for chances in previous_critic_chances]
    
    with tf.GradientTape() as art_tape , tf.GradientTape() as crit_tape:
        #Calculo las salidas del artista
        
        art_tape.watch(previous_artist_output)
       
        art_output = artist(tf.reshape(previous_artist_output,[batch_size,-1,1]), training = True)
        art_output = tf.reshape(art_output,[batch_size,-1])
        
        
        #Calculo las probabilidades del crítico
        
        crit_tape.watch(music_sample)
        crit_tape.watch(previous_critic_chances)
        crit_tape.watch(art_output)
        
        real_output = critic([music_sample,previous_critic_chances[0]],training=True)
        fake_output = critic([art_output,previous_critic_chances[1]],training=True)
        
        #Calculo las losses
        art_loss = artist_loss(fake_output)
        crit_loss = critic_loss(real_output,fake_output) 
        

            
    gradients_of_artist = art_tape.gradient(art_loss, artist.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    artist_optimizer.apply_gradients(zip(gradients_of_artist, artist.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))
    
    return art_output, [real_output,fake_output]

In [7]:
NN_per_layer_critic = 2**9
NN_per_layer_artist = [2**6,2**6,2**4,1]

artist = Sequential()

artist.add(Conv1D(NN_per_layer_artist[0],2**7,input_shape=(timestep,1),padding='same' ,activation='tanh'))
artist.add(Conv1D(NN_per_layer_artist[1],2**7,padding='same' ,activation='tanh'))
artist.add(Conv1D(NN_per_layer_artist[2],2**7,padding='same' ,activation='tanh'))
artist.add(Conv1D(NN_per_layer_artist[3],2**7,padding='same' ,activation='tanh'))


#Critico
input_1 = Input(shape=(timestep,))
dense_1 = Dense(NN_per_layer_critic,activation='tanh')(input_1)

input_2 = Input(shape=(timestep,))
dense_2 = Dense(NN_per_layer_critic,activation='tanh')(input_2)

merged = concatenate([dense_1,dense_2])

MD_1 = Dense(NN_per_layer_critic,activation='sigmoid')(merged)
MD_2 = Dense(NN_per_layer_critic,activation='sigmoid')(MD_1)
MD_3 = Dense(timestep,activation='sigmoid')(MD_2)

critic = tf.keras.models.Model(inputs=[input_1,input_2],outputs = MD_3)


In [8]:
critic.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 4096)]       0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 4096)]       0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 512)          2097664     input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 512)          2097664     input_2[0][0]                    
_______________________________________________________________________________________

In [9]:
def decide_track(track):
    track = np.array(track).reshape(-1)
    num_slices = track.shape[0]//timestep
    track = track[:num_slices*timestep]
    track = track.reshape((-1,timestep))
    curr_chances = np.ones(timestep)/2
    preds = []
    for i in range(track.shape[0]):
        chances = critic.predict([track[i].reshape(1,-1), curr_chances.reshape(1,-1)])
        preds.append(chances)
        curr_chances=chances
    preds = np.array(preds).reshape(-1)
    return np.mean(preds), preds

def continue_sample(initial_sample,num_seconds):
    num_steps = int(num_seconds*sr/timestep)
    track = [initial_sample.reshape(-1)]
    for i in range(1,num_steps):
        track.append(artist.predict(track[-1].reshape(1,-1,1)).reshape(-1))
    
    return track


In [None]:
for i in range(1001):
    start_time = time.time()
    song_iteration(timestep=timestep,num_seconds=1,batch_size =128)
    print('Iteration '+ str(i) + ' - Time elapsed: ' + str(time.time()-start_time))
    
    if i%100 == 0:
        
        print('Current Iteration: ' + str(i))
        
        track_path = 'tracks/' + os.listdir('tracks')[np.random.randint(0,len(os.listdir('tracks')))]
        print(track_path)
        real_track = load_song(track_path,sr=sr,timestep=timestep)
        fake_track = continue_sample(real_track[0],4)
        white_noise = np.random.randn(128*timestep)
        
        real_mean, _ = decide_track(real_track[:128])
        fake_mean, _ = decide_track(fake_track)
        noise_mean, _ = decide_track(white_noise)
        
        artist.save_weights('artist')
        critic.save_weights('critic')      
        
        
        print('Real Track Mean: ' + str(real_mean))
        print('Fake Track Mean: ' + str(fake_mean))
        print('White Noise Mean: ' + str(noise_mean))
        
        soundfile.write('test-' + str(i) + '.wav',np.array(fake_track).reshape(-1),samplerate=2**14,format='WAV')

        print(time.ctime())
        time.sleep(1)
        



Iteration 0 - Time elapsed: 15.433238744735718
Current Iteration: 0
tracks/Through the Fire and Flames (Live at Loud Park Festival 2014).mp3




Real Track Mean: 0.50091803
Fake Track Mean: 0.500922
White Noise Mean: 0.50092316
Mon Jan 31 23:40:18 2022
Iteration 1 - Time elapsed: 5.431882858276367
Iteration 2 - Time elapsed: 5.251394033432007
Iteration 3 - Time elapsed: 5.269761562347412
Iteration 4 - Time elapsed: 5.260185956954956
Iteration 5 - Time elapsed: 5.275302410125732
Iteration 6 - Time elapsed: 5.251262903213501
Iteration 7 - Time elapsed: 5.288374185562134
Iteration 8 - Time elapsed: 5.46442985534668
Iteration 9 - Time elapsed: 5.332797288894653
Iteration 10 - Time elapsed: 5.2919135093688965
Iteration 11 - Time elapsed: 5.322920560836792
Iteration 12 - Time elapsed: 5.388190984725952
Iteration 13 - Time elapsed: 5.364548206329346
Iteration 14 - Time elapsed: 5.306985139846802
Iteration 15 - Time elapsed: 5.300653696060181
Iteration 16 - Time elapsed: 5.352154970169067
Iteration 17 - Time elapsed: 5.311307668685913
Iteration 18 - Time elapsed: 5.380719184875488
Iteration 19 - Time elapsed: 5.3107545375823975
Iterati

In [17]:
track_path = 'tracks//' + os.listdir('tracks')[np.random.randint(0,len(os.listdir('tracks')))]
real_track = load_song(track_path,sr=sr,timestep=timestep)

In [17]:
mean, preds = decide_track(real_track.reshape(-1))

In [18]:
print(mean)

0.9358672


In [None]:
    artist.save_weights('artist')
    critic.save_weights('critic')

In [None]:
noise = np.random.randn(20,timestep)
noise_mean, noise_preds = decide_track(noise.reshape(-1))
plt.plot(noise_preds)

In [None]:
real_track[0]

In [18]:
newtrack = continue_sample(real_track[0],num_seconds = 10)

In [23]:
newtrack = np.array(newtrack).reshape(-1)

In [24]:
np.where(np.isnan(newtrack))

(array([], dtype=int64),)

In [25]:
faketrack_mean, faketrack_preds = decide_track(np.array(newtrack).reshape(-1))

In [26]:
faketrack_mean

0.50239193

In [None]:
soundfile.write('test-denseNN.wav',np.array(newtrack).reshape(-1),samplerate=2**14,format='WAV')

In [None]:
plt.plot(np.array(newtrack).reshape(-1))

In [None]:
IAudio(np.array(newtrack[0]).reshape(-1),rate=sr)