In [None]:
import glob
import os
import librosa
import numpy as np


import warnings
warnings.filterwarnings("ignore")
from spleeter.separator import Separator
from spleeter.audio.adapter import get_default_audio_adapter
import scipy.io.wavfile as wav



import madmom
import statistics as stats

#### Set Global Variables

In [None]:
# Directories and audio-signal settings shared by every cell below.
path = 'playlists/playlist'  # directory whose files are loaded as the playlist
srate = 44100  # sample rate requested from the loader and used for all second<->sample conversions
audio_files = []  # loaded waveforms, one per track (filled by the loading cell)
audio_files_mono = []  # mono downmix of each loaded waveform
hopSize = 512  # hop length handed to librosa's beat tracker
bufSize = 1024  # NOTE(review): not referenced by any cell visible here -- confirm whether it is still needed
m_length = 30  # transition-segment length; it is multiplied by srate further down, so it acts as seconds

#### Load files from playlist

In [None]:
# Load every file of the playlist directory as a waveform plus a mono downmix.
# The two lists are rebuilt from scratch so that re-running this cell does not
# append the same tracks a second time.

audio_loader = get_default_audio_adapter()

audio_files = []
audio_files_mono = []

# sorted() makes the track order (and therefore the order of the final mix)
# reproducible; glob on its own returns files in arbitrary, OS-dependent order.
for filename in sorted(glob.glob(os.path.join(path, '*'))):
    if not os.path.isfile(filename):
        # the '*' wildcard also matches sub-directories, which cannot be decoded
        continue

    waveform, wf_srate = audio_loader.load(filename, sample_rate=srate)
    audio_files.append(waveform)

    # Average over the channel axis (axis 1, as in the original sum over axis 1).
    # For stereo input this equals sum/2, but unlike a hard-coded "/ 2" it does
    # not halve the level of a single-channel file.
    waveform_mono = waveform.mean(axis=1)
    audio_files_mono.append(waveform_mono)

num_files = len(audio_files)

#### Take tempo/beats for each track

In [None]:
# Per-track analysis results, indexed by track position in the playlist:
#   beats      - beat times (seconds) from madmom
#   beats_lib  - beat times (seconds) from librosa
#   downbeats  - the madmom beats flagged as beat number 1 of a bar
#   song_data  - dict of per-track metadata
beats = []
beats_lib = []
downbeats = []
song_data = {}

# Build both madmom processors once; constructing the RNN processor inside the
# loop would re-create it for every single track.
rnn_proc = madmom.features.downbeats.RNNDownBeatProcessor()
proc = madmom.features.downbeats.DBNDownBeatTrackingProcessor(beats_per_bar=4, fps=100)

for i, x in enumerate(audio_files_mono):

    # Tempo and beat times from librosa. The signal is passed by keyword (y=)
    # because newer librosa releases no longer accept it positionally.
    tempo, beat_times = librosa.beat.beat_track(
        y=x, sr=srate, hop_length=hopSize, start_bpm=80, units='time'
    )
    # Some librosa versions return the tempo as a 1-element array; store a
    # plain float so later arithmetic (stretch factors) stays scalar.
    tempo = float(np.atleast_1d(tempo)[0])

    # Fold half-time / double-time estimates back towards the 70-140 BPM range.
    if tempo < 70:
        tempo = tempo * 2
    if tempo > 140:
        tempo = tempo / 2

    beats_lib.append(beat_times)

    song_length = len(audio_files[i]) / srate

    # madmom: beat activations -> rows of (time, position in bar)
    act = rnn_proc(audio_files[i])
    db = proc(act)

    beats.append([row[0] for row in db])
    downbeats.append([row[0] for row in db if row[1] == 1])

    # Key detection is currently disabled; the placeholder keeps the dict schema.
    key = 'null'

    song_data[i] = {
        'length': song_length,
        'tempo': tempo,
        'track_num': i + 1,
        'key': key,
    }


In [None]:
# Display the per-track metadata collected so far (length, tempo, track number, key).
song_data

#### Finds mean tempo and stretches tracks accordingly

In [None]:
# Gather every track's tempo in playlist order, then average them to obtain
# the common tempo all tracks will be stretched towards.
tempo_list = [song_data[track_idx].get('tempo') for track_idx in range(num_files)]

mean_tempo = np.mean(tempo_list)


In [None]:
# Compute each track's stretch factor (mean tempo / track tempo) and store it
# in song_data under 'stretch_value'.
for i in range(len(audio_files_mono)):
    track_tempo = song_data[i].get('tempo')
    # The beat tracker can report a tempo of 0 when it finds no onsets; dividing
    # by it would produce an infinite factor, so such a track is left unstretched.
    stretch_value = mean_tempo / track_tempo if track_tempo else 1.0
    song_data[i]['stretch_value'] = stretch_value

#### Determine the segments that will be used for the mashup

In [None]:
#converts a time in seconds to a specific sample
def sec_convert(seconds):
    """Return the sample index for a time in seconds.

    The time is multiplied by the notebook-wide sample rate ``srate`` and
    rounded to the nearest whole sample.
    """
    return round(seconds * srate)

In [None]:
def sample_convert(samples):
    """Return the time in seconds for a sample index.

    The index is divided by the notebook-wide sample rate ``srate``; the
    result is rounded to the nearest whole second.
    """
    return round(samples / srate)

In [None]:
def choose_beat(timestamp, songindex):
    """Return the first beat of a track that follows ``timestamp`` closely.

    Scans ``beats[songindex]`` in list order and returns the first beat time
    that lies strictly between ``timestamp`` and ``timestamp + 5``. When no
    beat falls inside that window, ``timestamp`` itself is returned.
    """
    window_end = timestamp + 5
    in_window = (beat for beat in beats[songindex] if timestamp < beat < window_end)
    return next(in_window, timestamp)

In [None]:
def choose_downbeat(timestamp, songindex):
    """Return the librosa beat closest to the downbeat preceding ``timestamp``.

    The anchor is the last entry of ``downbeats[songindex]`` (in list order)
    that is smaller than ``timestamp``; if none is smaller, the first downbeat
    is used. The function then returns the entry of ``beats_lib[songindex]``
    with the smallest absolute distance to that anchor.

    Fallbacks for tracks where detection found nothing:
      * no downbeats -> ``timestamp`` itself is used as the anchor
      * no librosa beats -> the anchor is returned unchanged
    """
    track_downbeats = downbeats[songindex]
    track_beats = beats_lib[songindex]

    # len() rather than truthiness: the beat lists may be numpy arrays.
    if len(track_downbeats) == 0:
        anchor = timestamp
    else:
        anchor = track_downbeats[0]
        for candidate in track_downbeats:
            if candidate < timestamp:
                anchor = candidate

    if len(track_beats) == 0:
        return anchor

    return min(track_beats, key=lambda beat: abs(beat - anchor))
    

In [None]:
# Cut one lead-in and one lead-out segment out of every track.
# Each segment covers m_length * stretch_value seconds of source audio
# (presumably ~m_length seconds once the track has been time-stretched by that
# factor -- confirm against the stretching cells).
# All boundaries are stored as sample indices, not times.

track_beginnings = []
track_ends = []

for i, track_audio in enumerate(audio_files):

    s_v = song_data[i].get('stretch_value')
    segment_samples = srate*(m_length*s_v)

    # Lead-in: starts on the beat nearest the first detected downbeat.
    first_downbeat = int(round(sec_convert(choose_downbeat(downbeats[i][0], i))))
    start_end = int(round(first_downbeat+segment_samples))
    track_beginnings.append(track_audio[first_downbeat:start_end])

    # Lead-out: starts on the beat nearest the last downbeat that lies one
    # segment length before the final beat.
    m_temp = beats[i][-1] - (m_length*s_v)
    end_start = int(round(sec_convert(choose_downbeat(m_temp, i))))
    track_end = int(round(end_start+segment_samples))
    track_ends.append(track_audio[end_start:track_end])

    song_data[i].update({
        'start_end': start_end,
        'end_start': end_start,
        'first_downbeat': first_downbeat,
        'track_end': track_end,
    })
    

    

#### Splits tracks to stems using spleeter

In [None]:
# Spleeter separator using the 'spleeter:4stems' configuration; the cells below
# read the 'vocals', 'bass', 'other' and 'drums' entries of its output.
separator = Separator('spleeter:4stems')

In [None]:
def _separate_to_mono_stems(segment):
    """Split one audio segment into its four stems and downmix each to mono.

    Runs the notebook-wide spleeter ``separator`` on ``segment`` and returns a
    dict with the keys 'drums', 'bass', 'vocals' and 'other'. Every stem is
    converted to float32 and its two channels are averaged (sum over axis 1,
    divided by 2).
    """
    prediction = separator.separate(segment)
    return {
        name: prediction[name].astype(np.float32).sum(axis=1) / 2
        for name in ('drums', 'bass', 'vocals', 'other')
    }


# Stems of the lead-in and lead-out segment of every track, keyed by track index.
t_begin_stems = {}
t_end_stems = {}

for i in range(len(audio_files)):
    t_begin_stems[i] = _separate_to_mono_stems(track_beginnings[i])
    t_end_stems[i] = _separate_to_mono_stems(track_ends[i])


#### Timestretch stems/mono audio

In [None]:
# Time-stretch every stem of every lead-in / lead-out segment by its track's
# stretch factor so all transition material shares the mean tempo.
t_begin_stems_stretched = {}
t_end_stems_stretched = {}

for i in range(len(audio_files)):

    s_value = song_data[i].get('stretch_value')

    # rate is passed by keyword: newer librosa releases declare it
    # keyword-only, and older ones accept the keyword as well.
    t_begin_stems_stretched[i] = {
        name: librosa.effects.time_stretch(stem, rate=s_value)
        for name, stem in t_begin_stems[i].items()
    }

    t_end_stems_stretched[i] = {
        name: librosa.effects.time_stretch(stem, rate=s_value)
        for name, stem in t_end_stems[i].items()
    }

In [None]:
# Time-stretch the part of each track that is played on its own, i.e. the
# audio between its lead-in and lead-out segments. The first track keeps its
# beginning and the last track keeps its ending, because no transition is
# mixed there.
tracks_tstretched = []

for i in range(num_files):
    body_start = 0 if i == 0 else song_data[i]['start_end']
    body_end = None if i == num_files - 1 else song_data[i]['end_start']
    s_value = song_data[i].get('stretch_value')

    # A single loop also covers the one-track playlist, where separate
    # "first" and "last" steps would append the same track twice.
    # rate is passed by keyword because newer librosa makes it keyword-only.
    tracks_tstretched.append(
        librosa.effects.time_stretch(audio_files_mono[i][body_start:body_end], rate=s_value)
    )

#### Creates mashups using stems

In [None]:
def _fit_length(signal, target):
    """Return ``signal`` trimmed or padded to exactly ``target`` samples.

    Longer signals are cut at the end; shorter ones are extended by repeating
    their last sample. An empty signal has no last sample and is filled with
    zeros instead.
    """
    signal = np.asarray(signal)
    current = len(signal)
    if current == target:
        return signal
    if current > target:
        return signal[:target]
    if current == 0:
        return np.zeros(target, dtype=signal.dtype)
    return np.pad(signal, (0, target - current), mode='edge')


def resize_segments(median, stem):
    """Force one stem of every track's transition segments to ``median`` samples.

    Updates ``t_end_stems_stretched[i][stem]`` and
    ``t_begin_stems_stretched[i][stem]`` in place for every track index.

    Parameters
    ----------
    median : int
        Target length in samples.
    stem : str
        Stem name, i.e. the key inside each per-track stem dict.

    Segments are adjusted by however many samples are needed, not just one,
    so stems that differ by more than a single sample still end up at equal
    length and can be summed afterwards.
    """
    for i in range(num_files):
        for stems in (t_end_stems_stretched, t_begin_stems_stretched):
            stems[i][stem] = _fit_length(stems[i][stem], median)
                 

In [None]:
# Bring every stretched stem to one common length so they can be summed.
# The target is the median length of the tracks' lead-out vocal stems.

lengths = [len(t_end_stems_stretched[i]['vocals']) for i in range(num_files)]
median = int(stats.median(lengths))

for stem_name in ('drums', 'vocals', 'bass', 'other'):
    resize_segments(median, stem_name)



In [None]:
# One mashup per transition: the drums of the current track's lead-out are
# layered with the vocals, 'other' and bass stems of the next track's lead-in.
mashups = [
    t_end_stems_stretched[i]['drums']
    + t_begin_stems_stretched[i + 1]['vocals']
    + t_begin_stems_stretched[i + 1]['other']
    + t_begin_stems_stretched[i + 1]['bass']
    for i in range(num_files - 1)
]

In [None]:
#CrossFading Function, takes signal arrays s1, s2, and fadetime f and delay time d in samples
#Written by Sarah Staszkiel

def crossfade(s1, s2, f, d):
    """Join two signals with a linear crossfade.

    Parameters
    ----------
    s1, s2 : array-like, 1-D
        Outgoing and incoming signal.
    f : int
        Fade length in samples. It is clamped to the length of the shorter
        signal, so an over-long fade shortens itself instead of failing.
    d : int
        Delay in samples by which the incoming signal is shifted relative to
        the start of the fade (the fade region is zero-padded by ``d``).

    Returns
    -------
    numpy.ndarray
        ``s1`` without its last ``f`` samples, followed by the ``f + d``
        sample overlap region, followed by ``s2`` without its first ``f``
        samples.
    """
    s1 = np.asarray(s1)
    s2 = np.asarray(s2)
    f = max(0, min(int(f), len(s1), len(s2)))
    d = int(d)
    split = len(s1) - f  # index where the fade-out of s1 begins

    # Zero-pad so the incoming signal starts d samples after the fade begins.
    a = np.pad(s1[split:], (0, d), 'constant', constant_values=(0, 0))
    b = np.pad(s2[:f], (d, 0), 'constant', constant_values=(0, 0))

    if f > 0:  # f == 0 means no overlap; skipping also avoids dividing by zero
        # Gain falls from 1 towards 0 across the fade; the reversed ramp
        # raises the incoming signal up to full level.
        fade_out = 1 - np.arange(f) / f
        # Plain assignment (not *=) keeps the signals' original dtype.
        a[:f] = a[:f] * fade_out
        b[d:] = b[d:] * fade_out[::-1]

    return np.concatenate((s1[:split], a + b, s2[f:]), axis=0)

In [None]:
# Crossfade length and delay; both are converted to sample counts with
# sec_convert in the next cell, so they are treated as seconds.
# NOTE(review): mean_tempo/60 is beats per second, not a duration. If one
# beat's length was intended this would be 60/mean_tempo -- confirm.
cross_fade = mean_tempo/60
delay = 0.1 # should be less than cross_fade

In [None]:
# Convert the crossfade length and the delay into the sample counts that
# crossfade() takes as its f and d arguments.
crsf = int(sec_convert(cross_fade))
d = sec_convert(delay)

In [None]:
# Assemble the final mix: track 0, then for every transition the mashup
# followed by the next track's body, each joined with a crossfade.
# Pairing mashups with the remaining tracks also handles a one-track playlist,
# which has no mashups and is left as the single stretched track.
playlist_file = tracks_tstretched[0]

for mashup, next_track in zip(mashups, tracks_tstretched[1:]):
    playlist_file = crossfade(playlist_file, mashup, crsf, d)
    playlist_file = crossfade(playlist_file, next_track, crsf, d)

In [None]:
# Write the assembled mix to a WAV file in the working directory at the
# notebook-wide sample rate.
playlist_array = np.asarray(playlist_file)
wav.write('test_playlist_lib.wav', srate, playlist_array)

In [None]:
#END ------------------------------