<a href="https://colab.research.google.com/github/aurel-au-velin-olymp/v2m/blob/master/vox2music.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# vox2music

In [0]:
#@title Installation { display-mode: "form" }
#@test {"output": "ignore"}

print('Installing...this may take up to a minute...')
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
!apt-get install sox
!gsutil -q -m cp gs://magentadata/soundfonts/Yamaha-C5-Salamander-JNv5.1.sf2 /content/
!pip install -qU pyfluidsynth pretty_midi
!pip install -qU magenta
!pip install soundfile
!pip install pydub
!pip install pysndfx

!git clone https://github.com/aurel-au-velin-olymp/v2m.git
!pip install colabutils

# !wget http://freepats.zenvoid.org/Piano/SalamanderGrandPiano/SalamanderGrandPianoV3+20161209_44khz16bit.tar.xz
# !tar -xf SalamanderGrandPianoV3+20161209_44khz16bit.tar.xz

# Hack to allow python to pick up the newly-installed fluidsynth lib. 
# This is only needed for the hosted Colab environment.


ipd.clear_output()

print('🎉 Done!')


In [0]:
#@title imports { display-mode: "form" }

import ctypes.util

# Keep a handle on the stock lookup so every other library still resolves
# through the regular ctypes machinery.
orig_ctypes_util_find_library = ctypes.util.find_library


def proxy_find_library(lib):
  """Look up `lib` like `ctypes.util.find_library`, special-casing fluidsynth.

  For the name 'fluidsynth' the fixed soname 'libfluidsynth.so.1' is returned;
  any other name is delegated to the original `find_library`.
  """
  if lib != 'fluidsynth':
    return orig_ctypes_util_find_library(lib)
  return 'libfluidsynth.so.1'


# Install the proxy so later `ctypes.util.find_library` callers go through it.
ctypes.util.find_library = proxy_find_library

from google.colab import files

%tensorflow_version 1.x
import magenta.music as mm
import magenta
import tensorflow

import mido
from mido import MidiFile
import librosa

import IPython.display as ipd
import matplotlib.pyplot as plt

import numpy as np
import os

# Silence DeprecationWarnings; necessary until pyfluidsynth is updated (>1.2.5).
import warnings
warnings.filterwarnings(action="ignore", category=DeprecationWarning)


def play(note_sequence):
  """Play `note_sequence` via `mm.play_sequence`, using `mm.fluidsynth` as synth."""
  synth = mm.fluidsynth
  mm.play_sequence(note_sequence, synth=synth)

In [0]:
#@title Choose piece/composer { display-mode: "form" }
midi_name = "Beethoven Moonlight" #@param ["Beethoven Moonlight", "Beethoven Elise", "Bach Prelude", 'Chopin Nocturne No. 20']

# Maps every title offered in the form above to its MIDI file inside the
# cloned `v2m` repository; looked up with `midi_dict[midi_name]`.
midi_dict = dict([
    ('Beethoven Elise', 'v2m/Elise.mid'),
    ('Beethoven Moonlight', 'v2m/mond_1_format0.mid'),
    ('Bach Prelude', 'v2m/wtk1-prelude1.mid'),
    ('Chopin Nocturne No. 20', 'v2m/frederic-chopin-nocturne-no20.mid'),
])

In [0]:
#@title Choose, upload or record audio { run: "auto" }
wav_name =  "Devil's Advocate" #@param ["Kinski Laesterzungen", "Django", "Pulp Fiction", "Devil's Advocate", "record your own", "upload a file"]
skip_seconds = 0 #@param {min:0, max:127, step:1}

# Resolve the chosen source to a file path first, then load it in one place.
if wav_name == 'record your own':
  # Imported lazily: only needed when recording.
  from colabutils import audio
  audio.record_and_save()
  wav_path = 'audio.wav'

elif wav_name == 'upload a file':
  uploaded = files.upload()
  if not uploaded:
    # Upload dialog was cancelled; fail with a clear message instead of an
    # IndexError on the empty result.
    raise ValueError('No file was uploaded.')
  wav_path = list(uploaded)[0]

else:
  # Example clips shipped with the cloned `v2m` repository.
  # NOTE: "Wolf Of Wall Street" is mapped here but not offered in the form above.
  wav_dict = {
      "Kinski Laesterzungen" : 'v2m/klaus-kinski-liest-villon-die-lasterzungen.mp3',
      "Pulp Fiction" : 'v2m/Pulp Fiction - Jules and his Bible Verse.mp3',
      "Wolf Of Wall Street" : 'v2m/The Wolf of Wall Street Inspirational Speech HD.mp3',
      "Django" : 'v2m/Django Unchained - You Must Shake My Hand [BIG SPOILER].mp3',
      "Devil's Advocate" : 'v2m/Al Pacinos speech about God (The Devils Advocate).flac'
              }
  wav_path = wav_dict[wav_name]

# Load at 44.1 kHz, skipping the first `skip_seconds` seconds of the file.
wav, sr = librosa.load(wav_path, sr=44100, offset=skip_seconds)

# Peak-normalise to [-1, 1]. An empty or completely silent clip cannot be
# normalised (empty max / division by zero) and is useless for the later
# cells, so stop here with an explicit error rather than continuing with
# NaNs or failing later with an unrelated message.
peak = np.max(np.abs(wav)) if wav.size else 0
if peak == 0:
  raise ValueError('Something went wrong. If you are trying to record, try again. If you uploaded a file make sure it is a proper audio file.')
wav /= peak

print('Preview of 10 Seconds:')
ipd.Audio(wav[0:sr*10], rate=sr)

In [0]:
#@title Choose performance parameters { run: "auto", display-mode: "form" }

import scipy.signal as sg

normalize_range = True #@param {type:"boolean"}

min_velocity = 40 #@param {type:"slider", min:0, max:127, step:1}
max_velocity = 127 #@param {type:"slider", min:0, max:127, step:1}

smooth_curve = True #@param {type:"boolean"}
filter_width = 4 #@param {type:"slider", min:0, max:20, step:0.1}
delay_compensation = 0.4 #@param {type:"slider", min:0, max:3, step:0.1}


clip_range = True #@param {type:"boolean"}

bias = 0 #@param {type:"slider", min:-127, max:127, step:1}
min_velocity_clip = 40 #@param {type:"slider", min:0, max:127, step:1}
max_velocity_clip = 127 #@param {type:"slider", min:0, max:127, step:1}


def _plot_intensity(label):
  """Plot the current `nrg` curve over the (x10 scaled) waveform and print `label`."""
  plt.plot(wav*10)
  plt.plot(nrg)
  print(label)
  plt.show()


# Frame-wise RMS energy of the speech. Each frame value is repeated
# `hop_length` times so the curve can be indexed by audio sample, then scaled
# by 100. `y=` is passed by keyword because newer librosa releases no longer
# accept the audio positionally.
hop_length = 512
nrg = librosa.feature.rms(y=wav, hop_length=hop_length, center=True).repeat(hop_length) * 100 #+ min_velocity

if normalize_range:
  # Linearly map the curve's own min..max onto the requested velocity range.
  nrg = np.interp(nrg, (nrg.min(), nrg.max()), (min_velocity, max_velocity))
  _plot_intensity('normalized intensity curve:')


if smooth_curve:
  # The window length must be a positive integer: slider values such as 4.1
  # give a non-integral float product, and 0 would give an empty window.
  window_len = max(1, int(round(filter_width * 10000)))
  h = sg.get_window('triang', window_len)
  # Unit-gain triangular smoothing; the default 'full' convolution makes the
  # curve longer than the input.
  nrg = sg.convolve(nrg, h / h.sum())
  nrg[nrg<min_velocity] = min_velocity
  # Drop the first `delay_compensation` seconds (at 44.1 kHz) of the smoothed
  # curve -- presumably to offset the lag the smoothing introduces.
  nrg = nrg[int(delay_compensation*44100):]
  _plot_intensity('smoothed intensity curve:')


if clip_range:
  # Shift by `bias`, then hard-limit to the clip range.
  nrg = np.clip(nrg+bias, min_velocity_clip, max_velocity_clip)
  _plot_intensity('clipped intensity curve:')

renormalize = True #@param {type:"boolean"}

if renormalize:
  # Stretch whatever range is left back onto min_velocity..max_velocity.
  nrg = np.interp(nrg, (nrg.min(), nrg.max()), (min_velocity, max_velocity))
  _plot_intensity('renormalized intensity curve:')

In [0]:
#@title { run: "auto", display-mode: "form" }

# Re-times and re-voices the chosen MIDI file from the intensity curve `nrg`:
# louder speech gives higher note velocities and shorter delta times.
# The result is written to 'new.mid' and rendered to audio with FluidSynth.

from mido import tick2second
from mido import Message, MidiFile, MidiTrack


warnings.filterwarnings("ignore")

print('Performing silently...')

frame_history = 1 #@param {type:"slider", min:0, max:3, step:0.1}
frame_future = 0 #@param {type:"slider", min:0, max:3, step:0.1}
min_bpm = 23 #@param {type:"integer", min:10, max:300, step:1}
force_tempo = True #@param {type:"boolean"}
tempo_sensitivity = 2.36 #@param {type:"slider", min:0, max:3, step:0.01}
agg_function = "max" #@param['max', 'mean', 'median']

# Resolve the aggregator once instead of on every MIDI message.
agg_function_dict = {'median': np.median,
                     'mean': np.mean,
                     'max': np.max
                     }
agg_function = agg_function_dict[agg_function]

SAMPLE_RATE = 44100  # rate used throughout the notebook for `wav` / `nrg`
# MIDI default tempo (microseconds per beat, i.e. 120 BPM); it applies until
# the file sends its first set_tempo message.
DEFAULT_MIDI_TEMPO = 500000

history_samples = int(frame_history*SAMPLE_RATE)
future_samples = int(frame_future*SAMPLE_RATE)


def _window_level(start, stop):
  """Aggregate `nrg[start:stop]` with `agg_function`; an empty window yields 0."""
  window = nrg[start:stop]
  return agg_function(window) if window.size else 0


velocity = 20
modulation = 0
current_time = 0
tempo = DEFAULT_MIDI_TEMPO

mid = MidiFile(midi_dict[midi_name])

# convert to format 0 (all tracks merged into a single track)
mid_tracks = mido.merge_tracks(mid.tracks)
mid = MidiFile(ticks_per_beat=mid.ticks_per_beat)
mid.tracks.append(mid_tracks)

mid_new = MidiFile(ticks_per_beat=mid.ticks_per_beat)
track = MidiTrack()
mid_new.tracks.append(track)

for msg in mid.tracks[0]:

  if msg.is_meta and 'tempo' in msg.type:
    if force_tempo:
      # Replace every tempo change in the file with the fixed base tempo.
      msg.tempo = mido.bpm2tempo(min_bpm)
    tempo = msg.tempo

  # Shrink the delta time according to the intensity seen at the previous
  # message: the higher `modulation`, the shorter the wait.
  tempo_mod = 1 / (1 + (modulation*tempo_sensitivity)*0.01)
  msg.time = int(msg.time * tempo_mod)

  # Position of this message on the speech timeline.
  current_time += tick2second(msg.time,
                              mid.ticks_per_beat,
                              tempo)
  current_sample = int(current_time*SAMPLE_RATE)

  if current_time > frame_history:
    # Window reaching `frame_history` seconds back and `frame_future` ahead.
    current_nrg = _window_level(current_sample - history_samples,
                                current_sample + future_samples)
  else:
    # Not enough history yet: aggregate from the start of the curve.
    current_nrg = _window_level(0, current_sample + history_samples)

  # NaNs in the curve are treated as zero intensity.
  modulation = np.nan_to_num(current_nrg)

  if not msg.is_meta and 'note' in msg.type:

    if modulation < min_velocity:
      modulation = min_velocity

    # Messages with velocity 0 are left untouched.
    if msg.velocity > 0:
      velocity = modulation
      # MIDI velocities must stay within 0..127.
      msg.velocity = int(min(max(velocity, 0), 127))

  track.append(msg)

mid_new.save('new.mid')
print('Performance complete.')

print('Rendering audio...')

note_seq = mm.midi_file_to_note_sequence('new.mid')

sf2 = 'v2m/SalC5Light2.sf2' #@param['v2m/SalC5Light2.sf2', 'Yamaha-C5-Salamander-JNv5.1.sf2']
music = mm.fluidsynth(note_seq, SAMPLE_RATE, sf2_path=sf2)
print('Preview of 10 seconds:')
ipd.Audio(music[0:SAMPLE_RATE*10], rate=SAMPLE_RATE)

In [0]:
#@title Mix and listen { display-mode: "form" }

# Mixes the speech (`wav`) with the rendered performance (`music`), optionally
# sending the music through a reverb, and exports the result as WAV and MP3.

import soundfile as sf
import pydub
from pysndfx import AudioEffectsChain

music_level = 0.56 #@param {type:"slider", min:0.1, max:2, step:0.01}
speech_level = 1 #@param {type:"slider", min:0.0, max:2, step:0.01}
reverb = True #@param {type:"boolean"}
reverb_level = 5 #@param {type:"slider", min:0.0, max:10, step:0.1}


# Effects chain applied to the music only; `reverb_level` sets the wet gain.
fx = (
    AudioEffectsChain()
#    .highshelf()
    .reverb(reverberance=50,
               hf_damping=50,
               room_scale=100,
               stereo_depth=100,
               pre_delay=20,
               wet_gain=reverb_level,
               wet_only=False)

#    .lowshelf()
)

# The mix is only as long as the shorter of the two signals.
mix_len = int(min(music.shape[0], wav.shape[0]))

music_part = fx(music) if reverb else music
# Trim before adding so a processed signal of slightly different length
# cannot break the in-place addition below.
music_part = music_part[:mix_len]

mix = np.zeros(mix_len)
mix += wav[:mix_len] * speech_level
mix[:music_part.shape[0]] += music_part * music_level

# Peak-normalise; skip when the mix is empty or silent to avoid dividing by zero.
peak = np.max(np.abs(mix)) if mix_len else 0
if peak > 0:
  mix /= peak


sf.write('output.wav', mix, 44100)
sound = pydub.AudioSegment.from_wav('output.wav')
sound.export("output.mp3", format="mp3")

ipd.Audio('output.mp3')