<a href="https://colab.research.google.com/github/jmineroff/Beatle-Basslines/blob/master/BassGeneration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Optional code cell for running in Google Colab
# Links to local git folder in Google Drive and installs modules

from google.colab import drive
drive.mount('/content/drive')

!pip install pypianoroll
!pip install AudioConverter
!apt install fluidsynth
!cp /usr/share/sounds/sf2/FluidR3_GM.sf2 ./font.sf2
!pip install midi2audio

import os
try:
  os.chdir("drive/My Drive/Beatle-Basslines") # Local git path
except Exception:
  pass

In [0]:
# Initialization

from midi2audio import FluidSynth as fs
from IPython.display import display, Audio
import numpy as np
import pandas as pd
from pypianoroll import Multitrack, Track
from matplotlib import pyplot as plt
import os
import sys

# Default size (width, height in inches) for every matplotlib figure in this notebook
plt.rcParams["figure.figsize"] = (20,10)

In [0]:
# Helper functions

def write_soundfile(midifile, output, sound_font="font.sf2", sample_rate=11025): # Write midi to an audio file
  """Render a MIDI file to an audio file through FluidSynth.

  Args:
    midifile: Path of the MIDI file to read.
    output: Path of the audio file to write.
    sound_font: Path of the SoundFont handed to FluidSynth. Defaults to
      "font.sf2", resolved against the current working directory.
    sample_rate: Sample rate passed to FluidSynth. Defaults to 11025.
  """
  synth = fs(sound_font=sound_font, sample_rate=sample_rate)
  synth.midi_to_audio(midifile, output)


def piano_plot(pianofile): # Plot a pianoroll object and show the figure
  """Draw the pianoroll of `pianofile` and display it.

  Args:
    pianofile: Any object exposing a `plot()` method (here a pypianoroll
      Multitrack).

  The return value of `plot()` is not needed, so it is neither stored nor
  unpacked; the helper therefore does not depend on whether `plot()` hands
  back a (figure, axes) pair or a single object.
  """
  pianofile.plot()
  plt.show()


def parse_midi_dir(list_songs=True, list_tracks=False): # Parse directory of raw midi files
  """Walk 'RawMIDI/' and print a listing of the MIDI files found there.

  Args:
    list_songs: If True, print one "<folder> - <file>" line per MIDI file,
      where <folder> is the last path component of the containing directory.
    list_tracks: If True, load every MIDI file and print the name of each of
      its tracks, prefixed with '...'.

  The final line printed is the number of '.mid' files found; files with any
  other extension are ignored and not counted.
  """
  num_midi = 0

  for subdir, dirs, files in os.walk('RawMIDI/'):
    for file in files:
      if not file.endswith(".mid"):
        continue

      num_midi += 1 # Count after the filter so only MIDI files are included

      if list_songs:
        print(subdir.split('/')[-1],'-',file)

      if list_tracks:
        temp = Multitrack(os.path.join(subdir, file))
        for track in temp.tracks:
          print('...',track.name)

  print('Track count:',num_midi)

def tempo_to_progress(tempo): # Get song progress (0-1) from beat-uniform tempo array
  """Convert a tempo array into cumulative song progress.

  Each entry contributes 1/tempo; the running total of those contributions is
  divided by their overall sum, so the last element of the result is 1.

  Args:
    tempo: NumPy array of tempo values.

  Returns:
    NumPy array of the same length holding the fraction of the song elapsed
    after each entry.
  """
  step_duration = 1/tempo
  elapsed = np.cumsum(step_duration)
  total = np.sum(step_duration)
  return elapsed / total


def add_songs_to_df(df, partial_track_match=False, binarize=False): # Add all songs to dataframe
  """Append one row per MIDI file under 'RawMIDI/' to `df` and collect note statistics.

  The tracks of every '.mid' file are grouped into the instruments Drums, Bass,
  Vocals, Rhythm and Lead by track name. Tracks of one instrument are merged
  into a single pianoroll; an instrument with no matching track gets an
  all-zero pianoroll. If no track matches 'Vocals', tracks matching
  'VocalsAll' are used for Vocals instead.

  Args:
    df: DataFrame extended in place. Each song is written to df.loc[len(df)]
      as [song, album, Drums, Bass, Vocals, Rhythm, Lead, tempo, downbeat,
      progress], where song is the file name without its extension and album
      is the last path component of the containing folder.
    partial_track_match: If True, a track belongs to an instrument when its
      name starts with the instrument name; otherwise the names must be equal.
    binarize: If True, binarize the multitrack first and merge tracks with a
      maximum; otherwise merge tracks by summing them.

  Returns:
    Tuple (note_limits, note_counts) of dicts keyed by instrument.
    note_limits[key] is [lowest, highest] nonzero index along axis 1 of the
    merged pianorolls over all songs (left at [127, 0] when the instrument
    never plays). note_counts[key] is the largest number of nonzero entries
    found in a single axis-0 slice.
    NOTE(review): axis 0 is presumably time and axis 1 pitch, following the
    pypianoroll layout - confirm.
  """
  instruments = ('Drums', 'Bass', 'Vocals', 'Rhythm', 'Lead')
  note_limits = {name: [127, 0] for name in instruments}
  note_counts = {name: 0 for name in instruments}

  for subdir, dirs, files in os.walk('RawMIDI/'): # Walk through all files in directory
    album = subdir.split('/')[-1]

    for file in files:
      if not file.endswith(".mid"): # Only process MIDI files
        continue

      print('Processing', album, '-', file) # Status update

      temp = Multitrack(os.path.join(subdir, file))

      # Map every instrument to the indices of the tracks that belong to it
      track_to_channel = {name: [] for name in instruments}
      track_to_channel['VocalsAll'] = []

      for track_idx, track in enumerate(temp.tracks): # Parse all tracks in midi file
        for key in track_to_channel: # Compare all keys to track name
          if ( track.name == key ) or ( partial_track_match and track.name.startswith(key) ): # Exact or partial track-key match (depending on 'partial_track_match')
            track_to_channel[key].append(track_idx)

      if not track_to_channel['Vocals']: # Copy index from 'VocalsAll' to 'Vocals' if necessary
        track_to_channel['Vocals'] = track_to_channel['VocalsAll']

      del track_to_channel['VocalsAll']

      for key in track_to_channel: # Point nonexistent tracks at index -1 (the zero track appended below)
        if not track_to_channel[key]:
          track_to_channel[key].append(-1)

      if binarize:
        temp.binarize()

      full_pianoroll = temp.get_stacked_pianoroll()
      full_pianoroll = np.append(full_pianoroll, np.zeros((*full_pianoroll.shape[0:2],1)), axis=2) # Add extra track of zeros for nonexistent tracks

      tempo = temp.tempo
      downbeat = temp.downbeat

      progress = tempo_to_progress(tempo)

      # Get instrument pianorolls from full pianoroll using dictionary indices
      merge_tracks = np.amax if binarize else np.sum
      pianorolls = {}

      for key, track_indices in track_to_channel.items():
        pianorolls[key] = merge_tracks(full_pianoroll[:,:,track_indices], axis=2)

        # Indices along axis 1 that are nonzero anywhere in the song
        note_locations = np.flatnonzero(pianorolls[key].any(axis=0))

        # Test the size, not .any(): index 0 is a valid location but is falsy
        if note_locations.size == 0:
          continue

        low_note = int(np.amin(note_locations))
        high_note = int(np.amax(note_locations))
        note_count = int(np.amax(np.count_nonzero(pianorolls[key], axis=1)))

        if low_note < note_limits[key][0]:
          note_limits[key][0] = low_note
        if high_note > note_limits[key][1]:
          note_limits[key][1] = high_note
        if note_count > note_counts[key]:
          note_counts[key] = note_count

      # Add new entry to dataframe; splitext keeps periods inside the title
      # (e.g. 'Dr. Robert.mid' -> 'Dr. Robert')
      song = os.path.splitext(file)[0]
      df.loc[len(df)] = [song, album, pianorolls['Drums'], pianorolls['Bass'], pianorolls['Vocals'], pianorolls['Rhythm'], pianorolls['Lead'], tempo, downbeat, progress]

  return note_limits, note_counts


def manual_song_test(df, song_name): # Generate original and recombined audio for manual comparison
  """Render a song twice - from its raw MIDI file and from the dataframe - for listening.

  The original file is read from 'RawMIDI/<Album>/<song_name>.mid'. The
  recombined version is rebuilt from the Drums, Bass, Vocals, Rhythm and Lead
  pianorolls plus the Tempo and Downbeat stored in `df`. Both versions are
  written to the 'temp' folder as .mid and .mp3 and shown as audio players.

  Args:
    df: DataFrame with the columns Song, Album, Drums, Bass, Vocals, Rhythm,
      Lead, Tempo and Downbeat.
    song_name: Value to look up in the Song column; the first match is used.

  Raises:
    IndexError: If no row of `df` has Song equal to `song_name`.
  """
  # Look the song up once instead of re-filtering the dataframe for every column
  matches = df.loc[df.Song == song_name]
  if len(matches) == 0:
    raise IndexError("Song '{}' not found in dataframe".format(song_name))
  song = matches.iloc[0]

  os.makedirs('temp', exist_ok=True) # Output folder may not exist on a fresh checkout

  # Build track from original MIDI file
  filepath = 'RawMIDI/' + song['Album'] + '/' + song_name + '.mid'

  original = Multitrack(filepath)
  #piano_plot(original)

  original.write('temp/original.mid')
  write_soundfile('temp/original.mid', 'temp/original.mp3')
  print('Original File')
  display(Audio('temp/original.mp3'))

  # Build track from dataframe
  drum_track = Track(song['Drums'], is_drum=True, name='Drums')
  bass_track = Track(song['Bass'], program=34, is_drum=False, name='Bass')
  vocal_track = Track(song['Vocals'], program=73, is_drum=False, name='Vocals')
  rhythm_track = Track(song['Rhythm'], program=24, is_drum=False, name='Rhythm')
  lead_track = Track(song['Lead'], program=26, is_drum=False, name='Lead')
  tempo = song['Tempo']
  downbeat = song['Downbeat']

  bass_track.transpose(-12) # Only intermittently needed for proper playback with FluidSynth Soundfont - don't use for exporting raw MIDI files

  recombined = Multitrack(tracks=[drum_track, bass_track, vocal_track, rhythm_track, lead_track], tempo=tempo, downbeat=downbeat)

  recombined.write('temp/recombined.mid')
  write_soundfile('temp/recombined.mid', 'temp/recombined.mp3')
  print('Recombined File')
  display(Audio('temp/recombined.mp3'))

In [0]:
# Write all song/track names to a text file
from contextlib import redirect_stdout

# Everything parse_midi_dir prints is captured into the file instead of the cell output
with open('TrackList.txt', 'w') as track_list, redirect_stdout(track_list):
  parse_midi_dir(list_tracks=True)

In [0]:
# Initialize dataframe
# Starts empty; one row per song is added later
col_names = [
  'Song', 'Album',                             # Identification
  'Drums', 'Bass', 'Vocals', 'Rhythm', 'Lead', # One entry per instrument
  'Tempo', 'Downbeat', 'Progress',             # Timing information
]
raw_songs_df = pd.DataFrame(columns=col_names)
raw_songs_df

In [0]:
# Populate dataframe
# add_songs_to_df appends rows to raw_songs_df in place, so re-running this cell
# without re-creating the dataframe first adds every song a second time.
# partial_track_match=True also accepts track names that merely start with an
# instrument name.
note_limits, note_counts = add_songs_to_df(raw_songs_df, partial_track_match=True)

In [0]:
# Check df
# Print the per-instrument statistics returned by add_songs_to_df, then show
# the first rows of the populated dataframe as the cell output
print('Note limits:', note_limits)
print('Note counts:', note_counts)
raw_songs_df.head()

In [0]:
# Compare original MIDI audio with recombined df audio
# 'Rain' must equal a value in the Song column, which add_songs_to_df derives
# from the MIDI file name
manual_song_test(raw_songs_df, 'Rain')