### """
Produce a DataFrame of dumb flattened chromograms.

Processes a universe of midi-generated mp3 files
into flattened arrays representing pitches over 30-second timeframes.

While doing so, extracts and pickles 6 Librosa arrays per example that represent other useful information.
"""

from __future__ import print_function
import pandas as pd
import numpy as np
import os

import librosa
import librosa.display
import MTheory as mt

In [16]:
main_path = '/Users/ilanmoscovitz/github/sf18_ds11/projects/03-mcnulty/Music/'

music_universe = {'Bach': ['Art_of_Fugue','Goldberg_Variations','Partitas','Sinfonia','Two_Part_Inventions','WTC1','WTC2','Other'],
                  'Mozart': ['Sonatas','Sonatinas','Fugue','Variations','Other']}

df_filename = '/Users/ilanmoscovitz/github/sf18_ds11/projects/03-mcnulty/Raw_Arrays.pkl'

In [3]:
df = pd.DataFrame()
print(df.shape)
extraction_keys = ['y','sr','y_harmonic','y_percussive','chroma', 'sparse']

(0, 0)


In [19]:
# Adds songs to df by composer, collection, song
# Splits each song into segments (seconds=30) and flattens each segment into an example in 1D space

# You need to return and assign df because pandas has no option to use modifier df.append as a modifier
def make_extraction(df, main_path, music_universe, extraction_to_add='chroma', seconds=30, sr=22050, hop_length=512):
    for composer, collections in music_universe.items():
        for collection in collections:
            df = add_collection(df, main_path, composer, collection, extraction_to_add, seconds, sr, hop_length) 
            print('Shape:',df.shape,'\n')
    
    # The indices get screwy from all the appending. Let's fix them.
    df = df.reset_index(drop=True)
    return df

In [25]:
def add_collection(df, main_path, composer, collection, extraction_to_add='chroma', seconds=30, sr=22050, hop_length=512):
    
    # Decide what files to do what with
    directory = main_path + composer + '/' + collection + '/'
    print(directory)
    songs = [song_file[:-4] for song_file in os.listdir(directory) if song_file.endswith('.mp3')]
    to_add = [song for song in songs if len(df)>0 and song not in df['Song'].unique() and collection not in df['Collection'].unique()]
    found = [song for song in songs if song not in to_add]
    
    print(composer, collection)
    print('Already added to DF:', found)
    print('\nAdding:', to_add)
    
    # For each song not in df, add its stack of flattened numpy arrays to the dataframe
    for song in songs:
        song_name = song
        audio_path = directory + song + '.mp3'
        new_examples_np = song_to_examples(audio_path, extraction_to_add=extraction_to_add, seconds=seconds, sr=sr, hop_length=hop_length, flatten=True)
        new_examples_df = pd.DataFrame(data=new_examples_np)
        new_examples_df['Song'] = song_name
        new_examples_df['Composer'] = composer
        new_examples_df['Collection'] = collection
        df = df.append(new_examples_df)
        #print(df.shape)
    # Check to make sure all parsed audio has been pickled
    for song in found:
        mt.load_or_parse(directory+song, extraction_keys)
        
    print('Done :)')
    return df

In [26]:
def song_to_examples(audio_path, extraction_to_add='chroma', seconds=30, sr=22050, hop_length=512, flatten=False):
    extractions = mt.load_or_parse(audio_path, extraction_keys)
    array = extractions[extraction_to_add]
    song_segments = split_array(array, seconds=seconds, sr=sr, hop_length=hop_length)
    if not flatten:
        examples = song_segments
    else:
        examples = flatten_arrays(song_segments)
    return examples

In [7]:
def split_array(array, seconds=30, sr=22050, hop_length=512):
   
    # Number of frames per segment
    frames_per_seg = librosa.core.time_to_frames(seconds, sr=sr, hop_length=hop_length, n_fft=None)
    
    # Number of splits
    n_segs = array.shape[1] // frames_per_seg
    
    # Make resulting list of array segments  
    return [array[:,(segment_i*frames_per_seg):((segment_i+1)*frames_per_seg)] for segment_i in range(n_segs)]

In [8]:
def flatten_arrays(segments):
    return np.stack([segment.flatten('F') for segment in segments])

In [9]:
df = make_extraction(df, main_path, music_universe)

/Users/ilanmoscovitz/github/sf18_ds11/projects/03-mcnulty/Music/Bach/Art_of_Fugue/
Bach Art_of_Fugue
Already added to DF: ['can1', 'can2', 'can3', 'can4', 'cnt1', 'cnt2', 'cnt3', 'dou1', 'dou2', 'inver1', 'inver2', 'mir1', 'mir2', 'reg1', 'reg2', 'tri1', 'tri2', 'unfin']

Adding: []
Done :)
Shape: (156, 15495) 

/Users/ilanmoscovitz/github/sf18_ds11/projects/03-mcnulty/Music/Bach/Goldberg_Variations/
Bach Goldberg_Variations
Already added to DF: []

Adding: ['aria', 'var1', 'var10', 'var11', 'var12c4', 'var13', 'var14', 'var15c5', 'var16', 'var17', 'var18c6', 'var19', 'var2', 'var20', 'var21c7', 'var22', 'var23', 'var24c8', 'var25', 'var26', 'var27c9', 'var28', 'var29', 'var30', 'var3c1', 'var4', 'var5', 'var6c2', 'var7', 'var8', 'var9c3']
Done :)
Shape: (268, 15495) 

/Users/ilanmoscovitz/github/sf18_ds11/projects/03-mcnulty/Music/Bach/Partitas/
Bach Partitas
Already added to DF: []

Adding: ['all1', 'all2', 'cap2', 'cou1', 'cou2', 'gig1', 'men1', 'pre1', 'ron2', 'sar1', 'sar2', 'sin2

In [10]:
df.shape

(1391, 15495)

In [12]:
df[df['Composer']=='Bach'].shape

(691, 15495)

In [13]:
df[df['Composer']=='Mozart'].shape

(700, 15495)

In [14]:
df.to_csv('Raw_Arrays.csv')