# 2) Analyze source collection and target

In [1]:
import pandas as pd
import essentia
import essentia.standard as estd
import matplotlib.pyplot as plt
from IPython.display import display, Audio

DATAFRAME_FILENAME = 'dataframe.csv'
DATAFRAME_MOSAICING_FILENAME = 'dataframe_mosaicing.csv'
DATAFRAME_TARGET_FILE_FILENAME = 'dataframe_target.csv'
ANALYSIS_FRAME_SIZE = 11025  # 1/4th of a second in 44100khz audio
TARGET_SOUND_PATH = 'target.mp3'

In [2]:
def analyze_sound(sound_path, frame_size, freesound_id=0, sync_to_beats=False):
    analysis_output = []
    
    # Load audio file
    sound_path = sound_path
    loader = estd.MonoLoader(filename=sound_path)
    audio = loader()
    if frame_size is None:
        frame_size = len(audio)  # If no frame_size is given use no frames (analyze all audio at once)
    if frame_size % 2 != 0:
        frame_size = frame_size + 1 # Make frame size even
    
    
    if not sync_to_beats:
        # Cut singal in equally-spaced frames of frame_size
        frame_start_samples = range(0, len(audio)-frame_size, frame_size)
    else:
        # Sync start of analysis frames to detected beat locations
        beat_positions = estd.BeatTrackerDegara()(audio)
        beat_positions = [int(round(position * 44100)) for position in beat_positions]  # Positions from seconds to samples
        frame_start_samples = beat_positions
    
    frame_start_end_samples = zip(frame_start_samples[:-1], frame_start_samples[1:])
    
    # Iterate over signal frames
    for count, (fstart, fend) in enumerate(frame_start_end_samples):
        frame = audio[fstart:fend]
        frame_output = {
            'freesound_id': freesound_id,
            'id': '{0}_f{1}'.format(freesound_id, count),
            'path': sound['path'],
            'start_sample': fstart,
            'end_sample': fend,
        }
        
        # Extract loudness
        loudness_algo = estd.Loudness()
        loudness = loudness_algo(frame)
        frame_output['loudness'] = loudness / len(frame)  # Normnalize by length of frame

        # Extract MFCC coefficients
        w_algo = estd.Windowing(type = 'hann')
        spectrum_algo = estd.Spectrum()
        mfcc_algo = estd.MFCC()
        spec = spectrum_algo(w_algo(frame))
        _, mfcc_coeffs = mfcc_algo(spec)
        frame_output.update({'mfcc_{0}'.format(j): mfcc_coeffs[j] for j in range(0, len(mfcc_coeffs))})
        
        # Extract other features here and add to 'frame_output' dictionary
        # ...
        
        # Add frame analysis results to output
        analysis_output.append(frame_output)

    
    return analysis_output
    

In [3]:
# Load dataset created in previous notebook and analyze all sound files in it
df = pd.read_csv(open(DATAFRAME_FILENAME), index_col=0)
analyses = []
for i in range(0, len(df)):
    sound = df.iloc[i]
    print('Analyzing sound with id {0} [{1}/{2}]'.format(sound['freesound_id'], i + 1, len(df)))
    analysis_output = analyze_sound(sound['path'], ANALYSIS_FRAME_SIZE, freesound_id=sound['freesound_id'])
    analyses += analysis_output

# Store analysis results in a new pandas data frame and save it
df_mosaicing = pd.DataFrame(analyses)
df_mosaicing.to_csv(DATAFRAME_MOSAICING_FILENAME)
print('Saved mosaicing dataframe with {0} entries! {1}'.format(len(df_mosaicing), DATAFRAME_MOSAICING_FILENAME))

display(df_mosaicing)  # Show data frame contents
df_mosaicing.describe()  # Show statistics of numerical fields in data frame

Analyzing sound with id 337101 [1/40]
Analyzing sound with id 327666 [2/40]
Analyzing sound with id 413758 [3/40]
Analyzing sound with id 163459 [4/40]
Analyzing sound with id 456943 [5/40]
Analyzing sound with id 418106 [6/40]
Analyzing sound with id 420448 [7/40]
Analyzing sound with id 236016 [8/40]
Analyzing sound with id 115536 [9/40]
Analyzing sound with id 160093 [10/40]
Analyzing sound with id 415209 [11/40]
Analyzing sound with id 213889 [12/40]
Analyzing sound with id 341545 [13/40]
Analyzing sound with id 365061 [14/40]
Analyzing sound with id 110011 [15/40]
Analyzing sound with id 61259 [16/40]
Analyzing sound with id 412017 [17/40]
Analyzing sound with id 268795 [18/40]
Analyzing sound with id 66511 [19/40]
Analyzing sound with id 274989 [20/40]
Analyzing sound with id 212764 [21/40]
Analyzing sound with id 232289 [22/40]
Analyzing sound with id 174466 [23/40]
Analyzing sound with id 415878 [24/40]
Analyzing sound with id 69571 [25/40]
Analyzing sound with id 394830 [26/40

Unnamed: 0,end_sample,freesound_id,id,loudness,mfcc_0,mfcc_1,mfcc_10,mfcc_11,mfcc_12,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,path,start_sample
0,11026,337101,337101_f0,0.020832,-597.112244,158.210632,30.274387,22.498688,15.687822,-44.158089,-34.692230,-42.833469,-0.752230,-6.658066,-9.729980,6.437494,12.006414,files/337101_3474310-hq.ogg,0
1,22052,337101,337101_f1,0.016256,-668.758423,156.123444,20.839123,20.063843,16.288887,-23.997728,-39.755348,-60.834072,-21.577545,-24.134176,-23.102512,-1.488485,0.099745,files/337101_3474310-hq.ogg,11026
2,33078,337101,337101_f2,0.012588,-601.232788,141.837128,19.548870,16.477903,12.424200,-38.550240,-49.844357,-61.236237,-11.870529,-24.017391,-2.694828,-1.678556,-2.038914,files/337101_3474310-hq.ogg,22052
3,44104,337101,337101_f3,0.011591,-632.589355,158.566406,28.111647,10.307667,5.048298,-40.114712,-51.221264,-61.826286,-12.015526,-25.268690,-11.819839,-2.161942,3.207466,files/337101_3474310-hq.ogg,33078
4,55130,337101,337101_f4,0.000601,-914.115112,187.168457,30.003323,18.952166,6.336868,-10.944870,-65.605537,-40.010326,-24.484749,-15.054710,-6.210861,-1.591904,13.569178,files/337101_3474310-hq.ogg,44104
5,66156,337101,337101_f5,0.000111,-973.694702,145.803253,15.194027,4.489662,2.534843,0.920715,-51.397438,-31.269405,-9.695595,1.491444,1.127502,-3.660202,6.672525,files/337101_3474310-hq.ogg,55130
6,77182,337101,337101_f6,0.000102,-1001.539185,134.893860,13.615246,5.399506,0.733414,21.974854,-42.444435,-36.300289,-7.680016,3.572590,-1.946754,-2.695999,8.262453,files/337101_3474310-hq.ogg,66156
7,88208,337101,337101_f7,0.000090,-1002.449829,136.674133,9.007931,-2.301445,-3.588417,31.079166,-25.723122,-16.111828,8.756889,12.221958,2.438820,1.711967,10.090548,files/337101_3474310-hq.ogg,77182
8,99234,337101,337101_f8,0.000083,-1003.050903,138.846344,14.662518,4.091270,3.573502,35.624454,-23.958782,-15.392635,11.677620,16.366123,8.779001,11.264034,18.604792,files/337101_3474310-hq.ogg,88208
9,110260,337101,337101_f9,0.000023,-1058.211792,89.990089,0.746212,-4.408676,-4.581413,41.030125,6.042957,0.819717,9.592087,12.963734,9.244997,6.149471,4.719841,files/337101_3474310-hq.ogg,99234


Unnamed: 0,end_sample,freesound_id,loudness,mfcc_0,mfcc_1,mfcc_10,mfcc_11,mfcc_12,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,start_sample
count,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0,959.0
mean,327135.3,237233.055266,0.00172,-978.640291,81.010531,1.214316,0.289809,0.301817,-2.047203,1.915709,-4.143929,1.718351,-4.95941,0.34761,1.316155,1.186256,316109.3
std,311967.4,153555.82266,0.003404,132.913177,80.214276,12.970225,12.736757,12.571401,51.749146,44.951839,38.640949,23.109111,26.60116,20.889971,16.261679,14.166753,311967.4
min,11026.0,17588.0,0.0,-1138.420044,-177.766983,-49.06609,-52.182461,-45.215298,-173.991959,-118.962502,-111.655014,-87.335983,-80.974525,-66.794678,-55.479149,-53.022079,0.0
25%,88208.0,69571.0,1.8e-05,-1081.410889,25.453747,-5.965124,-6.357483,-6.422279,-27.232433,-29.293694,-25.699627,-8.639053,-17.841375,-9.085033,-6.868118,-6.053799,77182.0
50%,220520.0,267454.0,0.000174,-1009.580688,69.03965,0.746212,-3.1e-05,-3.8e-05,6.968391,6.51157,0.527466,3.094543,-2.7e-05,0.422066,0.611984,0.823357,209494.0
75%,474118.0,365061.0,0.001771,-909.205475,141.645744,8.628618,7.756748,7.10564,27.163647,26.549282,16.198593,14.041843,10.200161,9.640993,8.937794,8.484085,463092.0
max,1301068.0,456943.0,0.033144,-464.476105,275.472717,62.211163,57.13884,51.825665,120.440979,146.857681,116.918793,85.776131,91.547859,83.700592,63.736774,59.836823,1290042.0


In [4]:
# Analyze target file and store results in dataframe
print('Analyzing target sound {0}'.format(TARGET_SOUND_PATH))
target_analysis = analyze_sound(TARGET_SOUND_PATH, frame_size=ANALYSIS_FRAME_SIZE, sync_to_beats=True)
df_target = pd.DataFrame(target_analysis)
df_target.to_csv(DATAFRAME_TARGET_FILE_FILENAME)
print('Saved target dataframe with {0} entries! {1}'.format(len(df_target), DATAFRAME_TARGET_FILE_FILENAME))

display(df_target)  # Show data frame contents
df_target.describe()  # Show statistics of numerical fields in data frame

Analyzing target sound target.mp3
Saved target dataframe with 30 entries! dataframe_target.csv


Unnamed: 0,end_sample,freesound_id,id,loudness,mfcc_0,mfcc_1,mfcc_10,mfcc_11,mfcc_12,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,path,start_sample
0,41984,0,0_f0,0.00826,-629.344543,63.100639,1.027674,4.535658,10.312275,17.024109,45.845253,10.505051,15.517252,12.290071,11.492983,3.209167,5.187899,files/365663_6142149-hq.ogg,21504
1,62976,0,0_f1,0.007774,-631.578491,61.920559,3.65427,8.147383,9.716425,11.150368,40.805496,9.371443,17.336075,13.377789,15.806234,13.105413,10.419749,files/365663_6142149-hq.ogg,41984
2,83968,0,0_f2,0.006835,-986.919373,104.836754,15.941776,11.863121,12.876617,46.74033,83.946228,49.568211,39.822525,36.966942,27.157152,19.489506,13.565142,files/365663_6142149-hq.ogg,62976
3,104448,0,0_f3,0.008152,-627.085022,59.512253,12.093395,20.223137,19.062346,16.212429,46.014381,4.73015,8.108461,6.874325,8.300098,1.230604,8.229359,files/365663_6142149-hq.ogg,83968
4,124928,0,0_f4,0.008415,-629.10498,52.183273,12.480587,15.200541,11.207373,12.434874,44.542007,6.684389,10.561899,10.579035,14.969387,11.376122,13.512585,files/365663_6142149-hq.ogg,104448
5,145408,0,0_f5,0.009921,-628.882935,56.651817,9.395866,14.664248,13.366125,19.667345,45.19199,6.673706,13.965466,10.452045,13.615303,9.816055,13.154787,files/365663_6142149-hq.ogg,124928
6,165888,0,0_f6,0.008661,-651.367249,62.562973,14.310904,20.321941,19.399033,13.835413,45.557495,6.733898,13.618971,11.417652,15.199984,6.379295,11.128438,files/365663_6142149-hq.ogg,145408
7,186368,0,0_f7,0.008489,-622.0578,49.210663,10.119148,9.536124,0.685066,9.488411,35.537708,7.094286,18.961218,17.878235,19.512539,18.107042,21.163429,files/365663_6142149-hq.ogg,165888
8,207360,0,0_f8,0.008514,-622.757507,46.615791,15.386024,7.579311,-1.634836,9.151464,41.338882,9.862566,20.552546,18.041506,16.341288,20.479317,26.571604,files/365663_6142149-hq.ogg,186368
9,228352,0,0_f9,0.007006,-684.282349,41.937206,15.731857,10.478247,2.920403,16.823988,50.76458,21.622339,32.555954,19.947035,11.350306,15.677166,22.066216,files/365663_6142149-hq.ogg,207360


Unnamed: 0,end_sample,freesound_id,loudness,mfcc_0,mfcc_1,mfcc_10,mfcc_11,mfcc_12,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,start_sample
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,341947.733333,0.0,0.008166,-662.250203,59.373879,10.411492,10.078388,8.244133,17.430106,47.470443,11.203123,16.924158,13.779179,15.125332,11.681878,13.512795,321262.933333
std,181986.701892,0.0,0.000811,88.506688,16.088252,4.269899,5.569283,6.48777,9.700876,10.075309,10.292823,8.621141,7.44056,5.104491,6.531638,7.002696,181966.633731
min,41984.0,0.0,0.006428,-986.919373,39.92968,1.027674,-2.960445,-5.752552,8.914869,35.537708,0.773233,5.108322,3.59922,6.936365,0.873417,3.97296,21504.0
25%,191616.0,0.0,0.007806,-645.536514,48.387495,6.986282,7.278742,3.361679,11.412165,41.853357,6.259758,13.024476,10.483793,11.385975,6.680524,8.441248,171008.0
50%,342016.0,0.0,0.008346,-630.913086,60.347824,10.828327,9.298197,9.157687,15.961114,45.534229,8.123589,15.309628,13.129109,15.01939,10.777628,10.774094,321536.0
75%,491520.0,0.0,0.008492,-627.427505,62.681814,13.498277,13.405146,12.781963,19.405698,49.029829,10.651735,18.611151,16.403182,17.848604,16.593272,18.837474,471040.0
max,642048.0,0.0,0.009927,-620.23999,116.034943,16.709757,20.321941,19.399033,51.223942,83.946228,49.568211,44.133682,36.966942,27.157152,24.988342,27.28373,621568.0
