 ###### Classes in Subset_Creators.subsetters can be used to automatically create subsets of a given dataset processed into HVO_Sequence (>= 0.3.0)
 

## 1. GrooveMidiSubsetter 
Use this class to create multiple subsets with specific requirements for each subset

In [1]:
from subsetters import GrooveMidiSubsetter

True


### Procedure

### <u> A. </u>  Create a list of filter dictionaries for creating the subsets

##### Filters are given as a list of dictionaries. Each dictionary in the list holds the set of specifications for one subset

##### THE VALUES IN EACH DICTIONARY MUST ALSO BE LISTS (SEE FORMATTING BELOW). THIS ENSURES THAT MULTIPLE SPECIFICATIONS PER FEATURE ARE ALLOWED IN THE SUBSETTING PROCESS




GROOVEMIDI_FILTER_TEMPLATE = [{

    "drummer": ,                                    # ["drummer1", ..., and/or "session9"]
    "session": ,                                    # ["session1", "session2", and/or "session3"]
    "loop_id": ,
    "master_id": ,
    "style_primary": ,                              # ["afrobeat", "afrocuban", "blues", "country", "dance", "funk",
                                                    #  "gospel", "highlife", "hiphop", "jazz",
                                                    #  "latin", "middleeastern", "neworleans", "pop",
                                                    #  "punk", "reggae", "rock", "soul"]
    "style_secondary": ,
    "bpm": None,                                    # [(range_0_lower_bound, range_0_upper_bound), ...,
                                                    #   (range_n_lower_bound, range_n_upper_bound)]
    "beat_type": ,                                  # ["beat" or "fill"]
    "time_signature": ,                             # ["4-4", "3-4", "6-8"]
    "full_midi_filename": ,                         # list_of full_midi_filenames
    "full_audio_filename": ,                        # list_of full_audio_filename
    "number_of_instruments": ,                      # [(n_instruments_lower_bound, n_instruments_upper_bound), ...,
                                                    #  (n_instruments_lower_bound, n_instruments_upper_bound)]
}]  






##### <u> <i> Example 1 </i> </u>

Create two subsets: first one with rock and funk samples only and the second one with reggae only

Filter = [{"style_primary": ["rock", "funk"]}, {"style_primary": ["reggae"]}, ]

##### <u> <i> Example 2 </i> </u>

Create two subsets like above but only get the beats in the first one and fills in the second

Filter = [{"style_primary": ["rock", "funk"], "beat_type": ["beat"]}, {"style_primary": ["reggae"], "beat_type": ["fill"]}, ]






### <u> B. </u>  Create an instance of the GrooveMidiSubsetter and call the create_subsets() method


------------------
____________

## More Detailed Examples Below

##### 1. A. Load entire set without any filters
----
Step I. Create an instance of GrooveMidiSubsetter and set the path to pickle file as below

Step II. Set list_of_filter_dicts_for_subsets to None to ensure that the dataset is not filtered

In [None]:
# Build a subsetter over the pickled training split. Passing None as the
# filter list requests no filtering (see Step II above the cell).
gmd_all = GrooveMidiSubsetter(
            pickle_source_path="../datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs",
            subset="GrooveMIDI_processed_train",
            hvo_pickle_filename="hvo_sequence_data.obj",
            list_of_filter_dicts_for_subsets=None,
    )
# create_subsets() yields two parallel sequences: one tag and one subset per filter.
tags, subsets = gmd_all.create_subsets()
# Report the index, tag and number of samples of every subset.
for ix, (tag, subset) in enumerate(zip(tags, subsets)):
    print("Subset {} --> Tag {} --> n_samples {}".format(ix, tag, len(subset)))
   

##### 1. B. Create style specific subsets
----
from Subset_Creators.subsetters import GrooveMidiSubsetter

In [2]:
# Primary style labels used to build one subset per style.
# `styles` is reused by the later cells in this notebook.
styles = ["afrobeat", "afrocuban", "blues", "country", "dance",
          "funk", "gospel", "highlife", "hiphop", "jazz",
          "latin", "middleeastern", "neworleans", "pop",
          "punk", "reggae", "rock", "soul"]


# One filter dict per style; each value is a list, as the filter format requires.
list_of_filter_dicts_for_subsets = [{"style_primary": [style]} for style in styles]

# Same source data as before, now with one filter per style.
gmd_by_style = GrooveMidiSubsetter(
            pickle_source_path="../datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs",
            subset="GrooveMIDI_processed_train",
            hvo_pickle_filename="hvo_sequence_data.obj",
            list_of_filter_dicts_for_subsets=list_of_filter_dicts_for_subsets,
    )
# create_subsets() yields two parallel sequences: one tag and one subset per filter.
tags_by_style, subsets_by_style = gmd_by_style.create_subsets()
# Report the index, tag and number of samples of every subset.
for ix, (tag, subset) in enumerate(zip(tags_by_style, subsets_by_style)):
    print("Subset {} --> Tag {} --> n_samples {}".format(ix, tag, len(subset)))

Subset 0 --> Tag ['afrobeat'] --> n_samples 779
Subset 1 --> Tag ['afrocuban'] --> n_samples 773
Subset 2 --> Tag ['blues'] --> n_samples 87
Subset 3 --> Tag ['country'] --> n_samples 122
Subset 4 --> Tag ['dance'] --> n_samples 548
Subset 5 --> Tag ['funk'] --> n_samples 1636
Subset 6 --> Tag ['gospel'] --> n_samples 0
Subset 7 --> Tag ['highlife'] --> n_samples 47
Subset 8 --> Tag ['hiphop'] --> n_samples 739
Subset 9 --> Tag ['jazz'] --> n_samples 2323
Subset 10 --> Tag ['latin'] --> n_samples 2970
Subset 11 --> Tag ['middleeastern'] --> n_samples 118
Subset 12 --> Tag ['neworleans'] --> n_samples 734
Subset 13 --> Tag ['pop'] --> n_samples 120
Subset 14 --> Tag ['punk'] --> n_samples 94
Subset 15 --> Tag ['reggae'] --> n_samples 161
Subset 16 --> Tag ['rock'] --> n_samples 5563
Subset 17 --> Tag ['soul'] --> n_samples 294


##### 1. C. Create style specific subsets BUT ignore fills
----


In [21]:
# One filter dict per style, additionally restricted to beats (fills are excluded).
# NOTE: the previous form `[[]].append([...])` evaluated to None, because
# list.append() mutates in place and returns None. The subsetter therefore
# received no filters at all. Build the list of dicts directly instead.
list_of_filter_dicts_for_subsets = [
    {"style_primary": [style], "beat_type": ["beat"]} for style in styles
]

# Same source data as the other examples, now with one (style AND beat) filter per style.
gmd_by_styl_and_beat = GrooveMidiSubsetter(
    pickle_source_path="../datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs",
    subset="GrooveMIDI_processed_train",
    hvo_pickle_filename="hvo_sequence_data.obj",
    list_of_filter_dicts_for_subsets=list_of_filter_dicts_for_subsets,
)

# create_subsets() yields two parallel sequences: one tag and one subset per filter.
tags_by_style_and_beat, subsets_by_style_and_beat = gmd_by_styl_and_beat.create_subsets()
# Report the index, tag and number of samples of every subset.
for ix, (tag, subset) in enumerate(zip(tags_by_style_and_beat, subsets_by_style_and_beat)):
    print("Subset {} --> Tag {} --> n_samples {}".format(ix, tag, len(subset)))

Subset 0 --> Tag ['afrobeat']_AND_['beat'] --> n_samples 779
Subset 1 --> Tag ['afrocuban']_AND_['beat'] --> n_samples 772
Subset 2 --> Tag ['blues']_AND_['beat'] --> n_samples 87
Subset 3 --> Tag ['country']_AND_['beat'] --> n_samples 119
Subset 4 --> Tag ['dance']_AND_['beat'] --> n_samples 548
Subset 5 --> Tag ['funk']_AND_['beat'] --> n_samples 1578
Subset 6 --> Tag ['gospel']_AND_['beat'] --> n_samples 0
Subset 7 --> Tag ['highlife']_AND_['beat'] --> n_samples 47
Subset 8 --> Tag ['hiphop']_AND_['beat'] --> n_samples 712
Subset 9 --> Tag ['jazz']_AND_['beat'] --> n_samples 2300
Subset 10 --> Tag ['latin']_AND_['beat'] --> n_samples 2970
Subset 11 --> Tag ['middleeastern']_AND_['beat'] --> n_samples 118
Subset 12 --> Tag ['neworleans']_AND_['beat'] --> n_samples 721
Subset 13 --> Tag ['pop']_AND_['beat'] --> n_samples 110
Subset 14 --> Tag ['punk']_AND_['beat'] --> n_samples 94
Subset 15 --> Tag ['reggae']_AND_['beat'] --> n_samples 161
Subset 16 --> Tag ['rock']_AND_['beat'] --> n

##### 1. D. Create style specific subsets BUT ignore fills, keeping only BPMs below 60 or above 160 and a minimum of 3 active voices
----


In [None]:
# One filter dict per style, restricted to beats, to two BPM ranges
# ((0, 60) and (160, 10000)) and to an instrument-count range of (2, 10000).
# Range-type features take a list of (lower_bound, upper_bound) tuples.
# NOTE(review): the section heading says "minimum of 3 active voices" while the
# instrument range below starts at 2 -- confirm whether the bounds are inclusive.
list_of_filter_dicts_for_subsets = [{"style_primary": [style], "beat_type": ["beat"], 
                                     "bpm":[(0, 60), (160, 10000)],
                                     "number_of_instruments": [(2, 10000)]
                                    } for style in styles]

# Rebinds gmd_by_styl_and_beat to a subsetter that uses the stricter filters above.
gmd_by_styl_and_beat = GrooveMidiSubsetter(
            pickle_source_path="../datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs",
            subset="GrooveMIDI_processed_train",
            hvo_pickle_filename="hvo_sequence_data.obj",
            list_of_filter_dicts_for_subsets=list_of_filter_dicts_for_subsets,
    )

# create_subsets() yields two parallel sequences: one tag and one subset per filter.
tags_by_style_and_beat, subsets_by_style_and_beat = gmd_by_styl_and_beat.create_subsets()
# Report the index, tag and number of samples of every subset.
for ix, (tag, subset) in enumerate(zip(tags_by_style_and_beat, subsets_by_style_and_beat)):
    print("Subset {} --> Tag {} --> n_samples {}".format(ix, tag, len(subset)))

##### 1. D. Create style specific subsets (4-4 beats only, no fills) and also return the full set (by inserting None as the first filter)
----
 

In [38]:
# A leading None entry asks for the unfiltered set; the remaining entries select
# 4-4 beats (no fills) for one style each.
list_of_filter_dicts_for_subsets = [None] + [
    {"style_primary": [style], "beat_type": ["beat"], "time_signature": ["4-4"]}
    for style in styles
]
# Bare expression so the notebook displays the resulting filter list.
list_of_filter_dicts_for_subsets

[None,
 {'style_primary': ['afrobeat'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['afrocuban'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['blues'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['country'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['dance'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['funk'], 'beat_type': ['beat'], 'time_signature': ['4-4']},
 {'style_primary': ['gospel'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['highlife'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['hiphop'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['jazz'], 'beat_type': ['beat'], 'time_signature': ['4-4']},
 {'style_primary': ['latin'],
  'beat_type': ['beat'],
  'time_signature': ['4-4']},
 {'style_primary': ['middleeastern'],
  'beat_type': [

In [39]:
# Build a subsetter from the filter list prepared in the previous cell.
gmd = GrooveMidiSubsetter(
            pickle_source_path="../datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs",
            subset="GrooveMIDI_processed_train",
            hvo_pickle_filename="hvo_sequence_data.obj",
            list_of_filter_dicts_for_subsets=list_of_filter_dicts_for_subsets,
    )

# create_subsets() yields two parallel sequences: one tag and one subset per filter.
tags, subsets = gmd.create_subsets()
# Report the index, tag and number of samples of every subset.
for ix, (tag, subset) in enumerate(zip(tags, subsets)):
    print("Subset {} --> Tag {} --> n_samples {}".format(ix, tag, len(subset)))

Subset 0 --> Tag [''] --> n_samples 17108
Subset 1 --> Tag ['afrobeat']_AND_['beat']_AND_['4-4'] --> n_samples 779
Subset 2 --> Tag ['afrocuban']_AND_['beat']_AND_['4-4'] --> n_samples 772
Subset 3 --> Tag ['blues']_AND_['beat']_AND_['4-4'] --> n_samples 87
Subset 4 --> Tag ['country']_AND_['beat']_AND_['4-4'] --> n_samples 119
Subset 5 --> Tag ['dance']_AND_['beat']_AND_['4-4'] --> n_samples 548
Subset 6 --> Tag ['funk']_AND_['beat']_AND_['4-4'] --> n_samples 1578
Subset 7 --> Tag ['gospel']_AND_['beat']_AND_['4-4'] --> n_samples 0
Subset 8 --> Tag ['highlife']_AND_['beat']_AND_['4-4'] --> n_samples 47
Subset 9 --> Tag ['hiphop']_AND_['beat']_AND_['4-4'] --> n_samples 712
Subset 10 --> Tag ['jazz']_AND_['beat']_AND_['4-4'] --> n_samples 2083
Subset 11 --> Tag ['latin']_AND_['beat']_AND_['4-4'] --> n_samples 2770
Subset 12 --> Tag ['middleeastern']_AND_['beat']_AND_['4-4'] --> n_samples 118
Subset 13 --> Tag ['neworleans']_AND_['beat']_AND_['4-4'] --> n_samples 721
Subset 14 --> Tag ['

##### 1. E. Create style specific subsets BUT ignore fills, and also return the full set of beats in 4-4 (by inserting a filter without style_primary)
----
 

In [42]:
# The first filter selects every 4-4 beat regardless of style; the remaining
# filters select 4-4 beats for one style each.
list_of_filter_dicts_for_subsets = [
    {"beat_type": ["beat"], "time_signature": ["4-4"]},
    *(
        {"style_primary": [style], "beat_type": ["beat"], "time_signature": ["4-4"]}
        for style in styles
    ),
]
print(list_of_filter_dicts_for_subsets)

[{'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['afrobeat'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['afrocuban'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['blues'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['country'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['dance'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['funk'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['gospel'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['highlife'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['hiphop'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['jazz'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['latin'], 'beat_type': ['beat'], 'time_signature': ['4-4']}, {'style_primary': ['middleeastern'], 'beat_type': ['bea

In [41]:
# Build a subsetter from the filter list prepared in the previous cell.
gmd = GrooveMidiSubsetter(
            pickle_source_path="../datasets_extracted_locally/GrooveMidi/hvo_0.3.0/Processed_On_13_05_2021_at_12_56_hrs",
            subset="GrooveMIDI_processed_train",
            hvo_pickle_filename="hvo_sequence_data.obj",
            list_of_filter_dicts_for_subsets=list_of_filter_dicts_for_subsets,
    )

# create_subsets() yields two parallel sequences: one tag and one subset per filter.
tags, subsets = gmd.create_subsets()
# Report the index, tag and number of samples of every subset.
for ix, (tag, subset) in enumerate(zip(tags, subsets)):
    print("Subset {} --> Tag {} --> n_samples {}".format(ix, tag, len(subset)))

Subset 0 --> Tag ['beat']_AND_['4-4'] --> n_samples 16266
Subset 1 --> Tag ['afrobeat']_AND_['beat']_AND_['4-4'] --> n_samples 779
Subset 2 --> Tag ['afrocuban']_AND_['beat']_AND_['4-4'] --> n_samples 772
Subset 3 --> Tag ['blues']_AND_['beat']_AND_['4-4'] --> n_samples 87
Subset 4 --> Tag ['country']_AND_['beat']_AND_['4-4'] --> n_samples 119
Subset 5 --> Tag ['dance']_AND_['beat']_AND_['4-4'] --> n_samples 548
Subset 6 --> Tag ['funk']_AND_['beat']_AND_['4-4'] --> n_samples 1578
Subset 7 --> Tag ['gospel']_AND_['beat']_AND_['4-4'] --> n_samples 0
Subset 8 --> Tag ['highlife']_AND_['beat']_AND_['4-4'] --> n_samples 47
Subset 9 --> Tag ['hiphop']_AND_['beat']_AND_['4-4'] --> n_samples 712
Subset 10 --> Tag ['jazz']_AND_['beat']_AND_['4-4'] --> n_samples 2083
Subset 11 --> Tag ['latin']_AND_['beat']_AND_['4-4'] --> n_samples 2770
Subset 12 --> Tag ['middleeastern']_AND_['beat']_AND_['4-4'] --> n_samples 118
Subset 13 --> Tag ['neworleans']_AND_['beat']_AND_['4-4'] --> n_samples 721
Subs

In [None]:
list_of_filter_dicts_for_subsets =

[
    {'style_primary': ['afrobeat'], 'beat_type': ['beat'], 'time_signature': ['4-4']},
         
        .
        .
        .
    
    {'style_primary': ['soul'], 'beat_type': ['beat'], 'time_signature': ['4-4']}
]