# Is it a banger? - Make your own dataset

### TODO

Discuss folder structure, `split_files.sh` script, duration of each clip, `p_include`

#### Folder structure
```
data
├── label_1
├── label_2
├──    ·
├──    ·
├──    ·
└── label_k
```

For the given example **Need link here eventually**, we simply have

```
data
├── banger
└── not_a_banger
```

#### File splitting - EXPLAIN WHAT THIS DOES, UPDATE IF FILE CHANGED
```bash
#!/bin/bash

SEGMENT_TIME=5 # in seconds
DATA_ROOT_DIR="../data"

DIRS=$(find "${DATA_ROOT_DIR}" -maxdepth 1 -mindepth 1 -type d)

# Make sure globstar is enabled
shopt -s globstar

for FILE in "${DATA_ROOT_DIR}"/**/*.mp3
do 
    echo "Processing ${FILE}"
    ffmpeg -i "${FILE}" -f segment -segment_time ${SEGMENT_TIME} -c copy "${FILE%.*}"%03d.wav                
    rm "${FILE}"
    rm "$(ls -t "${FILE%.*}"*.wav | tail -n 1)" # remove last file so uniform length
done
```

In [1]:
import os
import glob
import librosa
import numpy as np
np.random.seed(1234)
import pandas as pd

In [4]:
parent_dir = '../data'
parent_dir_contents = [os.path.join(parent_dir, dirname) for dirname in os.listdir(parent_dir)]
sub_dirs = [filename if os.path.isdir(filename) else None for filename in parent_dir_contents]
sub_dirs = list(filter(None.__ne__, sub_dirs))
labels_list = [os.path.relpath(path, parent_dir) for path in sub_dirs]

In [5]:
def extract_features(file_name, sample_rate=22050, segment_time=1, samples_to_clip=500):
    audio, sample_rate = librosa.load(file_name, sr=sample_rate)
    end_idx = sample_rate * segment_time - samples_to_clip # remove some end samples as not strictly uniform size
    audio = audio[0:end_idx]
    log_specgram = librosa.logamplitude(np.abs(librosa.stft(audio))**2, ref_power=np.max)
    features = {"audio": audio, "log_specgram": log_specgram}
    return features

def one_hot_encode(label, labels_list):
    n_labels = len(labels_list)
    one_hot_encoded = np.zeros(n_labels)
    for idx, cmp in enumerate(labels_list):
        if label == cmp:
            one_hot_encoded[idx] = 1                     
    return one_hot_encoded

def trim_file_list(fnames_list, p_include=1.0):
    fnames_list = np.asarray(fnames_list)
    include = np.random.rand(*fnames_list.shape)
    fnames_list = fnames_list[include < p_include]
    return fnames_list
    

def parse_audio_files(parent_dir, sub_dirs_list, labels_list, file_ext='*.wav', p_include=1.0,\
                      sample_rate=22050, segment_time=1, samples_to_clip=500):
    data = []
    index = []
    for label_idx, sub_dir in enumerate(sub_dirs_list):
        fnames_list = glob.glob(os.path.join(sub_dir, "*.wav"))
        fnames_list = trim_file_list(fnames_list, p_include=p_include)
        for fname in fnames_list:
            print("Processing " + os.path.basename(fname))
            features = extract_features(fname)
            label = labels_list[label_idx]
            label_one_hot = one_hot_encode(label, labels_list)
            features['label'] = label
            features["label_one_hot"] = label_one_hot
            data.append(features)
            index.append(os.path.basename(fname))
    return pd.DataFrame(data, index=index)

In [7]:
df = parse_audio_files(parent_dir, sub_dirs, labels_list, p_include=0.3, segment_time=5)
df = df.iloc[np.random.permutation(len(df))] # shuffle rows
df.to_pickle(os.path.join(parent_dir, 'processed_dataset.pkl'))

Processing Fun. - We Are Young ft. Janelle Monáe [OFFICIAL VIDEO]010.wav
Processing The Lumineers - Big Parade048.wav
Processing Fun. - We Are Young ft. Janelle Monáe [OFFICIAL VIDEO]038.wav
Processing The Lumineers - Holdin' Out - Storks - Original Motion Picture Soundtrack009.wav
Processing MACKLEMORE & RYAN LEWIS - THRIFT SHOP FEAT. WANZ (OFFICIAL VIDEO)026.wav
Processing Imagine Dragons - Radioactive013.wav
Processing Capital Cities - Safe And Sound (Official Video)014.wav
Processing The Lumineers - White Lie (lyrics)025.wav
Processing Bastille - Pompeii017.wav
Processing Avicii - Wake Me Up (Official Video)012.wav
Processing OneRepublic - Counting Stars036.wav
Processing The Lumineers - Ain't Nobody's Problem023.wav
Processing The Lumineers - Morning Song060.wav
Processing Bruno Mars - Locked Out Of Heaven [OFFICIAL VIDEO]046.wav
Processing The Lumineers - Ain't Nobody's Problem037.wav
Processing The Lumineers - Slow It Down056.wav
Processing The Lumineers   This Must Be The Place

Processing The Lumineers - White Lie (lyrics)021.wav
Processing Lorde - Royals (US Version)004.wav
Processing Imagine Dragons - Demons (Official)028.wav
Processing The Lumineers - Holdin' Out - Storks - Original Motion Picture Soundtrack019.wav
Processing MACKLEMORE & RYAN LEWIS - THRIFT SHOP FEAT. WANZ (OFFICIAL VIDEO)022.wav
Processing Phillip Phillips - Home003.wav
Processing The Lumineers - 'Stubborn Love' (Official Video)048.wav
Processing Sleep On The Floor (LYRICS) - The Lumineers041.wav
Processing Passenger _ Let Her Go (Official Video)011.wav
Processing The Lumineers - 'Submarines' (Official Video)028.wav
Processing The Lumineers - Where The Skies Are Blue [Lyrics]027.wav
Processing The Lumineers - Patience [Lyrics]005.wav
Processing The Lumineers - My Eyes [Lyrics]022.wav
Processing The Lumineers - Big Parade017.wav
Processing Gotye - Somebody That I Used To Know (feat. Kimbra) - official video045.wav
Processing Mumford & Sons - I Will Wait062.wav
Processing Rihanna - Stay ft

Processing The Lumineers - Flapper Girl032.wav
Processing The Lumineers - Slow It Down025.wav
Processing The Lumineers - Flapper Girl026.wav
Processing The Lumineers - Slow It Down019.wav
Processing OneRepublic - Counting Stars051.wav
Processing The Lumineers - Morning Song013.wav
Processing Of Monsters And Men - Little Talks (Official Video)047.wav
Processing Avicii - Wake Me Up (Official Video)049.wav
Processing Bruno Mars - Locked Out Of Heaven [OFFICIAL VIDEO]035.wav
Processing The Lumineers - Darlene [Lyrics in description]007.wav
Processing MACKLEMORE & RYAN LEWIS - THRIFT SHOP FEAT. WANZ (OFFICIAL VIDEO)041.wav
Processing The Lumineers - Cleopatra015.wav
Processing Sleep On The Floor (LYRICS) - The Lumineers036.wav
Processing The Lumineers - Charlie Boy048.wav
Processing The Lumineers - My Eyes [Lyrics]026.wav
Processing Gotye - Somebody That I Used To Know (feat. Kimbra) - official video041.wav
Processing The Lumineers - 'Submarines' (Official Video)038.wav
Processing P!nk - Ju

Processing The Lumineers - Gun Song [Lyrics]008.wav
Processing The Lumineers - Blue Christmas024.wav
Processing The Lumineers - Flapper Girl006.wav
Processing The Lumineers - Gun Song [Lyrics]034.wav
Processing Fun. - Some Nights [OFFICIAL VIDEO]010.wav
Processing Fun. - Some Nights [OFFICIAL VIDEO]039.wav
Processing The Lumineers - Angela062.wav
Processing The Lumineers - Gun Song [Lyrics]021.wav
Processing The Lumineers - Flapper Girl013.wav
Processing The Lumineers - Slow It Down004.wav
Processing The Lumineers - Flapper Girl007.wav
Processing The Lumineers - Blue Christmas031.wav
Processing Bastille - Pompeii045.wav
Processing Avicii - Wake Me Up (Official Video)040.wav
Processing Bruno Mars - Locked Out Of Heaven [OFFICIAL VIDEO]028.wav
Processing The Lumineers - Morning Song026.wav
Processing The Lumineers - Long Way From Home [Lyrics]004.wav
Processing The Lumineers - Darlene [Lyrics in description]026.wav
Processing The Lumineers - Long Way From Home [Lyrics]010.wav
Processing 

Processing Bastille - Pompeii018.wav
Processing The Lumineers - Morning Song053.wav
Processing The Lumineers - Morning Song047.wav
Processing Bastille - Pompeii024.wav
Processing OneRepublic - Counting Stars011.wav
Processing The Lumineers - Ain't Nobody's Problem038.wav
Processing Imagine Dragons - It's Time002.wav
Processing Imagine Dragons - It's Time016.wav
Processing The Lumineers - Flowers In Your Hair021.wav
Processing The Lumineers   This Must Be The Place003.wav
Processing Vance Joy - 'Riptide' Official Video004.wav
Processing Vance Joy - 'Riptide' Official Video010.wav
Processing The Lumineers - In The Light [Lyrics]031.wav
Processing The Lumineers - Nobody Knows (From 'Pete's Dragon')002.wav
Processing The Lumineers - Angela003.wav
Processing The Lumineers - Angela002.wav
Processing The Lumineers - Angela016.wav
Processing The Lumineers - In The Light [Lyrics]018.wav
Processing Vance Joy - 'Riptide' Official Video011.wav
Processing Vance Joy - 'Riptide' Official Video005.wav

Processing Selected New Year Mix143.wav
Processing Selected New Year Mix180.wav
Processing Selected New Year Mix182.wav
Processing Selected New Year Mix155.wav
Processing Selected New Year Mix009.wav
Processing Selected New Year Mix236.wav
Processing Selected New Year Mix356.wav
Processing Selected New Year Mix183.wav
Processing Selected New Year Mix127.wav
Processing Selected New Year Mix331.wav
Processing Selected New Year Mix292.wav
Processing Selected New Year Mix286.wav
Processing Selected New Year Mix279.wav
Processing Selected New Year Mix324.wav
Processing Selected New Year Mix291.wav
Processing Selected New Year Mix087.wav
Processing Selected New Year Mix078.wav
Processing Selected New Year Mix050.wav
Processing Selected New Year Mix051.wav
Processing Selected New Year Mix290.wav
Processing Selected New Year Mix284.wav
Processing Selected New Year Mix327.wav
Processing Selected New Year Mix131.wav
Processing Selected New Year Mix109.wav
Processing Selected New Year Mix337.wav


In [10]:
display(df[:10])

Unnamed: 0,audio,label,label_one_hot,log_specgram
Pacific_State_Will_Bailey_Remix089.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-45.4622, -33.4534, -31.1523, -50.0221, -65...."
Pacific_State_Will_Bailey_Remix011.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-32.9026, -23.5149, -23.0441, -36.4431, -62...."
808 State - In Yer Face (Bicep Remix)058.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-45.6644, -46.1606, -52.6074, -61.5679, -62...."
808 State - In Yer Face (Bicep Remix)031.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-35.3468, -42.0042, -80.0, -42.0002, -37.917..."
Twinkle Twinkle Little Star021.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",not_a_banger,"[0.0, 1.0]","[[-72.6652, -59.9379, -55.9415, -66.4315, -63...."
808 State - In Yer Face (Bicep Remix)008.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-41.8047, -41.0589, -64.2355, -47.7573, -42...."
808 State - In Yer Face (Bicep Remix)051.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-25.1378, -21.8044, -25.6636, -50.0461, -50...."
Pacific_State_Will_Bailey_Remix078.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-79.7082, -43.5325, -38.7551, -50.4211, -74...."
Pacific_State_Will_Bailey_Remix058.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-39.3653, -29.7407, -31.3427, -80.0, -75.851..."
Pacific_State_Will_Bailey_Remix016.wav,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",banger,"[1.0, 0.0]","[[-45.1795, -41.0869, -38.5696, -80.0, -72.835..."
