# Imports

In [1]:
# Metadata & Audio Access
import os
import utils

# Metadata Management
import pandas as PD

# Record Generation
from multiprocessing import Pool, cpu_count
import tensorflow as TF
from generate_records import worker

----

## Constants

In [2]:
# Drive Root
# NOTE: joining a bare 'D:' with a name produces the drive-relative path
# 'D:fma_large' (resolved against the current directory of drive D:), so the
# root of the drive is spelled out explicitly.
DATA_ROOT = 'D:\\'

# Audio Files
TRACKS_FILE = os.path.join(os.getcwd(), 'fma_metadata', 'tracks.csv')
AUDIO_DIR = os.path.join(DATA_ROOT, 'fma_large')

# TFRecord Directories
RECORD_DIR = os.path.join(DATA_ROOT, 'Track_Records')
TRAIN_DIR = os.path.join(RECORD_DIR, 'train')
TEST_DIR = os.path.join(RECORD_DIR, 'test')

# Time
# SECONDS is the spacing between successive sample offsets of one track;
# RECORD_SIZE is the number of jobs written to a single TFRecord file.
SECONDS = 10
RECORD_SIZE = 1_000

# Multiprocessing Worker Count
# One core is left free; the floor of 1 keeps the count valid on a
# single-core machine, where `cpu_count() - 1` would be 0.
N_WORKERS = max(1, cpu_count() - 1)

----

## Creating the Metadata Dataframe

### Retrieving Raw Metadata

In [3]:
# Load the track table and keep the rows whose ('set', 'subset') value
# compares <= 'large'.
# NOTE(review): this presumably relies on 'subset' being an ordered
# categorical produced by utils.load — confirm.
tracks = utils.load(TRACKS_FILE)
in_large = tracks['set', 'subset'] <= 'large'
large_tracks = tracks[in_large]

# Pair each track's top-level genre with its split assignment, joined on the
# shared track index.
genre_column = large_tracks['track']['genre_top']
split_column = large_tracks['set']['split']
metadata = PD.merge(
    genre_column, split_column,
    left_index = True, right_index = True
)

# Per-genre track counts, rarest first.
metadata['genre_top'].value_counts(ascending = True)

Easy Listening            24
Blues                    110
Soul-RnB                 175
Country                  194
Spoken                   423
Old-Time / Historic      554
Jazz                     571
Classical               1230
International           1389
Instrumental            2079
Pop                     2332
Folk                    2803
Hip-Hop                 3552
Electronic              9372
Experimental           10608
Rock                   14182
Name: genre_top, dtype: int64

### Metadata Processing

In [4]:
# Due to the data imbalance per genre, genre changes need to be made:
# the smallest genres are folded into broader groups and one is renamed.
GENRE_MERGES = {
    'Blues'               : 'Jazz & Blues',
    'Soul-RnB'            : 'Jazz & Blues',
    'Jazz'                : 'Jazz & Blues',
    'Easy Listening'      : 'Jazz & Blues',
    'Country'             : 'Folk',
    'Old-Time / Historic' : 'Historic',
}

# Tracks without a top-level genre are dropped. `.assign` builds a new frame
# instead of writing a column into a filtered slice, which avoids pandas'
# SettingWithCopyWarning.
metadata = (
    metadata[metadata['genre_top'].notna()]
    .assign(genre_top = lambda frame: frame['genre_top'].replace(GENRE_MERGES))
)

# Share of tracks per merged genre, rarest first.
metadata['genre_top'].value_counts(ascending = True, normalize = True)

Spoken           0.008529
Historic         0.011170
Jazz & Blues     0.017743
Classical        0.024799
International    0.028005
Instrumental     0.041917
Pop              0.047018
Folk             0.060426
Hip-Hop          0.071616
Electronic       0.188959
Experimental     0.213880
Rock             0.285939
Name: genre_top, dtype: float64

### Categorical Conversion

In [5]:
# Replace each genre name with its integer category code.
# Matching the shares displayed before and after this cell gives the mapping:
#   0 Classical, 1 Electronic, 2 Experimental, 3 Folk, 4 Hip-Hop, 5 Historic,
#   6 Instrumental, 7 International, 8 Jazz & Blues, 9 Pop, 10 Rock, 11 Spoken
# `.assign` returns a new frame rather than writing into a filtered slice,
# which avoids pandas' SettingWithCopyWarning.
metadata = metadata.assign(
    genre_top = metadata['genre_top'].astype('category').cat.codes
)

# Share of tracks per label code, rarest first.
metadata['genre_top'].value_counts(ascending = True, normalize = True)

11    0.008529
5     0.011170
8     0.017743
0     0.024799
7     0.028005
6     0.041917
9     0.047018
3     0.060426
4     0.071616
1     0.188959
2     0.213880
10    0.285939
Name: genre_top, dtype: float64

----

## MP3 to TFRecord Conversion

### Job Generator

In [6]:
def create_jobs(metadata, majority_labels = (1, 2, 10), majority_samples = 1, minority_samples = 3):
    """Builds one (pathname, label, offset) job per sample to extract.

    Parameters
    ----------
    metadata : DataFrame
        Indexed by track ID, with a 'genre_top' column holding the label.
    majority_labels : iterable
        Labels that receive `majority_samples` samples per track. The default
        (1, 2, 10) are the three label codes with the largest shares in the
        notebook's value counts.
    majority_samples : int
        Samples taken per track whose label is in `majority_labels`.
    minority_samples : int
        Samples taken per track for every other label (dataset balancing).

    Returns
    -------
    list of tuple
        (pathname, label, offset) with offset = step * SECONDS for
        step = 0 .. samples - 1.
        NOTE(review): offset is presumably a start time in seconds interpreted
        by the worker — confirm against generate_records.worker.
    """
    majority_labels = tuple(majority_labels)
    jobs = []

    # Iterates over every track.
    for ID, track in metadata.iterrows():
        # Determining audio file pathname and label.
        pathname = utils.get_audio_path(AUDIO_DIR, ID)
        label = track['genre_top']

        # Determining how many samples to take from the track.
        # (For dataset balancing)
        n_samples = (majority_samples if label in majority_labels else minority_samples)
        jobs.extend(
            (pathname, label, step * SECONDS) for step in range(n_samples)
        )

    return jobs

### Record Creation

In [7]:
# Writes every job to TFRecord files, one file per RECORD_SIZE jobs.
def create_records(jobs, training):
    """Splits `jobs` into consecutive chunks and writes each chunk to a file.

    jobs     : sequence of jobs, sliced in chunks of RECORD_SIZE.
    training : truthy -> files go to TRAIN_DIR, otherwise to TEST_DIR.

    Each file is named '<index of the chunk's first job>.tfr'.
    """
    target_dir = TRAIN_DIR
    if not training:
        target_dir = TEST_DIR

    for first in range(0, len(jobs), RECORD_SIZE):
        # Progress log: index of the first job in the current chunk.
        print(f'ON: {first}')

        chunk = jobs[first : (first + RECORD_SIZE)]
        create_record(chunk, os.path.join(target_dir, f'{first}.tfr'))

# Creates a single record for a subset of the metadata.
def create_record(jobs, filename):
    """Writes all of `jobs` into one TFRecord file at `filename`.

    jobs     : sequence of jobs for this file.
    filename : path of the TFRecord file to create.

    The jobs are handed to `write_data` in batches of N_WORKERS. Batching is
    driven by len(jobs), so a short final file does not produce empty batches
    and no job is skipped when more than RECORD_SIZE jobs are passed.
    """
    # Guards against a zero step if N_WORKERS is ever 0.
    batch_size = max(1, N_WORKERS)

    with TF.io.TFRecordWriter(filename) as writer:
        # Creating a small job batch equal to the number of workers.
        for start in range(0, len(jobs), batch_size):
            write_data(jobs[start : (start + batch_size)], writer)

# Parses and writes the audio to a TFRecord.
def write_data(jobs, writer):
    """Runs `worker` over `jobs` in a process pool and writes the results.

    jobs   : sequence of jobs, each passed as the single argument to `worker`.
    writer : object with a `write` method that receives each successful result.

    A result that is an Exception instance is printed and skipped; every
    other result is written in job order.
    """
    # Nothing to do: avoid spawning a process pool for an empty batch.
    if len(jobs) == 0:
        return

    with Pool(processes = N_WORKERS) as pool:
        for result in pool.map(worker, jobs):
            # Prints errors & saves TFRecord Examples.
            if isinstance(result, Exception):
                print(result)
            else:
                writer.write(result)

### Main Function

In [8]:
if __name__ == '__main__':
    # Initializing directories.
    # `makedirs(exist_ok = True)` replaces the exists-then-mkdir check: it is
    # a no-op for an existing directory and also creates missing parents.
    for DIR in [RECORD_DIR, TRAIN_DIR, TEST_DIR]:
        os.makedirs(DIR, exist_ok = True)

    # Getting the job lists.
    # NOTE(review): every track whose split is not 'validation' becomes a
    # training job, and the 'validation' split is what gets written to
    # TEST_DIR — confirm this is the intended partition.
    TRAIN_jobs = create_jobs(metadata[metadata['split'] != 'validation'])
    TEST_jobs = create_jobs(metadata[metadata['split'] == 'validation'])

    # Printing total job counts.
    print(f'TRAIN SAMPLES: {len(TRAIN_jobs)}')
    print(f'TEST SAMPLES: {len(TEST_jobs)}')
    print('-------------------------------')

    # Generating TFRecord files.
    create_records(TRAIN_jobs, True)
    create_records(TEST_jobs, False)

TRAIN SAMPLES: 72680
TEST SAMPLES: 7790
-------------------------------
ON: 0
WRONG DIMENSIONS (353, 84) AT 0: D:fma_large\000\000784.mp3
WRONG DIMENSIONS (204, 84) AT 0: D:fma_large\000\000789.mp3
WRONG DIMENSIONS (273, 84) AT 0: D:fma_large\000\000794.mp3
ON: 1000
WRONG DIMENSIONS (428, 84) AT 0: D:fma_large\001\001176.mp3

ON: 2000
WRONG DIMENSIONS (353, 84) AT 10: D:fma_large\003\003448.mp3
Input signal length=0 is too small to resample from 44100->22050
ON: 3000

ON: 4000
WRONG DIMENSIONS (218, 84) AT 0: D:fma_large\006\006713.mp3
WRONG DIMENSIONS (175, 84) AT 0: D:fma_large\008\008325.mp3
WRONG DIMENSIONS (262, 84) AT 0: D:fma_large\008\008352.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\008\008996.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\008\008997.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\008\008998.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\008\008999.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009000.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma

WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009131.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009132.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009133.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009134.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009135.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009136.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009137.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009138.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009139.mp3
WRONG DIMENSIONS (380, 84) AT 0: D:fma_large\009\009140.mp3
WRONG DIMENSIONS (380, 84) AT 0: D:fma_large\009\009141.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009142.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009143.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009144.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009145.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_large\009\009146.mp3
WRONG DIMENSIONS (348, 84) AT 0: D:fma_l

ON: 17000
WRONG DIMENSIONS (305, 84) AT 20: D:fma_large\030\030130.mp3
WRONG DIMENSIONS (415, 84) AT 20: D:fma_large\030\030207.mp3
WRONG DIMENSIONS (429, 84) AT 20: D:fma_large\031\031375.mp3
WRONG DIMENSIONS (411, 84) AT 10: D:fma_large\031\031771.mp3
Input signal length=0 is too small to resample from 44100->22050
ON: 18000





















ON: 19000
WRONG DIMENSIONS (386, 84) AT 0: D:fma_large\036\036102.mp3
WRONG DIMENSIONS (46, 84) AT 10: D:fma_large\036\036206.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (250, 84) AT 0: D:fma_large\036\036210.mp3
Input signal length=0 is too small to resample from 44100->22050
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (210, 84) AT 20: D:fma_large\036\036458.mp3
WRONG DIMENSIONS (369, 84) AT 10: D:fma_large\036\036469.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (128, 84) AT 20: D:fma_large\036\036515.mp3
WRONG DIMENSIONS 

WRONG DIMENSIONS (258, 84) AT 0: D:fma_large\075\075167.mp3
WRONG DIMENSIONS (311, 84) AT 0: D:fma_large\075\075188.mp3
WRONG DIMENSIONS (103, 84) AT 0: D:fma_large\075\075189.mp3
WRONG DIMENSIONS (327, 84) AT 20: D:fma_large\075\075847.mp3
WRONG DIMENSIONS (252, 84) AT 10: D:fma_large\075\075857.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (245, 84) AT 20: D:fma_large\075\075863.mp3
ON: 40000
WRONG DIMENSIONS (130, 84) AT 20: D:fma_large\075\075888.mp3
WRONG DIMENSIONS (346, 84) AT 10: D:fma_large\078\078515.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (151, 84) AT 20: D:fma_large\078\078986.mp3
ON: 41000



ON: 42000
WRONG DIMENSIONS (326, 84) AT 0: D:fma_large\083\083406.mp3
WRONG DIMENSIONS (95, 84) AT 20: D:fma_large\084\084160.mp3
WRONG DIMENSIONS (300, 84) AT 0: D:fma_large\084\084413.mp3
WRONG DIMENSIONS (267, 84) AT 0: D:fma_large\084\084419.mp3
WRONG DIMENSIONS (365, 84) AT 0: D:fma_large\084\08

WRONG DIMENSIONS (216, 84) AT 10: D:fma_large\114\114088.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (229, 84) AT 10: D:fma_large\114\114089.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (235, 84) AT 10: D:fma_large\114\114090.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (208, 84) AT 10: D:fma_large\114\114091.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (242, 84) AT 10: D:fma_large\114\114092.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (216, 84) AT 10: D:fma_large\114\114093.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (216, 84) AT 10: D:fma_large\114\114094.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (216, 84) AT 10: D:fma_large\114\114095.mp3
Input signal length=0 is too small to resample from 44100

Input signal length=0 is too small to resample from 44100->22050
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (218, 84) AT 10: D:fma_large\114\114542.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (153, 84) AT 10: D:fma_large\114\114543.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (182, 84) AT 10: D:fma_large\114\114557.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (178, 84) AT 10: D:fma_large\114\114561.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (218, 84) AT 10: D:fma_large\114\114562.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (218, 84) AT 10: D:fma_large\114\114568.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (185, 84) AT 10: D:fma_large\114\114569.mp3
Input signal length=0 is too small to resample from 4

Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (214, 84) AT 10: D:fma_large\115\115107.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (210, 84) AT 10: D:fma_large\115\115108.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (184, 84) AT 10: D:fma_large\115\115109.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (214, 84) AT 10: D:fma_large\115\115110.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (214, 84) AT 10: D:fma_large\115\115111.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (214, 84) AT 10: D:fma_large\115\115112.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (225, 84) AT 10: D:fma_large\115\115115.mp3
Input signal length=0 is too small to resample from 48000->22050
WRONG DIMENSIONS (219, 84) AT 10: D:fma_large\115\115

Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (212, 84) AT 10: D:fma_large\115\115305.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (212, 84) AT 10: D:fma_large\115\115306.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (199, 84) AT 10: D:fma_large\115\115307.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (388, 84) AT 0: D:fma_large\115\115308.mp3
Input signal length=0 is too small to resample from 44100->22050
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (196, 84) AT 10: D:fma_large\115\115318.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (196, 84) AT 10: D:fma_large\115\115319.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (207, 84) AT 10: D:fma_large\115\115320.mp3
Input signal length=0 is too small to resample from 44

WRONG DIMENSIONS (217, 84) AT 10: D:fma_large\115\115512.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (133, 84) AT 0: D:fma_large\115\115513.mp3
Input signal length=0 is too small to resample from 44100->22050
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (175, 84) AT 10: D:fma_large\115\115515.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (236, 84) AT 10: D:fma_large\115\115517.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (208, 84) AT 10: D:fma_large\115\115518.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (178, 84) AT 20: D:fma_large\115\115889.mp3
WRONG DIMENSIONS (188, 84) AT 0: D:fma_large\116\116696.mp3
Input signal length=0 is too small to resample from 44100->22050
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (188, 84) AT 0: D:fma_large\116\116705

WRONG DIMENSIONS (227, 84) AT 10: D:fma_large\115\115133.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (245, 84) AT 10: D:fma_large\115\115134.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (175, 84) AT 10: D:fma_large\115\115156.mp3
Input signal length=0 is too small to resample from 44100->22050
WRONG DIMENSIONS (87, 84) AT 10: D:fma_large\115\115476.mp3
Input signal length=0 is too small to resample from 48000->22050
WRONG DIMENSIONS (325, 84) AT 20: D:fma_large\119\119327.mp3
ON: 7000
WRONG DIMENSIONS (325, 84) AT 20: D:fma_large\143\143980.mp3
WRONG DIMENSIONS (389, 84) AT 0: D:fma_large\155\155006.mp3
WRONG DIMENSIONS (416, 84) AT 0: D:fma_large\155\155014.mp3


----

## Balanced/Unbalanced Data Check

In [12]:
# TFRecord Structure
# Schema used to decode one serialized example: a flat float32 vector of
# 430 * 84 values under 'parameters' and a scalar int64 under 'label'.
FEATURE_STRUCTURE = dict(
    parameters = TF.io.FixedLenFeature([430 * 84], TF.float32),
    label = TF.io.FixedLenFeature([], TF.int64),
)

# Decodes one serialized example and keeps only its label.
def parse_dataset(feature):
    """Parses `feature` against FEATURE_STRUCTURE and returns its 'label' entry."""
    return TF.io.parse_single_example(feature, FEATURE_STRUCTURE)['label']

# Tallies how many examples of each label the record files in a directory hold.
def dataset_count(DIR):
    """Prints the per-label example counts for every file in `DIR`, then the total.

    Every directory entry is opened as a TFRecord file and each example is
    reduced to its label through `parse_dataset`. Nothing is returned.
    """
    # One slot per label (12 labels total).
    tallies = [0 for _ in range(12)]

    for name in os.listdir(DIR):
        # Label-only view of the file; element order is irrelevant for counting.
        labels = TF.data.TFRecordDataset(os.path.join(DIR, name)).map(
            parse_dataset,
            num_parallel_calls = TF.data.experimental.AUTOTUNE,
            deterministic = False
        )

        for label in labels:
            tallies[label] += 1

    # Logging Results
    print(tallies)
    print(sum(tallies))
        
# Label distribution of the training records, then of the test records.
for split_dir in (TRAIN_DIR, TEST_DIR):
    dataset_count(split_dir)

[3450, 8320, 9571, 8195, 9624, 1482, 5600, 3712, 2353, 5401, 12809, 904]
71421
[222, 865, 964, 741, 953, 162, 573, 408, 282, 880, 1323, 341]
7714
