In [1]:
import numpy as np
import scipy
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import librosa
import librosa.display
import os
%matplotlib inline

# Dataset locations. Each can be overridden through an environment variable of
# the same name; the fallbacks are the original author's local paths.
AUDIO_DIR = os.environ.get('AUDIO_DIR', 'C:\\Users\\gaura\\Desktop\\fma_small')
# NOTE: despite its name, META_DIR is the path of the tracks.csv file itself
# (it is passed straight to pd.read_csv), not of a directory.
META_DIR = os.environ.get('META_DIR', 'C:\\Users\\gaura\\Desktop\\fma_metadata\\tracks.csv')

PATH = 'audio_files/'

In [2]:

def get_tids_from_directory(audio_dir):
    """Get track IDs from the mp3s in a directory.

    Only leaf directories (those without sub-directories) are scanned, and
    only files named ``<digits>.mp3`` contribute an ID; anything else found
    next to the audio (text files, OS metadata, ...) is ignored instead of
    raising ``ValueError``.

    Parameters
    ----------
    audio_dir : str
        Path to the directory where the audio files are stored.
    Returns
    -------
        A list of track IDs.
    """
    tids = []
    for _, dirnames, files in os.walk(audio_dir):
        if dirnames:
            # Not a leaf directory: its files are not track files.
            continue
        for file in files:
            stem, ext = os.path.splitext(file)
            # isdecimal() guarantees int(stem) cannot fail.
            if ext.lower() == '.mp3' and stem.isdecimal():
                tids.append(int(stem))
    return tids


def get_audio_path(audio_dir, track_id):
    """
    Build the location of a track's mp3 file.

    The track ID is zero-padded to six digits; the first three characters of
    the padded ID name the sub-directory of `audio_dir`, and the full padded
    ID plus '.mp3' is the file name.
    Examples
    --------
    >>> import utils
    >>> AUDIO_DIR = os.environ.get('AUDIO_DIR')
    >>> utils.get_audio_path(AUDIO_DIR, 2)
    '../data/fma_small/000/000002.mp3'
    """
    padded_id = f'{track_id:06d}'
    subfolder = padded_id[:3]
    filename = padded_id + '.mp3'
    return os.path.join(audio_dir, subfolder, filename)

In [3]:
# Track IDs returned by get_tids_from_directory for the AUDIO_DIR tree.
tids = get_tids_from_directory(AUDIO_DIR)

# Cell output: number of track IDs collected.
len(tids)

8000

In [None]:
def mfcc_calc(track_id):
    """Compute the dB-scaled mel spectrogram of one track.

    Despite the name, no MFCCs are computed: the result is the mel power
    spectrogram converted to decibels relative to its own maximum.

    Parameters
    ----------
    track_id : int
        Track ID; the mp3 is located with
        ``get_audio_path(AUDIO_DIR, track_id)``.

    Returns
    -------
    np.ndarray
        Transposed output of ``librosa.power_to_db`` (callers index it as
        (time frames, mel bands)).
    """
    name = get_audio_path(AUDIO_DIR, track_id)
    x, fs = librosa.load(name)
    # The audio must be passed as `y=`: recent librosa releases made the
    # arguments of melspectrogram keyword-only, and the keyword form also
    # works on older releases.
    mel_power = librosa.feature.melspectrogram(
        y=x, sr=fs, power=2.0, n_fft=2048, hop_length=512)
    power_db = librosa.power_to_db(mel_power, ref=np.max)

    print(f"Sampling Rate: {fs}")
    print(f"Processing File: {name}")

    return power_db.T

In [5]:
def fft(x, fs, file_name):
    """Plot the magnitude spectrum of a signal and save it as a PNG.

    Parameters
    ----------
    x : array-like
        Signal samples.
    fs : number
        Sampling rate of ``x``; scales the frequency axis.
    file_name : str
        Source file name; the text before its first '.' becomes the stem of
        the image, written to ``spectral_outputs/<stem>_fft.png``.
    """
    # Import the submodule explicitly: a bare `import scipy` is not guaranteed
    # to make `scipy.fft` available on every SciPy version.
    import scipy.fft

    X_mag = np.absolute(scipy.fft.fft(x))
    # Bin k of an N-point FFT lies at k * fs / N, so the axis covers [0, fs)
    # and must exclude the endpoint; including it stretches every frequency
    # by a factor N / (N - 1).
    f = np.linspace(0, fs, len(X_mag), endpoint=False)

    # savefig does not create missing directories.
    os.makedirs("spectral_outputs", exist_ok=True)

    plt.figure()
    plt.plot(f, X_mag)
    plt.xlabel('Frequency (Hz)')
    plt.savefig(f"spectral_outputs/{file_name.split('.')[0]}_fft.png")
    

In [6]:
# tracks.csv has a two-row header, so the columns form a two-level MultiIndex;
# the first CSV column becomes the index.
tracks = pd.read_csv(META_DIR, index_col=0, header=[0, 1])
keep_cols = [('set', 'split'), ('set', 'subset'), ('track', 'genre_top')]

df_all = tracks[keep_cols]
# .copy() makes the filtered frame independent of `tracks`, so the column
# assignment below does not raise SettingWithCopyWarning.
df_all = df_all[df_all[('set', 'subset')] == 'small'].copy()

# Expose the index as a regular column as well.
df_all['track_id'] = df_all.index
df_all.head()

Unnamed: 0_level_0,set,set,track,track_id
Unnamed: 0_level_1,split,subset,genre_top,Unnamed: 4_level_1
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2,training,small,Hip-Hop,2
5,training,small,Hip-Hop,5
10,training,small,Pop,10
140,training,small,Folk,140
141,training,small,Folk,141


In [7]:
# Number of rows of df_all in each value of the ('set', 'split') column.
df_all[('set', 'split')].value_counts()

training      6400
test           800
validation     800
Name: (set, split), dtype: int64

In [8]:
 # Write the column labels of `tracks` to trackinfo.csv for reference.
 # NOTE(review): `y` is rebound to an unrelated array in the next cell.
 y = pd.DataFrame(tracks.columns)
 y.to_csv('trackinfo.csv')

In [9]:
# Distinct bit-rate values present in the track metadata.
# NOTE(review): every statistic below is taken over the *unique* values only,
# not over all tracks - confirm that this is what is wanted.
y = np.array(tracks[('track', 'bit_rate')].unique())
print(y)

# Compute each statistic once and reuse it for both the printout and the table.
bit_rate_stats = [y.max(), y.min(), y.mean(), y.std()]
for stat_label, stat_value in zip(
        ["Max", "Min", "Average", "Standard Deviation"], bit_rate_stats):
    print(f"{stat_label}: {stat_value}")

info = {
    "Measures": ["Max", "Min", "Avg", "Std"],
    "Values": bit_rate_stats,
}
scale = pd.DataFrame(info)
scale

[256000 192000 128000 ... 319784 320172 320453]
Max: 448000
Min: -1
Average: 230190.04919528787
Standard Deviation: 51891.9852258237


Unnamed: 0,Measures,Values
0,Max,448000.0
1,Min,-1.0
2,Avg,230190.049195
3,Std,51891.985226


In [10]:
# Integer code of each top-level genre: 1-based, in alphabetical order.
dict_genres = {
    genre_name: code
    for code, genre_name in enumerate(
        [
            'Electronic',
            'Experimental',
            'Folk',
            'Hip-Hop',
            'Instrumental',
            'International',
            'Pop',
            'Rock',
        ],
        start=1,
    )
}

In [11]:
def create_array(df, n_frames=640):
    """Build the spectrogram tensor and integer genre labels for a set of tracks.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per track, with a 'track_id' column and a
        ('track', 'genre_top') column.
    n_frames : int, optional
        Number of leading time frames kept from every spectrogram
        (default 640).

    Returns
    -------
    X_spect : np.ndarray
        float64 array of shape (n_kept, n_frames, 128).
    y_arr : np.ndarray
        ``dict_genres`` code of every kept track, in the same order as
        ``X_spect``.

    Notes
    -----
    A track whose spectrogram has fewer than ``n_frames`` frames cannot fill
    a slot of the tensor; it is reported and skipped (both its spectrogram
    and its label), so the two returned arrays always stay aligned.
    """
    # Row width of every spectrogram, as in the (0, 640, 128) buffer this
    # function has always produced.
    n_mels = 128
    # Allocate the output once and fill it in place; growing the array with
    # np.append would copy the whole tensor on every iteration.
    X_spect = np.empty((len(df), n_frames, n_mels))
    genres = []
    for count, (_, row) in enumerate(df.iterrows(), start=1):
        # With MultiIndex columns row['track_id'] can be a one-element Series
        # rather than a scalar; unwrap it explicitly instead of relying on
        # int(Series), which pandas has deprecated.
        track_id = int(np.asarray(row['track_id']).ravel()[0])
        genre = str(row[('track', 'genre_top')])
        spect = mfcc_calc(track_id)
        if spect.shape[0] < n_frames:
            print(f"Skipping track {track_id}: "
                  f"{spect.shape[0]} frames < {n_frames}")
        else:
            label = dict_genres[genre]
            # Keep the first n_frames frames to normalize small length differences.
            X_spect[len(genres)] = spect[:n_frames, :]
            genres.append(label)
        if count % 100 == 0:
            print("Currently processing: ", count)

    y_arr = np.array(genres)
    # Drop the unused slots left by skipped tracks.
    return X_spect[:len(genres)], y_arr

In [13]:
# One frame per predefined split, selected on the ('set', 'split') column.
train, test, validation = (
    df_all[df_all[("set", "split")] == split_name]
    for split_name in ("training", "test", "validation")
)

In [14]:
# (rows, columns) of the training, test and validation frames, in that order.
tuple(subset.shape for subset in (train, test, validation))

((6400, 4), (800, 4), (800, 4))

In [18]:
# Spectrogram array and genre codes for the rows of the test split.
X_test, y_test = create_array(test)

Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate: 22050
Sampling Rate