In [7]:
import os

import h5py
import imageio
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Convert the mp3 audio files to mel-spectrograms

In [8]:
# Song metadata table; a later cell filters audio files by membership in its 'id' column.
# NOTE: unpickling can execute arbitrary code -- only load 'fin.pkl' from a trusted source.
df = pd.read_pickle('fin.pkl')

In [9]:
def mp3_to_mel_spectrogram(mp3_path, output_dir, n_frames=1292):
    """Return a dB-scaled mel-spectrogram of an audio file with a fixed frame count.

    The audio is loaded with librosa's default settings, converted to a
    mel-spectrogram, expressed in dB relative to its own peak, and then padded
    or truncated along the time axis so every result has ``n_frames`` columns.

    Parameters
    ----------
    mp3_path : str
        Path of the audio file to load.
    output_dir : str
        Currently unused; kept so existing callers that pass it keep working.
    n_frames : int, optional
        Number of time frames in the returned array (default 1292).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mels, n_frames)`` holding dB values.
    """
    # Load the audio file
    y, sr = librosa.load(mp3_path)

    # Generate the mel-spectrogram and convert power to dB relative to the peak
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

    current_frames = mel_spectrogram_db.shape[1]

    if current_frames == n_frames:
        # Already the requested width: return it untouched
        return mel_spectrogram_db

    if current_frames > n_frames:
        # Too long: keep only the first n_frames frames
        return mel_spectrogram_db[:, :n_frames]

    # Too short: pad on the right. Because the values are dB relative to the
    # peak (ref=np.max), 0 dB is the LOUDEST possible value, so zero-padding
    # would append full-scale energy. Pad with the quietest observed value
    # instead so the padding behaves like silence.
    if mel_spectrogram_db.size:
        fill_db = mel_spectrogram_db.min()
    else:
        # No frames at all: nothing to derive a floor from, fall back to 0
        fill_db = 0.0
    pad_width = ((0, 0), (0, n_frames - current_frames))
    return np.pad(mel_spectrogram_db, pad_width, mode='constant', constant_values=fill_db)

In [10]:
    # # Save the mel-spectrogram as an image
    # filename = os.path.basename(mp3_path).replace('.mp3', '.png')
    # output_path = os.path.join(output_dir, filename)

    # # imageio.imsave(output_path, mel_spectrogram_db.astype(np.uint8))
    
    # plt.figure(figsize=(20, 6))
    # librosa.display.specshow(mel_spectrogram_db, x_axis='time', y_axis='mel')
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Mel-Spectrogram')
    # plt.savefig(output_path)
    # plt.close()

In [11]:
# Directory that is scanned for the source '.mp3' files
input_directory = 'data/music4all/audios'
# Passed to mp3_to_mel_spectrogram as `output_dir`; the function body does not read it
output_directory = 'data/music4all/spectrograms'

In [13]:
# Mel-spectrograms of every catalogued track, and the song ID of each one.
# temp[i] belongs to temp_ids[i]; use temp_ids to line the spectrograms up with
# metadata rows, since directory order is unrelated to DataFrame row order.
temp = []
temp_ids = []

# Build the ID lookup once: testing membership against df['id'].values inside
# the loop would rescan the whole column for every file.
known_ids = set(df['id'].values)

count = 0
# os.listdir returns entries in arbitrary order; sorting makes the order of
# `temp` reproducible from run to run.
for filename in sorted(os.listdir(input_directory)):
    # Progress report every 1000 directory entries (all entries, not only .mp3)
    if count % 1000 == 0:
        print("count: ", count)
    count += 1
    if filename.endswith('.mp3'):
        song_id = filename.split('.')[0]  # Extract the song ID from the filename
        if song_id in known_ids:  # 'id' is the song-ID column of the metadata DataFrame
            mp3_path = os.path.join(input_directory, filename)
            temp.append(mp3_to_mel_spectrogram(mp3_path, output_directory))
            temp_ids.append(song_id)

count:  0
count:  1000
count:  2000
count:  3000
count:  4000
count:  5000
count:  6000
count:  7000
count:  8000
count:  9000
count:  10000
count:  11000
count:  12000
count:  13000
count:  14000
count:  15000
count:  16000
count:  17000
count:  18000
count:  19000
count:  20000
count:  21000
count:  22000
count:  23000
count:  24000
count:  25000
count:  26000
count:  27000
count:  28000
count:  29000
count:  30000
count:  31000
count:  32000
count:  33000
count:  34000
count:  35000
count:  36000
count:  37000
count:  38000
count:  39000
count:  40000
count:  41000
count:  42000
count:  43000
count:  44000
count:  45000
count:  46000
count:  47000
count:  48000
count:  49000
count:  50000
count:  51000
count:  52000
count:  53000
count:  54000
count:  55000
count:  56000
count:  57000
count:  58000
count:  59000
count:  60000
count:  61000
count:  62000
count:  63000
count:  64000
count:  65000
count:  66000
count:  67000
count:  68000
count:  69000
count:  70000
count:  71000
count

In [16]:
# Sanity check: how many spectrograms were collected, and what type each entry is
print(len(temp))
print(type(temp[0]))

44860
<class 'numpy.ndarray'>


In [21]:
# Persist the in-memory spectrograms to a new HDF5 file
with h5py.File('large_data.h5', 'w') as hf:
    chunk_size = 100  # number of spectrograms written per slice assignment

    # Store one spectrogram per HDF5 chunk (128 * 1292 float32 values, ~646 KiB).
    # A (100, 128, 1292) chunk is ~63 MiB, far above the chunk sizes h5py
    # recommends, so reading a single sample would pull in 100 of them; it also
    # makes create_dataset fail whenever fewer than 100 spectrograms exist.
    dset = hf.create_dataset('data', shape=(len(temp), 128, 1292), dtype='float32', chunks=(1, 128, 1292))

    # Write in batches so only `chunk_size` spectrograms are converted at a time
    for i in range(0, len(temp), chunk_size):
        dset[i:i+chunk_size] = np.asarray(temp[i:i+chunk_size], dtype='float32')

### Train a CNN on the mel-spectrograms

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Reload the song metadata table.
# NOTE: unpickling can execute arbitrary code -- only load 'fin.pkl' from a trusted source.
fin_fil_df = pd.read_pickle('fin.pkl')

encoder = LabelEncoder()

# One genre string per metadata row, in DataFrame row order.
# NOTE(review): the spectrogram list is filled in os.listdir order and only for
# files whose ID is in the metadata, so these labels are not guaranteed to have
# the same length or order as the spectrograms -- confirm alignment before training.
labels = fin_fil_df['genre']

# Integer-encode the genre strings; encoder.classes_ holds the genre names
genre_labels = encoder.fit_transform(labels)

print(encoder.classes_)

In [None]:
chunk_size = 100  # number of spectrograms written per slice assignment
# Write the collected spectrograms to 'spectrogram.h5'.
# `libtiff` is not an h5py.File option (unknown keywords raise TypeError), so it
# is dropped; the undefined placeholder `your_data` is replaced by the `temp`
# list built earlier, and the row count is taken from it instead of being hard-coded.
with h5py.File('spectrogram.h5', 'w', libver='latest') as hf:
    max_shape = (len(temp), 128, 1292)
    dset = hf.create_dataset('data', shape=max_shape, dtype='float32', maxshape=max_shape, chunks=True)

    # Write data in batches of `chunk_size` spectrograms
    for i in range(0, len(temp), chunk_size):
        dset[i:i+chunk_size] = temp[i:i+chunk_size]

In [None]:
# `dset` is an HDF5 dataset handle created inside a `with h5py.File(...)` block,
# so its file is already closed here and it cannot be indexed. Split the
# in-memory spectrograms instead.
# NOTE(review): np.asarray copies every spectrogram into one contiguous array;
# make sure there is enough free memory for that copy.
spectrograms = np.asarray(temp, dtype='float32')

# Fail early with a clear message if features and labels cannot be paired up.
# NOTE(review): equal length does not prove equal order -- confirm that
# genre_labels is ordered the same way as `temp`.
if len(spectrograms) != len(genre_labels):
    raise ValueError(
        f"{len(spectrograms)} spectrograms but {len(genre_labels)} labels; "
        "align them by song ID before splitting"
    )

# Fixed seed so the train/test partition is reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    spectrograms, genre_labels, test_size=0.2, random_state=42
)