In [4]:
import os
import numpy as np
import librosa
import pandas as pd
from natsort import natsorted 
import librosa.display
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

%matplotlib inline
import matplotlib.pyplot as plt

In [5]:
#all composers with more than 10 recordings in the dataset(from excel analaysis)
composer_list=['Beethoven', 'Bach', 'Schubert', 'Mozart', 'Brahms']

In [6]:
df = pd.read_csv("musicnet_original_metadata.csv", encoding="ISO-8859-1")
#row = df.loc[df['id'] == 1727]
#print(row)
df['composer'].value_counts()

composer
Beethoven    157
Bach          67
Schubert      30
Mozart        24
Brahms        24
Cambini        9
Dvorak         8
Faure          4
Ravel          4
Haydn          3
Name: count, dtype: int64

In [14]:
import os
import numpy as np
import pandas as pd
import librosa
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from tensorflow.keras import layers, models
import config1

# Your chosen composers
composer_list = ['Beethoven', 'Bach', 'Schubert', 'Mozart', 'Brahms']

# Load metadata
df = pd.read_csv("musicnet_original_metadata.csv", encoding="ISO-8859-1")

# Helper function to get subdirectories
def get_subdirectories(directory):
    return [f for f in os.listdir(directory) if os.path.isdir(os.path.join(directory, f))]

# Helper function to get audio file paths
def get_audios_path(base_dir, sub_folder, samp_rate):
    folder_path = os.path.join(base_dir, sub_folder)
    return [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.wav')]

# Dummy placeholder - replace with your own feature extraction logic
def extract_features(file_path, sr, frame_size, hop_size):
    y, sr = librosa.load(file_path, sr=sr)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=frame_size, hop_length=hop_size, n_mels=128)
    log_mel = librosa.power_to_db(mel)
    
    # Fix time frames to a constant length (pad or cut)
    fixed_frames = 130
    if log_mel.shape[1] < fixed_frames:
        pad_width = fixed_frames - log_mel.shape[1]
        log_mel = np.pad(log_mel, ((0, 0), (0, pad_width)), mode='constant')
    else:
        log_mel = log_mel[:, :fixed_frames]

    return log_mel

def main():
    samp_rate = config1.CreateDataset.SAMPLING_RATE
    frame_size = config1.CreateDataset.FRAME_SIZE
    hop_size = config1.CreateDataset.HOP_SIZE
    dataset_dir = "musicnet/musicnet"

    sub_folders = get_subdirectories(dataset_dir)

    features_list = []
    labels_list = []

    print("Extracting features from audios...")
    for sub_folder in sub_folders:
        print(".....Working in folder:", sub_folder)
        audios_path = get_audios_path(dataset_dir, sub_folder, samp_rate)
        
        for audio_path in audios_path:
            audio_name = os.path.splitext(os.path.basename(audio_path))[0]
            row = df.loc[df['id'] == int(audio_name)]

            if row.empty:
                continue

            label = row.iloc[0]['composer']
            if label in composer_list:
                features = extract_features(audio_path, samp_rate, frame_size, hop_size)
                features_list.append(features)
                labels_list.append(label)

    features_array = np.array(features_list)
    labels_array = np.array(labels_list)

    X = np.expand_dims(features_array, axis=-1)
    y = labels_array

    print("Extracted features from", len(features_list), "audio files.")
    print("Features array shape:", X.shape)
    print("Labels array shape:", y.shape)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42, stratify=y_train)

    # Encode labels
    class_names = np.unique(y)
    num_classes = len(class_names)
    class_to_int = {name: i for i, name in enumerate(class_names)}

    y_train_int = np.array([class_to_int[label] for label in y_train])
    y_val_int = np.array([class_to_int[label] for label in y_val])
    y_test_int = np.array([class_to_int[label] for label in y_test])

    y_train_cat = to_categorical(y_train_int, num_classes=num_classes)
    y_val_cat = to_categorical(y_val_int, num_classes=num_classes)
    y_test_cat = to_categorical(y_test_int, num_classes=num_classes)

    def build_cnn_model(input_shape, num_classes):
        model = models.Sequential()
        model.add(layers.Conv2D(16, (3, 3), activation='relu', input_shape=input_shape))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(32, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Conv2D(64, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Flatten())
        model.add(layers.Dense(128, activation='relu'))
        model.add(layers.Dropout(0.3))
        model.add(layers.Dense(num_classes, activation='softmax'))
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    input_shape = X_train.shape[1:]  # Should be (128, fixed_frames, 1)
    model = build_cnn_model(input_shape, num_classes)

    history = model.fit(
        X_train, y_train_cat,
        validation_data=(X_val, y_val_cat),
        epochs=20,
        batch_size=16
    )

    test_loss, test_acc = model.evaluate(X_test, y_test_cat)
    print("Test Accuracy:", test_acc)

if __name__ == '__main__':
    main()


Extracting features from audios...
.....Working in folder: test_labels
.....Working in folder: test_data
.....Working in folder: train_data
.....Working in folder: train_labels
Extracted features from 302 audio files.
Features array shape: (302, 128, 130, 1)
Labels array shape: (302,)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.3210 - loss: 7.0524 - val_accuracy: 0.5200 - val_loss: 1.2960
Epoch 2/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5455 - loss: 1.2597 - val_accuracy: 0.5200 - val_loss: 1.2944
Epoch 3/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5316 - loss: 1.1906 - val_accuracy: 0.5600 - val_loss: 1.1936
Epoch 4/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.5652 - loss: 1.0538 - val_accuracy: 0.5600 - val_loss: 1.0840
Epoch 5/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.6005 - loss: 0.9268 - val_accuracy: 0.6000 - val_loss: 1.1256
Epoch 6/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.7647 - loss: 0.7930 - val_accuracy: 0.6000 - val_loss: 1.0282
Epoch 7/20
[1m14/14[0m [32m━━━━