In [None]:
import os

import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [None]:
# Load a previously saved Keras model from disk.
# The path must be a raw string: in an ordinary string literal "\U" begins a
# Unicode escape, so the un-prefixed Windows path fails to parse
# (SyntaxError: 'unicodeescape' codec can't decode bytes ...).
# NOTE(review): this is an absolute, machine-specific Windows path, while the
# configuration cell uses '/content/drive/...' paths; `model` is also rebound
# later in the notebook when the InceptionV3-based network is built. Confirm
# whether this cell is still needed.
model = tf.keras.models.load_model(r"C:\Users\DELL\Desktop\cnn_M2E_trans_input_shape")

# Print the model summary to get information about the model's architecture
model.summary()

In [2]:
# ---- File locations ---------------------------------------------------------
# Everything lives under one project folder; the other paths are derived from it.
FOLDER_PATH = '/content/drive/MyDrive/music-retrival'
SPECTROGRAM_313x149_SAVE_DIR = f"{FOLDER_PATH}/spectograms_313x149_Mel_func"
ANNOTATION_FILE_PATH = f"{FOLDER_PATH}/annotations.csv"
MODEL_PATH = f"{FOLDER_PATH}/model/trans-mid-model/"

# ---- Data parameters --------------------------------------------------------
NUM_INSTANCE = 200   # number of samples passed to load_data_label
TEST_SIZE = 0.2      # test_size passed to train_test_split

# ---- Model / training parameters --------------------------------------------
BATCH_SIZE = 8       # batch_size used by model.fit
EPOCHS = 40          # epochs used by model.fit
MODEL_SAVE = True    # when truthy, the final cell calls model.save(MODEL_PATH)

In [4]:
def load_data_label(feature_path, label_file_path, num_instance = 2000):
    """Load saved feature arrays and the matching label rows.

    Reads ``<feature_path>/<i>.mp3.npy`` for ``i = 1 .. num_instance``, appends
    a trailing axis of length 1 to each array and stacks the results along a
    new leading axis.

    Args:
        feature_path: Directory containing the ``<i>.mp3.npy`` files.
        label_file_path: CSV file with a header row and a ``song_id`` column;
            all remaining columns are returned as labels.
        num_instance: Number of feature files / label rows to load (>= 1).

    Returns:
        ``(feature_matrix, label_matrix)`` where ``feature_matrix`` is always
        an ``ndarray`` of shape ``(num_instance, *sample_shape, 1)`` and
        ``label_matrix`` is a DataFrame holding the first ``num_instance``
        rows of the CSV without ``song_id``.

    Raises:
        ValueError: If ``num_instance`` is smaller than 1, or if the loaded
            arrays do not all share one shape (raised by ``np.stack``).
    """
    if num_instance < 1:
        raise ValueError("num_instance must be at least 1, got %r" % (num_instance,))

    labels = pd.read_csv(label_file_path, header=0).drop('song_id', axis=1)
    # Labels are selected by position, not by song_id.
    # NOTE(review): assumes CSV row k belongs to file "<k+1>.mp3.npy" - confirm.
    label_matrix = labels[:num_instance]

    # Collect every sample first and stack once; concatenating inside the loop
    # re-copies the whole accumulated array on each iteration.
    samples = []
    for i in range(1, num_instance + 1):
        file_path = os.path.join(feature_path, str(i) + ".mp3.npy")
        samples.append(np.expand_dims(np.load(file_path), axis=-1))

    feature_matrix = np.stack(samples, axis=0)
    return feature_matrix, label_matrix


In [6]:
 # load feature, label
feature_matrix, label_matrix = load_data_label(
    feature_path=SPECTROGRAM_313x149_SAVE_DIR,
    label_file_path=ANNOTATION_FILE_PATH,
    num_instance=NUM_INSTANCE,
)

# Hold out a TEST_SIZE fraction of the samples; the fixed random_state keeps
# the split identical from run to run.
(train_features, test_features,
 train_labels, test_labels) = train_test_split(
    feature_matrix,
    label_matrix,
    test_size=TEST_SIZE,
    random_state=42,
)

In [13]:
# Display the shape of the stacked feature array; the recorded output below
# shows (200, 313, 149, 1), whose trailing three dimensions match the model's
# Input(shape=(313, 149, 1)).
feature_matrix.shape

(200, 313, 149, 1)

In [31]:
# Build the regression network: a pre-trained InceptionV3 backbone fed by a
# small convolution that adapts the single-channel spectrogram input.
# (Model, Input, Conv2D, GlobalAveragePooling2D, Dense and InceptionV3 are
# imported in the notebook's top import cell.)

# InceptionV3 with ImageNet weights and without its classification head.
base_model = InceptionV3(weights='imagenet', include_top=False)

# Set the first 10 layers of the backbone as non-trainable; all later layers
# keep their default trainable state.
for layer in base_model.layers[:10]:
    layer.trainable = False

# Input layer that accepts single-channel input of size 313 x 149.
inputs = Input(shape=(313, 149, 1))
# Learnable 3x3 convolution that turns the single channel into three channels
# before the tensor is handed to the pre-trained backbone.
input_layer = Conv2D(3, (3, 3), padding='same', activation='relu')(inputs)
x = base_model(input_layer)

# GlobalAveragePooling2D already produces a flat (batch, channels) tensor, so
# no additional Flatten layer is required after it.
x = GlobalAveragePooling2D()(x)
x = Dense(2048, activation='relu')(x)
# Output head: 7 units with no activation (linear).
# NOTE(review): presumably one unit per label column left in annotations.csv
# after dropping 'song_id' - confirm the CSV really has 7 such columns.
predictions = Dense(7)(x)

model = Model(inputs=inputs, outputs=predictions)

In [33]:
# Optimise mean squared error with Adam at a fixed learning rate.
# NOTE(review): 'accuracy' is tracked alongside an MSE loss and a linear output
# layer; if the targets are continuous values this metric is probably not
# meaningful - confirm whether it should stay.
opt = keras.optimizers.Adam(learning_rate=0.0005)
model.compile(
    optimizer=opt,
    loss='MSE',
    metrics=['mse', 'mae', 'mape', 'accuracy'],
)

In [None]:
# Train on the training split only (no validation data is passed here); the
# returned History object is kept in `history`.
history = model.fit(
    x=train_features,
    y=train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

In [42]:
# Wrap the held-out (features, labels) pairs in a tf.data pipeline and group
# them into batches of 32.
val_data = tf.data.Dataset.from_tensor_slices((test_features, test_labels))
val_data = val_data.batch(32)

In [None]:
# from keras.api._v2.keras import callbacks
# from keras.optimizers import SGD
# from tensorflow.keras.callbacks import EarlyStopping

# for layer in base_model.layers:
#     layer.trainable = True

# early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=5)
# sgd = SGD(lr=0.01, momentum=0.9)

# model.compile(optimizer=sgd, loss='MSE', metrics=['mse', 'mae', 'mape', 'accuracy'])
# history = model.fit(train_features, train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=val_data, callbacks=[early_stopping])

In [None]:
# Score the trained model on the batched held-out data.
# The variable name (sic) is left unchanged because it is a notebook-level
# name that other cells may reference.
evauation = model.evaluate(x=val_data)

In [None]:
# Persist the trained model to MODEL_PATH when saving is enabled in the
# configuration cell (truthiness test instead of comparing with `== True`).
if MODEL_SAVE:
    model.save(MODEL_PATH)