In [1]:
# Mount Google Drive at /content/drive; the weight directory and the test audio
# file used further down are both read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tensorflow.image import resize

In [3]:
# Genre labels. A label's position in this list is the class index that the
# prediction cell uses to look up the genre name.
classes = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
# Model input: 64x64 images with a single channel.
input_shape = (64, 64, 1)
# Derived from the label list so the output width cannot drift out of sync with `classes`.
num_classes = len(classes)

In [4]:
class CNNModel:
    """Six-layer convolutional classifier built directly on ``tf.nn`` ops.

    ``forward`` applies three blocks of
    conv(SAME) -> ReLU -> conv(VALID) -> ReLU -> 2x2 max-pool (32, 64 and 128
    filters), then dropout, a hidden dense layer with ReLU, dropout again, and
    a final linear layer that returns raw (un-softmaxed) logits.

    Parameters live in two plain dicts, ``weights`` and ``biases``, keyed by
    layer name (``conv1`` .. ``conv6``, ``fc1``, ``fc2``). The ``fc1`` entries
    start as ``None`` and are created on the first forward pass, once the
    flattened feature size is known.
    """

    FC1_UNITS = 1200     # width of the hidden fully connected layer
    INIT_STDDEV = 0.01   # std-dev of the normal initialiser used for every weight

    def __init__(self, input_shape, num_classes):
        """Create randomly initialised conv/fc2 parameters.

        Args:
            input_shape: Shape of one input sample; only its last entry (the
                channel count) is used to size ``conv1``.
            num_classes: Number of output logits.
        """
        stddev = self.INIT_STDDEV

        self.input_shape = input_shape
        self.num_classes = num_classes
        self.weights = {
            'conv1': tf.Variable(tf.random.normal([3, 3, input_shape[-1], 32], stddev=stddev)),
            'conv2': tf.Variable(tf.random.normal([3, 3, 32, 32], stddev=stddev)),
            'conv3': tf.Variable(tf.random.normal([3, 3, 32, 64], stddev=stddev)),
            'conv4': tf.Variable(tf.random.normal([3, 3, 64, 64], stddev=stddev)),
            'conv5': tf.Variable(tf.random.normal([3, 3, 64, 128], stddev=stddev)),
            'conv6': tf.Variable(tf.random.normal([3, 3, 128, 128], stddev=stddev)),
            # fc1 depends on the flattened conv output size, so it is built lazily.
            'fc1': None,
            'fc2': tf.Variable(tf.random.normal([self.FC1_UNITS, num_classes], stddev=stddev)),
        }
        self.biases = {
            'conv1': tf.Variable(tf.zeros([32])),
            'conv2': tf.Variable(tf.zeros([32])),
            'conv3': tf.Variable(tf.zeros([64])),
            'conv4': tf.Variable(tf.zeros([64])),
            'conv5': tf.Variable(tf.zeros([128])),
            'conv6': tf.Variable(tf.zeros([128])),
            'fc1': None,
            'fc2': tf.Variable(tf.zeros([num_classes])),
        }

    def build_fc1(self, x):
        """Create the ``fc1`` weight and bias for inputs shaped like ``x``.

        Args:
            x: Tensor whose first axis is the batch; the product of all
                remaining axes becomes the input width of ``fc1``.
        """
        flatten_dim = int(np.prod(x.shape[1:]))  # total size after flattening
        self.weights['fc1'] = tf.Variable(
            tf.random.normal([flatten_dim, self.FC1_UNITS], stddev=self.INIT_STDDEV)
        )
        self.biases['fc1'] = tf.Variable(tf.zeros([self.FC1_UNITS]))

    def _conv_relu(self, x, name, padding):
        """Apply the stride-1 convolution ``name``, add its bias, then ReLU."""
        x = tf.nn.conv2d(x, self.weights[name], strides=1, padding=padding) + self.biases[name]
        return tf.nn.relu(x)

    def forward(self, x, is_training=True):
        """Run the network and return one row of logits per input sample.

        Args:
            x: Batch of inputs laid out as (batch, height, width, channels).
            is_training: When true, dropout (rate 0.3 after the conv stack,
                rate 0.4 after ``fc1``) is applied; when false it is skipped.

        Returns:
            Tensor of shape (batch, num_classes) holding raw logits.
        """
        # Match the parameter dtype so e.g. float64 NumPy input does not fail in conv2d.
        x = tf.cast(x, self.weights['conv1'].dtype)

        # Three conv blocks: SAME conv, VALID conv, then 2x2 max pooling.
        for same_conv, valid_conv in (('conv1', 'conv2'), ('conv3', 'conv4'), ('conv5', 'conv6')):
            x = self._conv_relu(x, same_conv, 'SAME')
            x = self._conv_relu(x, valid_conv, 'VALID')
            x = tf.nn.max_pool2d(x, ksize=2, strides=2, padding='SAME')

        if is_training:
            x = tf.nn.dropout(x, rate=0.3)

        # Flatten using the known feature size and let reshape infer the batch
        # axis; this does not rely on a static (possibly unknown) batch size.
        flat_dim = int(np.prod(x.shape[1:]))
        x = tf.reshape(x, [-1, flat_dim])

        # Initialise the first fully connected layer if it has not been built or loaded yet.
        if self.weights['fc1'] is None:
            self.build_fc1(x)

        # Fully connected layers.
        x = tf.matmul(x, self.weights['fc1']) + self.biases['fc1']
        x = tf.nn.relu(x)

        if is_training:
            x = tf.nn.dropout(x, rate=0.4)

        x = tf.matmul(x, self.weights['fc2']) + self.biases['fc2']
        return x

In [5]:
#initialize the model
# The conv and fc2 parameters are randomly initialised here; the fc1 weight and
# bias stay None until they are built by a forward pass or loaded from disk.
model = CNNModel(input_shape=input_shape, num_classes=num_classes)

In [6]:
# Directory expected to hold "<layer>_weights.npy" and "<layer>_biases.npy" for every layer.
save_dir = "/content/drive/MyDrive/Colab Notebooks/model_backup_final/saved_model_final"

# For future use: loading weights and biases.
def load_model_weights(model, save_dir):
    """Restore every weight and bias of ``model`` from ``.npy`` files.

    For each key in ``model.weights`` the file ``<save_dir>/<key>_weights.npy``
    is read, and for each key in ``model.biases`` the file
    ``<save_dir>/<key>_biases.npy``. Entries that are currently ``None``
    (``fc1`` before it has been built) are simply created from the file.

    The loaded arrays are stored as ``tf.Variable`` objects, the same type the
    model constructor creates, so the restored parameters still support
    ``.assign`` and are tracked automatically by ``tf.GradientTape``.

    Args:
        model: Object exposing ``weights`` and ``biases`` dicts keyed by layer name.
        save_dir: Directory containing the ``.npy`` files.

    Raises:
        FileNotFoundError: Propagated from ``np.load`` when a file is missing.
    """
    groups = (
        ("weight", "weights", model.weights),
        ("bias", "biases", model.biases),
    )

    # Load everything first so nothing is reported unless every file was read.
    for _, suffix, params in groups:
        for name in list(params):
            loaded = np.load(os.path.join(save_dir, f"{name}_{suffix}.npy"))
            params[name] = tf.Variable(loaded)

    for label, _, params in groups:
        for name, variable in params.items():
            print(f"Loaded {label} {name}: {variable.shape}")

    print("Model weights and biases loaded successfully.")

In [7]:
# Overwrite the freshly initialised parameters (and fill in fc1) with the values stored under save_dir.
load_model_weights(model, save_dir)

Loaded weight conv1: (3, 3, 1, 32)
Loaded weight conv2: (3, 3, 32, 32)
Loaded weight conv3: (3, 3, 32, 64)
Loaded weight conv4: (3, 3, 64, 64)
Loaded weight conv5: (3, 3, 64, 128)
Loaded weight conv6: (3, 3, 128, 128)
Loaded weight fc1: (6272, 1200)
Loaded weight fc2: (1200, 10)
Loaded bias conv1: (32,)
Loaded bias conv2: (32,)
Loaded bias conv3: (64,)
Loaded bias conv4: (64,)
Loaded bias conv5: (128,)
Loaded bias conv6: (128,)
Loaded bias fc1: (1200,)
Loaded bias fc2: (10,)
Model weights and biases loaded successfully.


In [8]:
#load and preprocess audio data
def load_and_preprocess_file(file_path, target_shape=(64,64), chunk_duration=4, overlap_duration=2):
    """Turn one audio file into a batch of fixed-size Mel spectrogram images.

    The audio is loaded at its native sample rate and cut into windows of
    ``chunk_duration`` seconds, where consecutive windows overlap by
    ``overlap_duration`` seconds. Each window is converted to a Mel
    spectrogram, given a trailing channel axis and resized to ``target_shape``.
    The last window may be shorter than ``chunk_duration``; it is still
    resized to ``target_shape``.

    Args:
        file_path: Path of the audio file to read.
        target_shape: (height, width) every spectrogram is resized to.
        chunk_duration: Window length in seconds.
        overlap_duration: Overlap between consecutive windows in seconds;
            must be non-negative and smaller than ``chunk_duration``.

    Returns:
        Array of shape (num_chunks, height, width, 1). A clip shorter than one
        window still yields a single (short) window instead of an empty array.

    Raises:
        ValueError: If the file contains no samples, or if the durations do
            not give a positive hop between windows.
    """
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    if len(audio_data) == 0:
        raise ValueError(f"No audio samples found in {file_path!r}")

    # Convert durations to whole sample counts.
    chunk_samples = int(chunk_duration * sample_rate)
    overlap_samples = int(overlap_duration * sample_rate)
    hop_samples = chunk_samples - overlap_samples
    if chunk_samples < 1 or overlap_samples < 0 or hop_samples < 1:
        raise ValueError(
            "chunk_duration must be positive and overlap_duration must be in "
            f"[0, chunk_duration); got {chunk_duration} and {overlap_duration}"
        )

    # Number of windows needed to cover the clip. Without the lower bound the
    # formula gives zero or a negative count for clips shorter than
    # (chunk - hop), which would silently produce an empty result.
    num_chunks = max(1, int(np.ceil((len(audio_data) - chunk_samples) / hop_samples)) + 1)

    data = []
    for i in range(num_chunks):
        start = i * hop_samples
        chunk = audio_data[start:start + chunk_samples]

        # Mel spectrogram of the window, with a channel axis, resized to the model input size.
        mel_spectrogram = librosa.feature.melspectrogram(y=chunk, sr=sample_rate)
        mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
        data.append(mel_spectrogram)

    return np.array(data)

In [9]:
# Smoke test: a single random input must produce exactly one row of `num_classes` logits.
test_input = np.random.rand(1, *input_shape).astype(np.float32)
test_output = model.forward(test_input, is_training=False)
assert tuple(test_output.shape) == (1, num_classes), f"unexpected output shape: {test_output.shape}"
print(f"Test output shape: {test_output.shape}")

Test output shape: (1, 10)


In [20]:
# Audio clip to classify. It is split into overlapping windows and each window
# becomes one resized Mel spectrogram, so X_test holds one image per window.
file_path = "/content/drive/MyDrive/Colab Notebooks/test_music/reggae.00004.wav"
X_test=load_and_preprocess_file(file_path)

In [21]:
# Sanity check: expect (num_chunks, 64, 64, 1), matching the model's input_shape per chunk.
X_test.shape

(15, 64, 64, 1)

In [22]:
def model_prediction(model, X_test):
    """Classify a clip by majority vote over its per-chunk predictions.

    Args:
        model: Object whose ``forward(X, is_training=False)`` returns one row
            of logits per input sample.
        X_test: Batch of inputs, one entry per audio chunk.

    Returns:
        The class index predicted for the largest number of chunks. When
        several classes tie, the smallest index among them is returned.
    """
    # Inference-mode forward pass.
    chunk_logits = model.forward(X_test, is_training=False)

    # Softmax over the class axis turns each row of logits into probabilities.
    chunk_probs = tf.nn.softmax(chunk_logits, axis=1).numpy()

    # One vote per chunk: the most probable class.
    chunk_votes = np.argmax(chunk_probs, axis=1)

    # Tally the votes; np.unique returns the distinct labels in ascending order.
    labels, tallies = np.unique(chunk_votes, return_counts=True)

    # argmax picks the first maximum, so ties resolve to the smallest label.
    return labels[np.argmax(tallies)]

In [23]:
# Majority-vote class index for the clip, printed together with its label from `classes`.
predicted_genre = model_prediction(model, X_test)
print(f"The predicted genre is: {predicted_genre}",classes[predicted_genre])

The predicted genre is: 8 reggae


In [14]:
# Echo the configuration the model was constructed with.
print(f"Model input shape: {input_shape}")
print(f"Number of classes: {num_classes}")

Model input shape: (64, 64, 1)
Number of classes: 10


In [15]:
# Shape audit of every parameter. The shape is read straight from the tensor
# (no `.numpy()` host copy of the full array), and an entry that is still None
# (fc1 before it is built or loaded) is reported instead of raising.
for name, variable in model.weights.items():
    print(f"Weight {name}: {tuple(variable.shape) if variable is not None else None}")
for name, variable in model.biases.items():
    print(f"Bias {name}: {tuple(variable.shape) if variable is not None else None}")

Weight conv1: (3, 3, 1, 32)
Weight conv2: (3, 3, 32, 32)
Weight conv3: (3, 3, 32, 64)
Weight conv4: (3, 3, 64, 64)
Weight conv5: (3, 3, 64, 128)
Weight conv6: (3, 3, 128, 128)
Weight fc1: (6272, 1200)
Weight fc2: (1200, 10)
Bias conv1: (32,)
Bias conv2: (32,)
Bias conv3: (64,)
Bias conv4: (64,)
Bias conv5: (128,)
Bias conv6: (128,)
Bias fc1: (1200,)
Bias fc2: (10,)
