In [None]:
import os
from pydub import AudioSegment
import librosa
import soundfile as sf
from matplotlib import pyplot as plt
import librosa.display
import numpy as np
import io
from keras.models import load_model

# **Data Preprocessing**

**Convert MP3 to WAV**

In [None]:
mp3_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Modern/mohamed rahem composer"
wav_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Modern/Modern_WAV"

# Create the destination folder up front so the export below cannot fail
# just because the folder has not been created yet.
os.makedirs(wav_folder, exist_ok=True)

# Convert every MP3 in the source folder to a WAV file with the same base name.
for filename in sorted(os.listdir(mp3_folder)):
    stem, extension = os.path.splitext(filename)

    # Compare the extension case-insensitively so "track.MP3" is not skipped.
    if extension.lower() != ".mp3":
        continue

    # Set input and output file paths
    mp3_path = os.path.join(mp3_folder, filename)
    wav_path = os.path.join(wav_folder, stem + ".wav")

    # Load MP3 file using pydub
    audio = AudioSegment.from_mp3(mp3_path)

    # Export audio in WAV format
    audio.export(wav_path, format="wav")


**Normalization**

In [None]:
input_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Modern/Modern_WAV"
output_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Modern/Normalized"

# Create the destination folder up front so sf.write does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Normalize every WAV file of the input folder into the output folder,
# keeping the file name unchanged.
for filename in sorted(os.listdir(input_folder)):
    # Case-insensitive check so "track.WAV" is processed as well.
    if not filename.lower().endswith(".wav"):
        continue

    # Set input and output file paths
    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)

    # Load audio file using librosa (sr=None is passed so that the sampling
    # rate returned here is the one written back below)
    y, sr = librosa.load(input_path, sr=None)

    # Normalize audio using peak amplitude normalization
    y_normalized = librosa.util.normalize(y)

    # Save normalized audio to file
    sf.write(output_path, y_normalized, sr)



**Sliding-Window Segmentation of the Classical Dataset (5-second segments, 2-second hop)**

---




In [None]:
input_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Classic/Normalized"
output_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Classic/Segmented"

# Set segment length and hop length in seconds
segment_length = 5
hop_length = 2

# Create the destination folder up front so sf.write does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Cut every WAV file of the input folder into overlapping fixed-length segments.
for filename in sorted(os.listdir(input_folder)):
    stem, extension = os.path.splitext(filename)
    if extension.lower() != ".wav":
        continue

    # Load audio file using librosa
    input_path = os.path.join(input_folder, filename)
    y, sr = librosa.load(input_path, sr=None)

    # Convert the window length and the hop from seconds to samples
    segment_frames = int(segment_length * sr)
    hop_frames = int(hop_length * sr)
    total_frames = len(y)
    # int() truncates toward zero, so a file that is less than one hop short
    # of a full window still yields one (shorter) segment.
    total_segments = int((total_frames - segment_frames) / hop_frames) + 1

    # Segment audio using a sliding window
    for i in range(total_segments):
        start_frame = i * hop_frames
        end_frame = start_frame + segment_frames
        y_segment = y[start_frame:end_frame]

        # Build the name from the file stem; str.replace(".wav", ...) on the
        # whole path would also rewrite any other ".wav" occurring in it.
        output_segment_path = os.path.join(output_folder, f"{stem}_segment{i}.wav")

        # Save audio segment to file
        sf.write(output_segment_path, y_segment, sr)


**Sliding-Window Segmentation of the Modern Dataset (5-second segments, 2-second hop)**

In [None]:
input_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Modern/Normalized"
output_folder = "/content/drive/MyDrive/Classification_Classic-Modern/Modern/Segmented"

# Set segment length and hop length in seconds
segment_length = 5
hop_length = 2

# Create the destination folder up front so sf.write does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Cut every WAV file of the input folder into overlapping fixed-length segments.
for filename in sorted(os.listdir(input_folder)):
    stem, extension = os.path.splitext(filename)
    if extension.lower() != ".wav":
        continue

    # Load audio file using librosa
    input_path = os.path.join(input_folder, filename)
    y, sr = librosa.load(input_path, sr=None)

    # Convert the window length and the hop from seconds to samples
    segment_frames = int(segment_length * sr)
    hop_frames = int(hop_length * sr)
    total_frames = len(y)
    # int() truncates toward zero, so a file that is less than one hop short
    # of a full window still yields one (shorter) segment.
    total_segments = int((total_frames - segment_frames) / hop_frames) + 1

    # Segment audio using a sliding window
    for i in range(total_segments):
        start_frame = i * hop_frames
        end_frame = start_frame + segment_frames
        y_segment = y[start_frame:end_frame]

        # Build the name from the file stem; str.replace(".wav", ...) on the
        # whole path would also rewrite any other ".wav" occurring in it.
        output_segment_path = os.path.join(output_folder, f"{stem}_segment{i}.wav")

        # Save audio segment to file
        sf.write(output_segment_path, y_segment, sr)


# **Extracting Mel-Spectrogram for Classic Dataset**

In [None]:
# Set up parameters for Mel spectrogram
n_fft = 2048
hop_length = 512
# NOTE(review): this cell used to declare n_mels = 128 but passed a literal
# n_mels=10 to melspectrogram. The value actually used (10) is kept so the
# generated images do not change - confirm which one was intended.
n_mels = 10

# Set up folder paths (raw strings keep the Windows backslashes literal)
input_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Classic\Segmented"
output_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Classic\Mel-Spectrogram"

# Create the destination folder up front so savefig does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Render one log-Mel-spectrogram PNG per WAV segment
for file_name in sorted(os.listdir(input_folder)):
    # Check if file is a WAV file
    if not file_name.endswith('.wav'):
        continue

    # Load audio file using librosa
    file_path = os.path.join(input_folder, file_name)
    y, sr = librosa.load(file_path)

    # Extracting Mel Spectrogram. The signal is passed as y=...: current
    # librosa releases no longer accept it positionally.
    mel_spectrogram = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )

    # Converting to db
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    output_path = os.path.join(output_folder, os.path.splitext(file_name)[0] + '_mel_spec.png')

    # Extracting PNG
    fig = plt.figure(figsize=(25, 10))
    librosa.display.specshow(log_mel_spectrogram,
                             x_axis="time",
                             y_axis="mel",
                             sr=sr,
                             hop_length=hop_length)
    plt.colorbar(format="%+2.f")
    plt.savefig(output_path)
    # Close the figure once it is saved; otherwise every iteration leaves
    # another open figure in memory.
    plt.close(fig)
    



# **Extracting Mel-Spectrogram for Modern Dataset**

In [None]:
# Set up parameters for Mel spectrogram
n_fft = 2048
hop_length = 512
# NOTE(review): this cell used to declare n_mels = 128 but passed a literal
# n_mels=10 to melspectrogram. The value actually used (10) is kept so the
# generated images do not change - confirm which one was intended.
n_mels = 10

# Set up folder paths (raw strings keep the Windows backslashes literal)
input_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Modern\Segmented"
output_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Modern\Mel-spectrogram"

# Create the destination folder up front so savefig does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Render one log-Mel-spectrogram PNG per WAV segment
for counter, file_name in enumerate(sorted(os.listdir(input_folder)), start=1):
    # Progress indicator: running index of the directory entry being visited
    print(counter)

    # Check if file is a WAV file
    if not file_name.endswith('.wav'):
        continue

    # Load audio file using librosa
    file_path = os.path.join(input_folder, file_name)
    y, sr = librosa.load(file_path)

    # Extracting Mel Spectrogram. The signal is passed as y=...: current
    # librosa releases no longer accept it positionally.
    mel_spectrogram = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )

    # Converting to db
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)

    output_path = os.path.join(output_folder, os.path.splitext(file_name)[0] + '_mel_spec.png')

    # Extracting PNG
    fig = plt.figure(figsize=(25, 10))
    librosa.display.specshow(log_mel_spectrogram,
                             x_axis="time",
                             y_axis="mel",
                             sr=sr,
                             hop_length=hop_length)
    plt.colorbar(format="%+2.f")
    plt.savefig(output_path)
    # Close the figure once it is saved; otherwise every iteration leaves
    # another open figure in memory.
    plt.close(fig)
    



# Extracting MFCC for Classic Dataset

In [None]:
import os
import librosa
import numpy as np

# Set up parameters for MFCC extraction
n_fft = 2048
hop_length = 512
n_mels = 128
n_mfcc = 20

# Set up folder paths (raw strings keep the Windows backslashes literal)
input_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Classic\Segmented"
# The arrays go to "MFCC_np" because that is the folder the dataset-building
# cell of this notebook reads them back from; the previous "MFCC" target left
# that cell without input on a fresh run.
output_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Classic\MFCC_np"

# Create the destination folder up front so np.save does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Compute and store one MFCC matrix per WAV segment
for file_name in sorted(os.listdir(input_folder)):
    stem, extension = os.path.splitext(file_name)

    # Check if file is a WAV file
    if extension.lower() != '.wav':
        continue

    # Load audio file using librosa
    file_path = os.path.join(input_folder, file_name)
    y, sr = librosa.load(file_path, sr=None)

    # Compute MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, n_mfcc=n_mfcc)

    # Save MFCCs to file; the name is built from the stem so that only the
    # real extension is swapped.
    output_path = os.path.join(output_folder, stem + '_mfcc.npy')
    np.save(output_path, mfcc)


# Extracting MFCC for Modern Dataset

In [None]:
import os
import librosa
import numpy as np

# Set up parameters for MFCC extraction
n_fft = 2048
hop_length = 512
n_mels = 128
n_mfcc = 20

# Set up folder paths (raw strings keep the Windows backslashes literal)
input_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Modern\Segmented"
# The arrays go to "MFCC_np" because that is the folder the dataset-building
# cell of this notebook reads them back from; the previous "MFCC" target left
# that cell without input on a fresh run.
output_folder = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Modern\MFCC_np"

# Create the destination folder up front so np.save does not fail on a
# missing directory.
os.makedirs(output_folder, exist_ok=True)

# Compute and store one MFCC matrix per WAV segment
for file_name in sorted(os.listdir(input_folder)):
    stem, extension = os.path.splitext(file_name)

    # Check if file is a WAV file
    if extension.lower() != '.wav':
        continue

    # Load audio file using librosa
    file_path = os.path.join(input_folder, file_name)
    y, sr = librosa.load(file_path, sr=None)

    # Compute MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, n_mfcc=n_mfcc)

    # Save MFCCs to file; the name is built from the stem so that only the
    # real extension is swapped.
    output_path = os.path.join(output_folder, stem + '_mfcc.npy')
    np.save(output_path, mfcc)


In [None]:
import numpy as np

# Spot check: load one stored MFCC array from disk and print its contents
path = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Modern\MFCC_np\335- تعليم عزف اغنية انا في الغرام - شيرين_segment1_mfcc.npy"

file = np.load(
    path,
)
print(
    file,
)

# Preparing the Feature Array X and the Label Array y

In [None]:
import numpy as np
import os

# Define the paths to the two folders (raw strings keep the Windows
# backslashes literal)
Classic = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Classic\MFCC_np"
Modern = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\Modern\MFCC_np"

# Define the maximum number of columns
max_columns = 469


def _load_mfcc_folder(folder, label, n_columns):
    """Load every .npy array of a folder, fixed to n_columns columns.

    Arrays with fewer columns are zero-padded on the right; arrays with more
    columns are trimmed (np.pad alone would raise on a negative pad width).

    Args:
        folder: Directory containing the .npy files.
        label: Class label recorded for every array of this folder.
        n_columns: Number of columns each returned array has.

    Returns:
        Tuple (arrays, labels) of two equally long lists.
    """
    arrays, labels = [], []
    # sorted() fixes the sample order, so the seeded train/test split does not
    # depend on the order os.listdir happens to return.
    for file_name in sorted(os.listdir(folder)):
        if not file_name.endswith('.npy'):
            continue

        data = np.load(os.path.join(folder, file_name))
        missing = n_columns - data.shape[1]
        if missing > 0:
            data = np.pad(data, ((0, 0), (0, missing)), mode='constant')
        else:
            data = data[:, :n_columns]

        arrays.append(data)
        labels.append(label)
    return arrays, labels


# Label 0 = Classic, label 1 = Modern
classic_arrays, classic_labels = _load_mfcc_folder(Classic, 0, max_columns)
modern_arrays, modern_labels = _load_mfcc_folder(Modern, 1, max_columns)

# Convert X and y lists to NumPy arrays
X = np.array(classic_arrays + modern_arrays)
y = np.array(classic_labels + modern_labels)

# Print the shapes of X and y
print('X shape:', X.shape)
print('y shape:', y.shape)


# Splitting the Dataset

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set (shuffled, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# Append a trailing axis of size 1 to both feature arrays
train_rows, train_cols = X_train.shape[1], X_train.shape[2]
test_rows, test_cols = X_test.shape[1], X_test.shape[2]
X_train = X_train.reshape(X_train.shape[0], train_rows, train_cols, 1)
X_test = X_test.reshape(X_test.shape[0], test_rows, test_cols, 1)


In [None]:
# Show the train shape, the test shape and the per-sample shape, in that order
for shape_to_show in (X_train.shape, X_test.shape, X_train.shape[1:]):
    print(shape_to_show)

# Classification Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Binary classifier: two convolution/pooling stages followed by a dense head
model = Sequential([
    # 32 filters, 3x3 kernel, ReLU; input shape taken from one training sample
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    # 2x2 max pooling
    MaxPooling2D(pool_size=(2, 2)),
    # 64 filters, 3x3 kernel, ReLU
    Conv2D(64, (3, 3), activation='relu'),
    # 2x2 max pooling
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the convolutional output for the dense layers
    Flatten(),
    # 128 hidden units with ReLU
    Dense(128, activation='relu'),
    # Dropout with a rate of 0.5
    Dropout(0.5),
    # Single sigmoid unit for the binary decision
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss, Adam optimizer, accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for 10 epochs in batches of 32; the test split is passed as the
# validation data reported after each epoch
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=10,
)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
for caption, value in (('Test loss:', loss), ('Test accuracy:', accuracy)):
    print(caption, value)

In [None]:
# print(model) only shows the object's repr; summary() lists the layers with
# their output shapes and parameter counts.
model.summary()

In [None]:
from keras.models import load_model

# Raw string: keeps the Windows backslashes literal instead of relying on
# invalid escape sequences such as "\E" or "\m" being passed through.
# NOTE(review): newer Keras releases may require an explicit ".keras"/".h5"
# suffix on the target path - confirm against the installed version.
model.save(r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\model_1")

In [None]:
import numpy as np
# Raise NumPy's print threshold to infinity, then print the test labels
unlimited_threshold = np.inf
np.set_printoptions(threshold=unlimited_threshold)
print(y_test)

In [None]:
# Model outputs for the test split, used by the ROC and PR cells below
y_pred = model.predict(
    X_test,
)

In [None]:
# Loss and accuracy on the test split
test_metrics = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_acc = test_metrics
print('Test accuracy:', test_acc)


In [None]:
# Loss and accuracy on the training split
train_metrics = model.evaluate(X_train, y_train, verbose=2)
train_loss, train_acc = train_metrics
print('Train accuracy:', train_acc)


In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt
# ROC curve of the test predictions, with the AUC shown in the legend
roc_auc = roc_auc_score(y_test, y_pred)
fpr, tpr, thresholds = roc_curve(y_test, y_pred)

roc_label = 'ROC curve (AUC = %0.2f)' % roc_auc
plt.plot(fpr, tpr, label=roc_label)
# Dashed diagonal from (0, 0) to (1, 1) for reference
plt.plot([0, 1], [0, 1], 'k--')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc='lower right')


In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score
# Precision-recall curve of the test predictions, with the average precision
# shown in the legend
average_precision = average_precision_score(y_test, y_pred)
precision, recall, thresholds = precision_recall_curve(y_test, y_pred)

pr_label = 'Precision-Recall curve (AP = %0.2f)' % average_precision
plt.plot(recall, precision, label=pr_label)
plt.ylabel('Precision')
plt.xlabel('Recall')
plt.legend(loc='lower right')


# Predict Function

In [None]:
def MP3TOWAV(mp3_file):
    """Decode an MP3 file and return its samples and sampling rate.

    The file is decoded with pydub, exported as WAV into an in-memory buffer
    (no intermediate file is written), and that buffer is read with librosa.

    Args:
        mp3_file: Path of the MP3 file; must end in ".mp3" (any letter case).

    Returns:
        The tuple (y, sr) returned by librosa.load(..., sr=None).

    Raises:
        ValueError: If mp3_file does not end in ".mp3". Previously the
            function fell through and returned None, which made the caller
            fail later while unpacking the result.
    """
    if not mp3_file.lower().endswith(".mp3"):
        raise ValueError(f"Expected a path ending in '.mp3', got: {mp3_file!r}")

    # Load MP3 file using pydub
    audio = AudioSegment.from_file(mp3_file)

    # Export the audio file to a WAV file in memory
    wav_file = io.BytesIO()
    audio.export(wav_file, format="wav")
    # Rewind so librosa reads the buffer from its first byte
    wav_file.seek(0)

    # Load the WAV data from the in-memory buffer
    y, sr = librosa.load(wav_file, sr=None)
    return y, sr

def Norm(audio):
    """Return `audio` after passing it through librosa.util.normalize."""
    normalized_audio = librosa.util.normalize(audio)
    return normalized_audio

def Segment(audio, sr, segment_length=5, hop_length=2):
    """Cut a signal into overlapping windows with a sliding window.

    Args:
        audio: Sliceable sequence of samples (anything supporting len() and
            slicing).
        sr: Sampling rate in samples per second.
        segment_length: Window length in seconds (default 5).
        hop_length: Distance between consecutive window starts in seconds
            (default 2).

    Returns:
        List of slices of `audio`, in order. The count is
        int((len(audio) - window) / hop) + 1 with truncation toward zero, so
        an input less than one hop short of a full window still yields one
        (shorter) segment, and a shorter input yields an empty list.

    Raises:
        ValueError: If the window or the hop is not at least one sample long.
    """
    # Convert the window length and the hop from seconds to samples
    segment_frames = int(segment_length * sr)
    hop_frames = int(hop_length * sr)
    if segment_frames <= 0 or hop_frames <= 0:
        raise ValueError(
            "segment_length * sr and hop_length * sr must both be at least one sample"
        )

    total_frames = len(audio)
    total_segments = int((total_frames - segment_frames) / hop_frames) + 1

    # range() of a non-positive count is empty, so short inputs give []
    starts = (index * hop_frames for index in range(total_segments))
    return [audio[start:start + segment_frames] for start in starts]
    
def MFCC(audio, sr, num_frames=469):
    """Compute the MFCC matrix of a signal, fixed to `num_frames` columns.

    Args:
        audio: Audio samples passed to librosa.feature.mfcc as `y`.
        sr: Sampling rate of `audio`.
        num_frames: Number of columns of the returned matrix (default 469,
            the width used when the training arrays are padded).

    Returns:
        Array with n_mfcc (20) rows and `num_frames` columns. Shorter results
        are zero-padded on the right, longer ones are trimmed. The dtype of
        the librosa output is kept in every case (the previous padding path
        allocated a float64 buffer instead).
    """
    # Set up parameters for MFCC extraction
    n_fft = 2048
    hop_length = 512
    n_mels = 128
    n_mfcc = 20

    # Compute MFCCs
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels, n_mfcc=n_mfcc)

    n_frames = mfcc.shape[1]

    # Too few frames: pad with zero columns on the right
    if n_frames < num_frames:
        return np.pad(mfcc, ((0, 0), (0, num_frames - n_frames)), mode='constant')

    # Enough frames: keep the first num_frames (a no-op when already equal)
    return mfcc[:, :num_frames]

def predict(model_path, file_path):
    """Classify an MP3 file as "Classic" or "Modern".

    The file is decoded, normalized, cut into segments, and each segment is
    turned into an MFCC matrix. The model scores every segment and the mean
    score decides the label.

    Args:
        model_path: Path of the saved Keras model, passed to load_model.
        file_path: Path of the MP3 file to classify.

    Returns:
        "Classic" if the mean model output is below 0.5, otherwise "Modern".

    Raises:
        ValueError: If the audio yields no segment at all (previously this
            ended in a division by zero after the model call).
    """
    audio, sr = MP3TOWAV(file_path)
    norm_audio = Norm(audio)
    segmented_audio = Segment(norm_audio, sr)
    if len(segmented_audio) == 0:
        raise ValueError(f"Audio is too short to produce a segment: {file_path!r}")

    mfcc_audio = [MFCC(segment, sr) for segment in segmented_audio]

    # Stack the per-segment matrices and append the trailing axis of size 1
    # that the training data received before model.fit.
    arr = np.array(mfcc_audio)[..., np.newaxis]

    model = load_model(model_path)
    y = model.predict(arr)

    # Average the per-segment scores into a single plain float
    output = float(np.mean(y))
    if output < 0.5:
        return "Classic"
    return "Modern"

In [None]:
# Both paths are raw strings so the Windows backslashes stay literal (the
# model path used to rely on invalid escape sequences being passed through).
file_path = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\test.mp3"
model_path = r"D:\Education\Semester 6\CSE 321 Project Based Learning on CSE\Classification_Classic-Modern\model_1"

# Classify the test file and show the predicted label
x = predict(model_path, file_path)
print(x)