In [1]:
import os
import glob
import librosa, librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import pandas as pd

## Mel Spectrogram

In [2]:
def mel_spectrogram_image(fileName, output_file, sr=44100, n_fft=2048, hop_length=512):
    """Render the dB-scaled mel spectrogram of an audio file and save it as an image.

    Parameters
    ----------
    fileName : str or path-like
        Audio file to load; librosa resamples it to ``sr``.
    output_file : str or path-like
        Destination handed to ``Figure.savefig``.
    sr : int, default 44100
        Sample rate used for loading, for the mel transform and for rendering.
    n_fft : int, default 2048
        FFT window length of the underlying STFT.
    hop_length : int, default 512
        Number of samples between successive STFT frames.

    Returns
    -------
    None
        The only result is the image written to ``output_file``. The figure is
        closed after saving, so batch runs do not accumulate open figures.
    """
    # waveform
    signal, sr = librosa.load(fileName, sr=sr)

    # Mel-scaled power (energy-squared) spectrogram. The signal is passed as
    # `y=` because recent librosa releases reject it as a positional argument.
    S = librosa.feature.melspectrogram(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length)

    # Convert to log scale (dB) using the peak power as reference. S already
    # holds power, so power_to_db is the matching conversion; amplitude_to_db
    # would square the values again and double every dB figure.
    # NOTE: images rendered with the previous conversion differ from these --
    # regenerate the whole spectrogram set before retraining.
    log_S = librosa.power_to_db(S, ref=np.max)

    # Make a new figure (12x4 in) and keep a handle so it can be closed afterwards
    fig = plt.figure(figsize=(12, 4))
    try:
        # sample rate and hop length are passed so they stay consistent with
        # the transform above
        librosa.display.specshow(log_S, sr=sr, hop_length=hop_length)

        ## saving
        fig.savefig(output_file)
    finally:
        # Release the figure even if rendering or saving fails.
        plt.close(fig)

In [3]:
# Root folder of the source audio, with one sub-folder per artist.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
directory = "C:/Users/edavi/Desktop/audio/queen_project/Audio/"

In [4]:
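# NOTE: batch job kept for reference -- when uncommented it calls mel_spectrogram_image once per audio file and writes a PNG per file.
# directory = "C:/Users/edavi/Desktop/audio/queen_project/Audio/"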
# for artist in ['Brian', 'Freddie', 'John', 'Roger']:
#   filenames = os.listdir(os.path.join(directory,f"{artist}"))

#   for f in filenames:
#         file_name = directory + artist + '/' + f
#         output_filename = "C:/Users/edavi/Desktop/audio/queen_project/Spectrograms/" + artist + '/' + f[:-3] + 'png'
#         mel_spectrogram_image(file_name, output_filename)

## Importing Images

In [5]:
## creating complete array of spectrograms
# Single place to change the image location (one sub-folder per artist).
SPECTROGRAM_DIR = "C:/Users/edavi/Desktop/audio/queen_project/Spectrograms"

spectrograms = []
artists = []
for artist in ['Brian', 'Freddie', 'John', 'Roger']:
    artist_dir = os.path.join(SPECTROGRAM_DIR, artist)
    # sorted() makes the sample order independent of the filesystem's listing order
    for spectro in sorted(os.listdir(artist_dir)):
        # Skip stray non-image files (e.g. Thumbs.db) that PIL cannot open.
        if not spectro.lower().endswith('.png'):
            continue
        # The context manager closes the file handle once the pixels have been
        # copied into the array.
        with Image.open(os.path.join(artist_dir, spectro)) as img:
            spectrograms.append(np.array(img))
        # Keep the label list aligned with the image list.
        artists.append(artist)

# Stack the per-image arrays into one array (requires equally sized images).
spectrograms = np.array(spectrograms)

### Scaling images

In [6]:
## scaler
spectro_ss = spectrograms/255
spectro_ss.shape

(155, 288, 864, 4)

### One-hot encoding

In [7]:
from tensorflow.keras.utils import to_categorical

In [8]:
## mapping
artist_map = {'Brian':0, 'Freddie':1, 'John':2, 'Roger':3}
artist_categorical = to_categorical(pd.Series(artists).map(artist_map))

### Train/test split

In [9]:
# Hold out 20% of the samples.
# random_state makes the split reproducible across kernel restarts; stratifying on
# the artist labels keeps each artist's proportion the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    spectro_ss,
    artist_categorical,
    test_size=0.2,
    random_state=42,
    stratify=artists,
)

In [10]:
# Display the shape of the training split.
X_train.shape

(124, 288, 864, 4)

## Model

In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, InputLayer, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

In [12]:
# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    # Say why nothing was configured instead of printing a placeholder.
    print('No GPU detected; TensorFlow will run on the CPU.')
for gpu in physical_devices:
    try:
        # Apply the setting to every GPU so they all share the same policy.
        tf.config.experimental.set_memory_growth(gpu, True)
    except (RuntimeError, ValueError) as err:
        # RuntimeError: the device was already initialised; ValueError: invalid device.
        print(f'Could not enable memory growth on {gpu.name}: {err}')

Hola


In [13]:
# List every device TensorFlow can see (no device-type filter).
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [14]:
## architecture
model = Sequential()
model.add(InputLayer(input_shape=(288,864,4)))
model.add(Conv2D(filters = 30, kernel_size = (2,2), padding = 'valid', strides = 1, activation = 'relu'))
model.add(MaxPool2D(pool_size = (3,3), strides=(2,2)))
model.add(Flatten())
model.add(Dense(64, activation = 'relu'))
model.add(Dense(4, activation = 'softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 287, 863, 30)      510       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 143, 431, 30)      0         
_________________________________________________________________
flatten (Flatten)            (None, 1848990)           0         
_________________________________________________________________
dense (Dense)                (None, 64)                118335424 
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 260       
Total params: 118,336,194
Trainable params: 118,336,194
Non-trainable params: 0
_________________________________________________________________


In [15]:
## compiling
model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['accuracy'])
model.fit(X_train, y_train, epochs = 70, batch_size = 32, validation_data = (X_test, y_test))

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70

KeyboardInterrupt: 