In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [None]:
# Locations of the pre-extracted feature tables and of the spectrogram images,
# given relative to the notebook's working directory.
features_30_seconds_filepath = "../Data/features_30_sec.csv"
features_3_seconds_filepath = "../Data/features_3_sec.csv"
mel_spectrograms_filepath = "../Data/images_original"

# Genre names. The position of a name in this list is the integer class id
# that lable_to_int assigns to it.
genres = [
    "blues",
    "classical",
    "country",
    "disco",
    "hiphop",
    "jazz",
    "metal",
    "pop",
    "reggae",
    "rock",
]

In [None]:
def loadCSVs(filepath):
    '''Read a comma-separated feature file and split it into features and labels.

    Every cell is read with dtype=object, i.e. nothing is parsed into a numeric
    type here; callers convert the feature block themselves.

    Parameters:
        filepath: path of the CSV file to read.

    Returns:
        (X, y) where X holds every column except the first two and the last
        one, and y holds the last column only, kept two-dimensional with
        shape (rows, 1).
    '''
    table = pd.read_csv(filepath, delimiter=',', dtype=object).to_numpy()
    feature_columns = table[:, 2:-1]
    label_column = table[:, -1:]
    return feature_columns, label_column

In [None]:
def lable_to_int(lables, genres):
    '''Encode genre labels as an integer numpy array.

    Each label equal to genres[i] is replaced by i (the first match wins if a
    genre is listed twice). Values that are already integer-like (e.g. 3 or
    '3') are passed through as ints, so encoding an already-encoded array is
    harmless.

    The result always has an integer dtype. Assigning ints into a copy of the
    input instead would keep the input dtype, turning a string array such as
    np.array(['blues', 'rock']) into the strings '0', '1' rather than numbers.

    Parameters:
        lables: array-like of genre names (any shape).
        genres: sequence of genre names; the index is the class id.

    Returns:
        np.ndarray of dtype int64 with the same shape as lables.

    Raises:
        ValueError: if a label is neither in genres nor integer-like.
    '''
    label_array = np.asarray(lables)

    # setdefault keeps the first index for a duplicated genre name.
    genre_index = {}
    for idx, genre in enumerate(genres):
        genre_index.setdefault(genre, idx)

    def _encode(label):
        if label in genre_index:
            return genre_index[label]
        try:
            # Already-encoded value (int, or digit string from an earlier pass).
            return int(label)
        except (TypeError, ValueError):
            raise ValueError("Unknown genre label: {!r}".format(label))

    # tolist() yields plain Python objects, which are safe dictionary keys.
    encoded = [_encode(label) for label in label_array.ravel().tolist()]
    return np.array(encoded, dtype=np.int64).reshape(label_array.shape)


In [None]:
def gray_scale_images(images):
    '''Collapse the channel axis of an image array to one value per pixel.

    Only the first three entries of the last axis (the colour channels) are
    used; they are combined as a weighted sum. Any further channel, such as
    the fourth value of each PNG pixel (presumably transparency), is ignored.

    Parameters:
        images: array whose last axis holds at least three channel values.

    Returns:
        A new array with the last axis removed.
    '''
    channel_weights = [0.2989, 0.5870, 0.1140]
    colour_channels = images[..., :3]
    weighted_sum = np.dot(colour_channels, channel_weights)
    return np.array(weighted_sum)

In [None]:
def load_mel_spectrograms():
    '''Load every mel-spectrogram PNG into memory, genre by genre.

    Reads the module-level `genres` list and `mel_spectrograms_filepath`;
    each genre is expected to have its own sub-directory of .png files.
    Images are expected to be 288 x 432 pixels with four values per pixel
    (the CNN cells below use input_shape=(288, 432, 4)).

    Files are visited in sorted name order. os.listdir returns entries in
    arbitrary order, so without sorting the sample order (and therefore any
    seeded train/validation/test split) could differ between machines.

    Returns:
        (images, labels): an array stacking all loaded images, and an array
        with the genre name of each image in the same order.
    '''
    image_features = []
    image_lables = []
    for genre in genres:
        print("Loading", genre)
        genre_dir = os.path.join(mel_spectrograms_filepath, genre)
        png_files = sorted(f for f in os.listdir(genre_dir) if f.endswith('.png'))

        for file_name in png_files:
            image = plt.imread(os.path.join(genre_dir, file_name))  # Load the image
            image_features.append(image)
            image_lables.append(genre)

    return np.array(image_features), np.array(image_lables)

In [None]:
# Read both feature tables; loadCSVs returns the feature cells unparsed.
string_X_30sec, y_30sec = loadCSVs(features_30_seconds_filepath)
string_X_3sec, y_3sec = loadCSVs(features_3_seconds_filepath)

# Convert the feature blocks to float64 so they can be used numerically.
X_30sec = string_X_30sec.astype(np.float64)
X_3sec = string_X_3sec.astype(np.float64)

# Shapes of features and labels, 30-second table first.
print(X_30sec.shape, y_30sec.shape)
print(X_3sec.shape, y_3sec.shape)

In [None]:
# Load all spectrogram images together with the genre name of each image.
X_images, y_images = load_mel_spectrograms()
# Replace the genre names by their position in `genres`; from here on
# y_images holds the encoded labels, not the names.
y_images = lable_to_int(y_images,genres)
# Last expression of the cell: shown as the cell output.
X_images.shape, y_images.shape

In [None]:
from sklearn.model_selection import train_test_split

# First split: 80% training, 20% held out.
X_images_train, X_images_temp, y_images_train, y_images_temp = train_test_split(
    X_images,
    y_images,
    test_size=0.2,
    shuffle=True,
    random_state=7,
)

# Second split: the held-out 20% is halved into validation and test sets.
X_images_val, X_images_test, y_images_val, y_images_test = train_test_split(
    X_images_temp,
    y_images_temp,
    test_size=0.5,
    shuffle=True,
    random_state=7,
)

In [None]:
# Grayscale version of the complete image array (computed from X_images,
# not from the train/validation/test splits): one value per pixel.
X_images_gray = gray_scale_images(X_images)
# Displayed as the cell output.
X_images_gray.shape

In [None]:
# Encode the genre names of both CSV label columns as positions in `genres`.
y_30sec_int = lable_to_int(y_30sec, genres)
y_3sec_int = lable_to_int(y_3sec, genres)
# NOTE(review): y_images was already passed through lable_to_int in the image
# loading cell, so this call receives encoded labels - likely redundant; confirm.
y_images_int = lable_to_int(y_images, genres)
# Displayed as the cell output.
y_30sec_int.shape, y_3sec_int.shape, y_images_int.shape

In [None]:
# Create normalized (min-max) and standardized (z-score) versions of the data.
# Statistics are computed per column over the whole table.


def _min_max_scale(features):
    '''Rescale each column to [0, 1]; a constant column becomes all zeros.'''
    col_min = np.min(features, axis=0)
    col_range = np.max(features, axis=0) - col_min
    # A constant column has range 0; divide by 1 instead to avoid NaN.
    safe_range = np.where(col_range == 0, 1, col_range)
    return (features - col_min) / safe_range


def _standardize(features):
    '''Shift each column to mean 0 and scale to std 1; a constant column becomes all zeros.'''
    col_std = np.std(features, axis=0)
    # A constant column has std 0; divide by 1 instead to avoid NaN.
    safe_std = np.where(col_std == 0, 1, col_std)
    return (features - np.mean(features, axis=0)) / safe_std


X_30sec_norm = _min_max_scale(X_30sec)
X_3sec_norm = _min_max_scale(X_3sec)
X_30sec_std = _standardize(X_30sec)
X_3sec_std = _standardize(X_3sec)

In [None]:
# Check the scaled data.
# Min-max scaled tables: the overall maximum should be 1 and the minimum 0.
print(X_30sec_norm.max() == 1, X_30sec_norm.min() == 0)
print(X_3sec_norm.max() == 1, X_3sec_norm.min() == 0)
# Standardized tables: overall mean and standard deviation.
print(X_30sec_std.mean(), X_30sec_std.std())
print(X_3sec_std.mean(), X_3sec_std.std())

In [None]:
'''TensorFlow setup'''
# NOTE(review): these imports sit mid-notebook; consider moving them to the
# first import cell so all dependencies are visible in one place.
import tensorflow as tf
from tensorflow.keras import layers, models
# Record the TensorFlow version in the notebook output.
print("TensorFlow version:", tf.__version__)

In [None]:
# Create CNN: two convolution layers (32 then 16 filters, 3x3 kernels) with
# one max-pooling step between them, followed by a 64-unit dense layer and a
# 10-unit output layer without activation.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(288, 432, 4)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),
])
model.summary()

In [None]:
# THIS CELL HAS A LONG RUNTIME
#     By running this search I determined that the model seems to work best when
#     using 32 filters for conv layer 1, 16 filters for conv layer 2, and no layer 3
#     (the kernel size is 3x3 in every conv layer).
#     Moving forward I will use this CNN configuration to cross-validate and choose hyperparameters
def choose_CNN(l1, l2, l3):
    '''Build, compile and train one candidate CNN, then return it.

    The network always starts with a Conv2D layer of l1 filters. For each of
    l2 and l3 that is not None, a 2x2 max-pooling layer followed by a Conv2D
    layer with that many filters is appended. The head is Flatten, Dense(64)
    and a 10-unit output layer without activation (the loss is configured
    with from_logits=True).

    Training runs for 10 epochs on the module-level tensors
    X_images_train_tens / y_images_train_tens and validates on
    X_images_val_tens / y_images_val_tens, which must exist before the call.

    Parameters:
        l1: number of filters of the first convolution layer.
        l2: number of filters of the optional second convolution layer, or None.
        l3: number of filters of the optional third convolution layer, or None.

    Returns:
        The trained Keras model.
    '''
    model = models.Sequential()
    model.add(layers.Conv2D(l1, (3, 3), activation='relu', input_shape=(288, 432, 4)))
    # Each optional stage is "pool, then convolve"; None means skip the stage.
    for filters in (l2, l3):
        if filters is not None:
            model.add(layers.MaxPooling2D((2, 2)))
            model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10))
    model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(X_images_train_tens, y_images_train_tens, epochs=10, validation_data=(X_images_val_tens, y_images_val_tens))
    return model

'''Train and test CNN'''
# Tensors used by choose_CNN (it reads these module-level names at call time).
# Labels are cast to float32 before the tensor conversion.
X_images_train_tens = tf.convert_to_tensor(X_images_train, dtype=float)
y_images_train_tens = tf.convert_to_tensor(y_images_train.astype(np.float32), dtype=float)
X_images_val_tens = tf.convert_to_tensor(X_images_val, dtype=float)
y_images_val_tens = tf.convert_to_tensor(y_images_val.astype(np.float32), dtype=float)

# Candidate filter counts per convolution layer; None disables layers 2 / 3.
# NOTE(review): with l2=None and l3 set, choose_CNN builds the same network as
# with l2 set and l3=None, so some of the 27 combinations are duplicates.
layer1 = [16,32,64]
layer2 = [None,16,32]
layer3 = [None,16,32]
# Every trained model is kept here.
# NOTE(review): all 27 models stay in memory at once - confirm this fits.
allModels = []

# Train one model per (l1, l2, l3) combination, in nested-loop order.
for l1 in layer1:
    for l2 in layer2:
        for l3 in layer3:
            allModels.append(choose_CNN(l1,l2,l3))

# Report each model's metrics on the validation set, in training order.
for m in allModels:
    m.evaluate(X_images_val_tens,  y_images_val_tens, verbose=2)

In [None]:
# Choosing hyper for best model (layer 1 = 32, layer 2 = 16)
# Fresh, untrained instance of the configuration selected by the search above.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(288, 432, 4)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),
])

# Set up cross-validation and choose hyperparameters