In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [2]:
features_30_seconds_filepath = "../Data/features_30_sec.csv"
features_3_seconds_filepath = "../Data/features_3_sec.csv"
mel_spectrograms_filepath = "../Data/images_original"

genres = ["blues", "classical" , "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

In [3]:
'''loads the CSVs. Features are everything but the first col(filename), and the lables. The lables are the last column''' 
def loadCSVs(filepath):
    data = pd.read_csv(filepath, dtype = object, delimiter = ',').values
    X = data[:,2:-1]
    y = data[:,-1:]
    return X, y

In [4]:
'''This will turn the genere lables into np.array of ints'''
def lable_to_int(lables, genres):
    lable_int = np.array(lables)
    for i in range(len(genres)):
        lable_int[lable_int==genres[i]]=i
    return lable_int 


In [5]:
'''#this will turn the png where each pixel is represented by 4 values into a single value. The first three are colors, and I think the forth is transparancy.'''
def gray_scale_images(images):
    gray_images = np.dot(images[..., :3], [0.2989, 0.5870, 0.1140])
    return np.array(gray_images)

In [6]:
'''loads the mel spectrograms into a np array of images. Each image is 288, 432 pixels, and each pixel is represented by four values'''
def load_mel_spectrograms():
    image_features = []
    image_lables = []
    for genre in genres:
        print("Loading", genre)
        images_file_path = mel_spectrograms_filepath + "/" + genre
        png_files = [f for f in os.listdir(images_file_path) if f.endswith('.png')]

        for file in png_files:
            file_path = images_file_path +"/"+ file
            image = plt.imread(file_path)  # Load the image
            image_features.append(image)
            image_lables.append(genre)

    return np.array(image_features), np.array(image_lables)

In [7]:
string_X_30sec, y_30sec = loadCSVs(features_30_seconds_filepath)
X_30sec = string_X_30sec.astype(np.float64)
string_X_3sec, y_3sec = loadCSVs(features_3_seconds_filepath)
X_3sec = string_X_3sec.astype(np.float64)
print(X_30sec.shape, y_30sec.shape)
print(X_3sec.shape, y_3sec.shape)

(1000, 57) (1000, 1)
(9990, 57) (9990, 1)


In [27]:
X_images, y_images = load_mel_spectrograms()
y_images = lable_to_int(y_images,genres)
X_images.shape, y_images.shape

Loading blues
Loading classical
Loading country
Loading disco
Loading hiphop
Loading jazz
Loading metal
Loading pop
Loading reggae
Loading rock


((999, 288, 432, 4), (999,))

In [29]:
from sklearn.model_selection import train_test_split
X_images_train, X_images_temp, y_images_train, y_images_temp = train_test_split(X_images, y_images, test_size=0.2, shuffle=True)
X_images_val, X_images_test, y_images_val, y_images_test = train_test_split(X_images_temp, y_images_temp, test_size=0.5, shuffle=True)

In [9]:
#grayscale images 
X_images_gray = gray_scale_images(X_images)
X_images_gray.shape

(999, 288, 432)

In [10]:
y_30sec_int = lable_to_int(y_30sec, genres)
y_3sec_int = lable_to_int(y_3sec, genres)
y_images_int = lable_to_int(y_images, genres)
y_30sec_int.shape, y_3sec_int.shape, y_images_int.shape

((1000, 1), (9990, 1), (999,))

In [11]:
'''Create normalized and standardized versions of data'''
X_30sec_norm = (X_30sec-np.min(X_30sec, axis=0))/(np.max(X_30sec,axis=0)-np.min(X_30sec,axis=0))
X_3sec_norm = (X_3sec-np.min(X_3sec, axis=0))/(np.max(X_3sec,axis=0)-np.min(X_3sec,axis=0))
X_30sec_std = (X_30sec-np.mean(X_30sec, axis=0))/(np.std(X_30sec, axis=0))
X_3sec_std = (X_3sec-np.mean(X_3sec, axis=0))/(np.std(X_3sec, axis=0))

In [12]:
'''Check norm and std data'''
print(np.max(X_30sec_norm)==1,np.min(X_30sec_norm)==0)
print(np.max(X_3sec_norm)==1,np.min(X_3sec_norm)==0)
print(np.mean(X_30sec_std), np.std(X_30sec_std))
print(np.mean(X_3sec_std), np.std(X_3sec_std))

True True
True True
-8.78829173176966e-18 1.0
5.5902067966304e-18 1.0


In [15]:
'''TensorFlow setup'''
import tensorflow as tf
from tensorflow.keras import layers, models
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.12.0


In [19]:
'''Create CNN'''
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(288, 432, 4)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_6 (Conv2D)           (None, 286, 430, 32)      1184      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 143, 215, 32)     0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 141, 213, 64)      18496     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 70, 106, 64)      0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 68, 104, 64)       36928     
                                                                 
 flatten (Flatten)           (None, 452608)           

In [61]:
'''Train and test CNN'''
X_images_train_tens = tf.convert_to_tensor(X_images_train, dtype=float)
y_images_train_tens = tf.convert_to_tensor(y_images_train.astype(np.float32), dtype=float)
X_images_val_tens = tf.convert_to_tensor(X_images_val, dtype=float)
y_images_val_tens = tf.convert_to_tensor(y_images_val.astype(np.float32), dtype=float)
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
history = model.fit(X_images_train_tens, y_images_train_tens, epochs=1, validation_data=(X_images_val_tens, y_images_val_tens))



2024-11-25 16:53:36.179958: W tensorflow/core/framework/op_kernel.cc:1807] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to float is not supported


UnimplementedError: Graph execution error:

Detected at node 'Cast_1' defined at (most recent call last):
    File "/usr/local/anaconda3/2023.03/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/usr/local/anaconda3/2023.03/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/traitlets/config/application.py", line 992, in launch_instance
      app.start()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 736, in start
      self.io_loop.start()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
      handle._run()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell
      await result
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 546, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_4037702/237434354.py", line 7, in <module>
      history = model.fit(X_images_train_tens, y_images_train_tens, epochs=1, validation_data=(X_images_val_tens, y_images_val_tens))
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 1729, in fit
      val_logs = self.evaluate(
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 2072, in evaluate
      tmp_logs = self.test_function(iterator)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 1852, in test_function
      return step_function(self, iterator)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 1836, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 1824, in run_step
      outputs = model.test_step(data)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 1791, in test_step
      return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/training.py", line 1149, in compute_metrics
      self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 605, in update_state
      metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/utils/metrics_utils.py", line 77, in decorated
      update_op = update_state_fn(*args, **kwargs)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/metrics/base_metric.py", line 140, in update_state_fn
      return ag_update_state(*args, **kwargs)
    File "/usr/local/anaconda3/2023.03/lib/python3.10/site-packages/keras/metrics/base_metric.py", line 676, in update_state
      y_true = tf.cast(y_true, self._dtype)
Node: 'Cast_1'
Cast string to float is not supported
	 [[{{node Cast_1}}]] [Op:__inference_test_function_9448]