In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [2]:
features_30_seconds_filepath = "../Data/features_30_sec.csv"
features_3_seconds_filepath = "../Data/features_3_sec.csv"
mel_spectrograms_filepath = "../Data/images_original"

genres = ["blues", "classical" , "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]

In [3]:
'''loads the CSVs. Features are everything but the first col(filename), and the lables. The lables are the last column''' 
def loadCSVs(filepath):
    data = pd.read_csv(filepath, dtype = object, delimiter = ',').values
    X = data[:,1:-1]
    y = data[:,-1:]
    return X, y

In [4]:
'''This will turn the genere lables into np.array of ints'''
def lable_to_int(lables, genres):
    lable_int = np.array(lables)
    for i in range(len(genres)):
        lable_int[lable_int==genres[i]]=i
    return lable_int 


In [5]:
'''#this will turn the png where each pixel is represented by 4 values into a single value. The first three are colors, and I think the forth is transparancy.'''
def gray_scale_images(images):
    gray_images = np.dot(images[..., :3], [0.2989, 0.5870, 0.1140])
    return np.array(gray_images)

In [6]:
'''loads the mel spectrograms into a np array of images. Each image is 288, 432 pixels, and each pixel is represented by four values'''
def load_mel_spectrograms():
    image_features = []
    image_lables = []
    for genre in genres:
        print("Loading", genre)
        images_file_path = mel_spectrograms_filepath + "/" + genre
        png_files = [f for f in os.listdir(images_file_path) if f.endswith('.png')]

        for file in png_files:
            file_path = images_file_path +"/"+ file
            image = plt.imread(file_path)  # Load the image
            image_features.append(image)
            image_lables.append(genre)

    return np.array(image_features), np.array(image_lables)

In [7]:
X_30sec, y_30sec = loadCSVs(features_30_seconds_filepath)
X_3sec, y_3sec = loadCSVs(features_30_seconds_filepath)
print(X_30sec.shape, y_30sec.shape)
print(X_3sec.shape, y_3sec.shape)

(1000, 58) (1000, 1)
(1000, 58) (1000, 1)


In [11]:
X_images, y_images = load_mel_spectrograms()
X_images.shape, y_images.shape

Loading blues
Loading classical
Loading country
Loading disco
Loading hiphop
Loading jazz
Loading metal
Loading pop
Loading reggae
Loading rock


((999, 288, 432, 4), (999,))

In [12]:
#grayscale images 
X_images_gray = gray_scale_images(X_images)
X_images_gray.shape

(999, 288, 432)

In [15]:
y_30sec_int = lable_to_int(y_30sec, genres)
y_3sec_int = lable_to_int(y_3sec, genres)
y_images_int = lable_to_int(y_images, genres)
y_30sec_int.shape, y_3sec_int.shape, y_images_int.shape

((1000, 1), (1000, 1), (999,))