In [1]:
# import libraries

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import librosa
import librosa.display
from IPython.display import Audio, display
from google.colab import drive
import math
import os
import csv

### Global vars

In [2]:
WINDOW_LENGTH = 23 # ms
OVERLAP = 0.5
WINDOW_FUN = 'hamm' # hamming window
SR = 22000 # sampling rate
BASE_DIR = '/content/drive/MyDrive/Dataset/GTZAN/genres_original'
LABELS = []
MFCC_BIN = 13

# Derived vars
frame_len = math.floor((SR/1000)*WINDOW_LENGTH)
hop_len = math.floor(frame_len * (1 - OVERLAP))
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Check Dataset Files


In [None]:
for root, dirs, files in os.walk(BASE_DIR):
  for directory in dirs:
      LABELS.append(directory) # append the directory name as genre labels
      dir_path = os.path.join(root, directory)
      print(f"Directory: {dir_path}")

      for file in os.listdir(dir_path):
          file_path = os.path.join(dir_path, file)
          if os.path.isfile(file_path):
              print(f"  File: {file_path}")

Directory: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00000.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00005.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00006.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00004.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00001.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00003.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00011.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00008.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00002.wav
  File: /content/drive/MyDrive/Dataset/GTZAN/genres_original/classical/classical.00010.wav
  File: /content

## Custom Windowing Functions

In [None]:
def hamming():
  n = np.linspace(0, frame_len, num=frame_len, endpoint=False)
  return 0.54 - 0.46 * np.cos(2*np.pi*n / (frame_len-1))

def windowing(audio, window_type='rect'):
  n = len(audio)
  n_frame = 1 + n // hop_len
  n_padded = max(n, hop_len * n_frame + frame_len)
  padded_signal = audio if n_padded == n else np.pad(audio, (0, n_padded-n), 'constant')
  frames = np.empty((n_frame, frame_len))
  for k in range(n_frame):
    frames[k, :] = padded_signal[k*hop_len : k*hop_len + frame_len]
  if window_type == 'hamm':
    hamm_window = hamming()
    frames = frames * hamm_window
  return frames

## Short Time Fourier Transform
Digunakan untuk mengekstrasi fitur dalam frequency domain

In [None]:
def stft(windows):
    # Use rfft to get the positive frequencies
    res = np.fft.rfft(windows, axis=1)
    return res

## Fitur 01 - Zero Crossing Rate (Pitch Feature)


In [None]:
def zcr(windows):
  signal_sgn = np.sign(windows)
  left = signal_sgn[:, 1:]
  right = signal_sgn[:, :-1]
  diff = np.absolute(left - right) / 2
  return np.mean(diff, axis=1)

def zcr_mean(zcr_vector):
    # Calculate the mean of the ZCR vector
    mean_zcr = np.mean(zcr_vector)
    return mean_zcr

## Fitur 02 - Root Means Square Energy (Tempo Feature)

In [None]:
def rms_energy(windows):
    energies = np.sum(windows**2, axis=1) / windows.shape[1]
    mean_energy = np.mean(energies)
    rms_energy = np.sqrt(mean_energy)
    return rms_energy

## Fitur 03 - Beat Per Minute / BPM (Tempo Feature):
Feature extracted using Librosa

In [None]:
def bpm(audio):
  onset_env = librosa.onset.onset_strength(y=audio, sr=SR)
  bpm = librosa.beat.tempo(onset_envelope=onset_env, sr=SR)
  return bpm[0]

## Fitur 04 - Spectral Centroid (Magnitude Feature):

In [None]:
def spectral_centroid(stft_frames):
    # Compute the magnitude spectrum
    magnitude_spectrum = np.abs(stft_frames)

    # Compute the frequency values for each bin
    num_bins = magnitude_spectrum.shape[1]
    frequency_values = np.fft.fftfreq(num_bins, d=1/SR)

    # Ensure frequency_values is a row vector
    frequency_values = frequency_values[:num_bins]  # Only keep the positive frequencies
    frequency_values = frequency_values.reshape((1, -1))

    # Compute the spectral centroid
    weighted_sum = np.sum(frequency_values * magnitude_spectrum, axis=1)
    sum_magnitude = np.sum(magnitude_spectrum, axis=1)
    centroids = weighted_sum / sum_magnitude

    return centroids

## Fitur 05 - Spectral Rolloff (Magnitude Feature):

In [None]:
def spectral_rolloff(stft_frames, rolloff_percent=0.85):
    magnitude_spectrum = np.abs(stft_frames)
    total_energy = np.sum(magnitude_spectrum, axis=1)
    cumulative_energy = np.cumsum(magnitude_spectrum, axis=1)
    rolloff_threshold = rolloff_percent * total_energy[:, np.newaxis]
    rolloff_freq_bin = np.argmax(cumulative_energy >= rolloff_threshold, axis=1)
    num_bins = stft_frames.shape[1]
    frequencies = np.fft.fftfreq(num_bins, d=1/SR)
    rolloff_frequencies = frequencies[rolloff_freq_bin]

    return rolloff_frequencies / np.max(rolloff_frequencies)

## Fitur 06 - Spectral Flux (Magnitude Feature)



In [None]:
def spectral_flux(stft_frames):
    magnitude_spectrum = np.abs(stft_frames)
    flux = np.sqrt(np.sum((np.diff(magnitude_spectrum, axis=1))**2, axis=0))

    return flux / np.max(flux)

## Fitur 07 - Mel-Frequency Cepstral Coefficient
Feature extracted using Librosa

In [None]:
def mfcc(audio):
  mfccs = librosa.feature.mfcc(y=audio, sr=SR, n_mfcc=MFCC_BIN, window='hamming', hop_length=hop_len, win_length=frame_len)
  return mfccs

## Fitur 08 - Chroma

In [31]:
# def compute_chroma_filter_bank(n_fft, n_chroma=12):
#     freqs = np.fft.rfftfreq(n_fft, 1/SR)
#     chroma_filter = np.zeros((n_chroma, len(freqs)))

#     A440 = 440.0
#     notes = np.array([A440 * 2**(i / 12) for i in range(-57, 50)])

#     for chroma in range(n_chroma):
#         filter = np.zeros(len(freqs))
#         for note in notes:
#             diff = np.abs(freqs - note)
#             closest = np.argmin(diff)
#             filter[closest] += 1
#         chroma_filter[chroma, :] = filter

#     return chroma_filter

# def chroma(stft_frames, n_chroma=12):
#     n_fft = 2 * (stft_frames.shape[1] - 1)
#     chroma_filter = compute_chroma_filter_bank(n_fft, n_chroma)

#     chromas = np.dot(chroma_filter, np.abs(stft_frames.T))

#     # Normalize
#     chromas = chromas / np.sum(chromas, axis=0, keepdims=True)

#     return chromas

def chroma(audio):
  chromas = librosa.feature.chroma_stft(y=audio, sr=SR, window='hamming', hop_length=hop_len, win_length=frame_len)
  return chromas

## Feature Extraction


In [33]:
csv_file_path = os.path.join(BASE_DIR, 'extracted_feature.csv')
file_exists = os.path.isfile(csv_file_path)
is_empty = os.stat(csv_file_path).st_size == 0 if file_exists else True

features = [
    'name',
    'zcr_mean',
    'rms_energy',
    'bpm',
    'centroid_mean',
    'rolloff_mean',
    'flux_mean',
    'mfcc',
    'chroma_mean',
    'label'
  ]

# Append new rows to the existing CSV file
with open(csv_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)

    if 'mfcc' in features:
      mfcc_check = True
      mfcc_bins = [f'mfcc_{i+1}' for i in range(MFCC_BIN)]
      index = features.index('mfcc')
      features[index:index + 1] = mfcc_bins
    writer.writerow(features)

    corrupted = []

    for i in range(10): #iterate through the list of genres
      dir_path = os.path.join(BASE_DIR, LABELS[i])
      print(f'Genre: {LABELS[i]}')
      for f in os.listdir(dir_path):
        print(f'  Audio file: {f}')
        name = f # Sample name
        try:
          audio, sr = librosa.load(os.path.join(dir_path, f))
        except:
          print("File corrupted - skipping feature extraction")
          corrupted.append(f)
          continue
        SR = sr
        windows = windowing(audio, 'hamm')
        stft_frames = stft(windows)

        feature_vector = [] #initialize feature vector

        if 'name' in features:
          feature_vector.append(name)
        if 'zcr_mean' in features: # Feature: ZCR Mean
          zero_cross = zcr(windows)
          zero_cross_mean = zcr_mean(zero_cross)
          feature_vector.append(zero_cross_mean)
        if 'rms_energy' in features: # Feature: Root Mean Square of energy
          rms = rms_energy(windows)
          feature_vector.append(rms)
        if 'bpm' in features: # Feature: Beat per minute
          beat_pm = bpm(audio)
          feature_vector.append(beat_pm)
        if 'centroid_mean' in features: # Feature: Spectral Centroid mean
          centroids = spectral_centroid(stft_frames)
          centroid_mean = np.mean(centroids)
          feature_vector.append(centroid_mean)
        if 'rolloff_mean' in features: # Feature: Spectral Rolloff mean
          rolloff = spectral_rolloff(stft_frames)
          rolloff_mean = np.mean(rolloff)
          feature_vector.append(rolloff_mean)
        if 'flux_mean' in features: # Feature: Spectral Flux mean
          flux = spectral_flux(stft_frames)
          flux_mean = np.mean(flux)
          feature_vector.append(flux_mean)
        if mfcc_check:
          mfcc_features = mfcc(audio)
          mfcc_mean = np.mean(mfcc_features, axis=1).flatten()
          feature_vector.extend(mfcc_mean)
        if 'chroma_mean' in features:
          chromas = chroma(audio)
          chroma_mean = np.mean(chromas)
          feature_vector.append(chroma_mean)
        feature_vector.append(LABELS[i])
        writer.writerow(feature_vector)
      print(f"Feature extraction completed, there are {len(corrupted)} corrupted file\nCorrupted files")
      print(corrupted)

Genre: classical
  Audio file: classical.00000.wav


	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  bpm = librosa.beat.tempo(onset_envelope=onset_env, sr=SR)


  Audio file: classical.00005.wav
  Audio file: classical.00006.wav
  Audio file: classical.00004.wav
  Audio file: classical.00001.wav
  Audio file: classical.00003.wav
  Audio file: classical.00011.wav
  Audio file: classical.00008.wav
  Audio file: classical.00002.wav
  Audio file: classical.00010.wav
  Audio file: classical.00009.wav
  Audio file: classical.00007.wav
  Audio file: classical.00015.wav
  Audio file: classical.00018.wav
  Audio file: classical.00017.wav
  Audio file: classical.00019.wav
  Audio file: classical.00016.wav
  Audio file: classical.00014.wav
  Audio file: classical.00020.wav
  Audio file: classical.00012.wav
  Audio file: classical.00021.wav
  Audio file: classical.00013.wav
  Audio file: classical.00023.wav
  Audio file: classical.00022.wav
  Audio file: classical.00032.wav
  Audio file: classical.00025.wav
  Audio file: classical.00031.wav
  Audio file: classical.00024.wav
  Audio file: classical.00026.wav
  Audio file: classical.00028.wav
  Audio file: 

  centroids = weighted_sum / sum_magnitude


  Audio file: classical.00042.wav
  Audio file: classical.00037.wav
  Audio file: classical.00035.wav
  Audio file: classical.00041.wav
  Audio file: classical.00033.wav
  Audio file: classical.00034.wav
  Audio file: classical.00039.wav
  Audio file: classical.00038.wav
  Audio file: classical.00036.wav
  Audio file: classical.00040.wav
  Audio file: classical.00048.wav
  Audio file: classical.00053.wav
  Audio file: classical.00049.wav
  Audio file: classical.00046.wav
  Audio file: classical.00051.wav
  Audio file: classical.00047.wav
  Audio file: classical.00044.wav
  Audio file: classical.00054.wav
  Audio file: classical.00045.wav
  Audio file: classical.00052.wav
  Audio file: classical.00050.wav
  Audio file: classical.00062.wav
  Audio file: classical.00063.wav
  Audio file: classical.00065.wav
  Audio file: classical.00058.wav
  Audio file: classical.00059.wav
  Audio file: classical.00060.wav
  Audio file: classical.00061.wav
  Audio file: classical.00056.wav
  Audio file: 

  audio, sr = librosa.load(os.path.join(dir_path, f))
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


File corrupted - skipping feature extraction
  Audio file: jazz.00056.wav


	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  bpm = librosa.beat.tempo(onset_envelope=onset_env, sr=SR)


  Audio file: jazz.00053.wav
  Audio file: jazz.00050.wav
  Audio file: jazz.00060.wav
  Audio file: jazz.00057.wav
  Audio file: jazz.00058.wav
  Audio file: jazz.00061.wav
  Audio file: jazz.00059.wav
  Audio file: jazz.00063.wav
  Audio file: jazz.00062.wav
  Audio file: jazz.00064.wav
  Audio file: jazz.00065.wav
  Audio file: jazz.00071.wav
  Audio file: jazz.00070.wav
  Audio file: jazz.00069.wav
  Audio file: jazz.00068.wav
  Audio file: jazz.00066.wav
  Audio file: jazz.00067.wav
  Audio file: jazz.00078.wav
  Audio file: jazz.00077.wav
  Audio file: jazz.00072.wav
  Audio file: jazz.00079.wav
  Audio file: jazz.00076.wav
  Audio file: jazz.00075.wav
  Audio file: jazz.00074.wav
  Audio file: jazz.00073.wav
  Audio file: jazz.00085.wav
  Audio file: jazz.00084.wav
  Audio file: jazz.00087.wav
  Audio file: jazz.00086.wav
  Audio file: jazz.00083.wav
  Audio file: jazz.00081.wav
  Audio file: jazz.00082.wav
  Audio file: jazz.00080.wav
  Audio file: jazz.00092.wav
  Audio file: 

  centroids = weighted_sum / sum_magnitude


  Audio file: disco.00016.wav
  Audio file: disco.00015.wav
  Audio file: disco.00014.wav
  Audio file: disco.00012.wav
  Audio file: disco.00017.wav
  Audio file: disco.00018.wav
  Audio file: disco.00010.wav
  Audio file: disco.00011.wav
  Audio file: disco.00019.wav
  Audio file: disco.00020.wav
  Audio file: disco.00013.wav
  Audio file: disco.00022.wav
  Audio file: disco.00021.wav
  Audio file: disco.00025.wav
  Audio file: disco.00027.wav
  Audio file: disco.00024.wav
  Audio file: disco.00029.wav
  Audio file: disco.00023.wav
  Audio file: disco.00028.wav
  Audio file: disco.00030.wav
  Audio file: disco.00026.wav
  Audio file: disco.00031.wav
  Audio file: disco.00034.wav
  Audio file: disco.00032.wav
  Audio file: disco.00036.wav
  Audio file: disco.00039.wav
  Audio file: disco.00035.wav
  Audio file: disco.00033.wav
  Audio file: disco.00042.wav
  Audio file: disco.00041.wav
  Audio file: disco.00038.wav
  Audio file: disco.00040.wav
  Audio file: disco.00037.wav
  Audio fi

## Normalization


In [6]:
def normalize_columns(input_csv, output_csv, target):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(os.path.join(BASE_DIR, input_csv))

    # Normalize specified columns
    for column in target:
        if column in df.columns:
            max_value = df[column].abs().max()
            df[column] = df[column] / max_value
    df.to_csv(os.path.join(BASE_DIR, output_csv), index=False)


target = ["bpm", "centroid_mean", "mfcc_1", "mfcc_2", "mfcc_3", "mfcc_4", "mfcc_5",
          "mfcc_6", "mfcc_7","mfcc_8","mfcc_9","mfcc_10","mfcc_11","mfcc_12",
          "mfcc_13"]

normalize_columns("extracted_feature.csv", "extracted_feature_NORM.csv", target)