In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spicy
import sys
import os
import pickle
import librosa
import librosa.display
from IPython.display import Audio
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

# We will extract all the features for all the genres and their songs

In [2]:
# Genre labels; extract_features_from_genres uses a genre's position in this list as its class index
genre_names = ['rock', 'pop', 'arabesk', 'turk_sanat', 'jazz', 'rap']

# Directories containing the audio files for the different genres (one per label, same order)
genre_dirs = [f'./rawData/{genre}/' for genre in genre_names]

In [3]:
def _resample_to_length(values, length):
    """Linearly resample a 1-D array so that it has exactly ``length`` points."""
    if length < 1:
        raise ValueError("desired_length must be a positive integer")

    # Already the right size: hand the array back untouched
    if len(values) == length:
        return values

    # A single frame has no neighbour to interpolate towards; repeat its value
    if len(values) == 1:
        return np.full(length, values[0], dtype=float)

    # Map both the source and the target onto [0, 1] and interpolate between them
    interp_func = interp1d(np.linspace(0, 1, len(values)), values, kind='linear')
    return interp_func(np.linspace(0, 1, length))


def extract_audio_features(audio_path, desired_length=1280):
    """Load one audio file and compute its spectral and temporal features.

    Parameters
    ----------
    audio_path : str or path-like
        Audio file to analyse; passed unchanged to ``librosa.load``.
    desired_length : int, optional
        Number of points the spectral rolloff curve is resampled to.
        Defaults to 1280, the value that was previously hard-coded.

    Returns
    -------
    dict
        Feature arrays under the keys 'stft_db', 'spectral_rolloff', 'zcr',
        'chroma', 'mfccs', 'rms' and 'spectral_centroid' (in that order).
        Only 'spectral_rolloff' is resampled to ``desired_length``; every
        other entry is stored exactly as librosa returns it.

    Raises
    ------
    ValueError
        If ``desired_length`` is smaller than 1.
    """
    # Load the audio file (librosa's default loading settings)
    x, sr = librosa.load(audio_path)

    # Dictionary that collects every feature of this track
    features = {}

    # Short-Time Fourier Transform magnitude, converted to decibels
    stft = librosa.stft(x)
    features['stft_db'] = librosa.amplitude_to_db(np.abs(stft))

    # Spectral rolloff, resampled so every track yields a curve of the same length.
    # NOTE(review): the constant 0.01 offset is kept from the original pipeline;
    # presumably it guards against silent (all-zero) frames - TODO confirm.
    spectral_rolloff = librosa.feature.spectral_rolloff(y=x+0.01, sr=sr)[0]
    features['spectral_rolloff'] = _resample_to_length(spectral_rolloff, desired_length)

    # Zero Crossing Rate (ZCR)
    features['zcr'] = librosa.feature.zero_crossing_rate(x)

    # Chroma feature
    features['chroma'] = librosa.feature.chroma_stft(y=x, sr=sr)

    # MFCCs
    features['mfccs'] = librosa.feature.mfcc(y=x, sr=sr)

    # RMS energy
    features['rms'] = librosa.feature.rms(y=x)

    # Spectral centroid
    features['spectral_centroid'] = librosa.feature.spectral_centroid(y=x, sr=sr)

    return features

In [5]:
def extract_features_from_genres(genre_names, root_dir="./rawData"):
    """Extract features for every audio file found under each genre directory.

    Parameters
    ----------
    genre_names : sequence of str
        Genre sub-directory names under ``root_dir``. A genre's position in
        this sequence is stored as the ``'label'`` of each of its tracks.
    root_dir : str, optional
        Directory that holds one sub-directory per genre. Defaults to
        "./rawData", the location that was previously hard-coded.

    Returns
    -------
    list of dict
        One dictionary per audio file: the result of
        ``extract_audio_features`` plus ``'file_name'`` and ``'label'`` keys.
        Genres are processed in the given order; within a genre, directories
        and files are visited in sorted order so the result is reproducible.
    """
    audio_extensions = ('.mp3', '.wav', '.flac')
    all_features = []

    for genre_index, genre in enumerate(genre_names):
        genre_dir = os.path.join(root_dir, genre)

        # Report a missing genre instead of silently dropping a whole class
        if not os.path.isdir(genre_dir):
            print(f"Skipping genre '{genre}': directory not found at {genre_dir}")
            continue

        print(f"Processing genre: {genre}")

        for root, dirs, files in os.walk(genre_dir):
            # Sorting in place makes os.walk descend in a deterministic order
            dirs.sort()

            for file in sorted(files):
                # Only audio files are processed; anything else is ignored
                if not file.lower().endswith(audio_extensions):
                    continue

                audio_path = os.path.join(root, file)
                print(f"Extracting features from: {audio_path}")

                features = extract_audio_features(audio_path)

                # Attach the file name and the genre label to the features
                features['file_name'] = file
                features['label'] = genre_index

                all_features.append(features)

    return all_features

In [6]:
# Build the full dataset: one feature dictionary per audio file, labelled with
# the index of its genre in genre_names. Every track is loaded and analysed
# here, so this is likely the slowest cell of the notebook.
all_features = extract_features_from_genres(genre_names)

Processing genre: rock
Extracting features from: ./rawData\rock\100_Paramparça.mp3
Extracting features from: ./rawData\rock\10_Cevapsız Sorular.mp3
Extracting features from: ./rawData\rock\11_Yıldızlar Üstüne Parlıyorken.mp3
Extracting features from: ./rawData\rock\12_Haydi Gel İçelim.mp3
Extracting features from: ./rawData\rock\13_Ben Seni Ararken.mp3
Extracting features from: ./rawData\rock\14_Anlayamazsın.mp3
Extracting features from: ./rawData\rock\15_Beni Yakın.mp3
Extracting features from: ./rawData\rock\16_Aşk Nerden Nereye.mp3
Extracting features from: ./rawData\rock\17_Sarılsak Geçerdi.mp3
Extracting features from: ./rawData\rock\18_Oyunbozan.mp3
Extracting features from: ./rawData\rock\19_Beni Aşka İnandır.mp3
Extracting features from: ./rawData\rock\1_Belki.mp3
Extracting features from: ./rawData\rock\20_Her Yol Uçurum.mp3
Extracting features from: ./rawData\rock\21_Gemiler.mp3
Extracting features from: ./rawData\rock\22_Gamzedeyim Deva Bulmam.mp3
Extracting features from: .

In [8]:
# Persist the extracted features so later cells can reload them without re-running the extraction
with open('features.pkl', 'wb') as pickle_out:
    pickle.dump(all_features, pickle_out)

# Report how many tracks made it into the dataset
num_tracks = len(all_features)
print(f"Extracted features for {num_tracks} audio files.")

Extracted features for 558 audio files.


# The 1D features all have the same length (1280), but the 2D features differ in length along both dimensions. They also cannot be collapsed into 1D, because flattening would be nonsensical for the kind of information they carry. So this is the final form of the features.

In [17]:
# Reload the saved feature list from disk (read-binary mode).
# NOTE: pickle.load can execute arbitrary code - only load files created by this notebook.
with open('features.pkl', 'rb') as pickle_in:
    loaded_object = pickle.load(pickle_in)

# Sanity check: show the feature dictionary of the first track
first_entry = loaded_object[0]
print(first_entry)

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x00000268B6B7EEF0>>
Traceback (most recent call last):
  File "c:\Users\PC\Documents\GitHub\CS464_ML_Project\venv\lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


{'stft_db': array([[  8.279913  , -22.848013  ,   2.7390857 , ...,  -4.8315883 ,
         -0.5552592 ,  -0.40884867],
       [ 12.51931   ,  13.182785  ,  16.3331    , ...,  -0.07186656,
          8.221512  ,  11.59392   ],
       [ 20.513784  ,  22.806252  ,   1.2922056 , ...,  -1.1368241 ,
         11.856032  ,  16.015114  ],
       ...,
       [-33.62595   , -33.62595   , -33.62595   , ..., -33.62595   ,
        -33.62595   , -33.62595   ],
       [-33.62595   , -33.62595   , -33.62595   , ..., -33.62595   ,
        -33.62595   , -33.62595   ],
       [-33.62595   , -33.62595   , -33.62595   , ..., -33.62595   ,
        -33.62595   , -33.62595   ]], dtype=float32), 'spectral_rolloff': array([6513.79394531, 6707.59277344, 6987.52441406, ..., 6524.56054688,
       6266.16210938, 5953.93066406]), 'zcr': array([[0.06005859, 0.09277344, 0.11376953, ..., 0.1796875 , 0.14453125,
        0.08789062]]), 'chroma': array([[0.35812885, 0.5844225 , 0.3431106 , ..., 0.05782335, 0.10968778,
      