In [None]:
from google.colab import drive

# Attach Google Drive to the Colab runtime's filesystem so that files stored
# on Drive can be read through the mount point below.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import librosa
import warnings
import os
# Suppress every warning category for the rest of the session.
# NOTE(review): with no category/module filter this also hides deprecation and
# future warnings raised by the libraries imported above -- confirm that
# silencing those is intended.
warnings.filterwarnings("ignore")

In [None]:
def extractFeatures(signal, sr):
    """Summarise an audio signal as a flat list of spectral statistics.

    The audio array is handed to librosa by keyword (``y=``). Recent librosa
    releases accept it only that way, and older releases accept the keyword
    form as well. The sampling rate is forwarded to every feature extractor
    that takes one, including the MFCC computation.

    Args:
        signal: Audio samples, forwarded to librosa as ``y``.
        sr: Sampling rate of ``signal`` in Hz.

    Returns:
        list: In order -- zero-crossing count (f0), spectral-centroid mean and
        variance (f1-f2), spectral-rolloff mean and variance (f3-f4), then
        per-row means followed by per-row variances of the chromagram, then
        per-row means followed by per-row variances of the MFCC matrix.
        With librosa's default 12 chroma bins and 20 MFCCs this is 69 values.
    """
    features = []

    # Zero crossings: count of sign changes over the whole signal.
    zero_crossing = librosa.zero_crossings(signal, pad=False)
    features.append(zero_crossing.sum())  # f0

    # Spectral centroid: take the single returned row, then mean/variance.
    spectral_centroids = librosa.feature.spectral_centroid(y=signal, sr=sr)[0]
    features.append(spectral_centroids.mean())  # f1
    features.append(spectral_centroids.var())  # f2

    # Spectral rolloff.
    # NOTE(review): the +0.01 offset is kept from the original code;
    # presumably it avoids degenerate all-zero frames -- confirm.
    spectral_rolloff = librosa.feature.spectral_rolloff(y=signal + 0.01, sr=sr)[0]
    features.append(spectral_rolloff.mean())  # f3
    features.append(spectral_rolloff.var())  # f4

    # Chroma frequencies: one statistic per chroma row.
    chromagram = librosa.feature.chroma_stft(y=signal, sr=sr)
    features += chromagram.mean(axis=1).tolist()  # f5 - f16
    features += chromagram.var(axis=1).tolist()  # f17 - f28

    # Mel-frequency cepstral coefficients: one statistic per coefficient row.
    # sr is passed explicitly so the mel filterbank matches the real rate
    # instead of librosa's default.
    MFCCs = librosa.feature.mfcc(y=signal, sr=sr)
    features += MFCCs.mean(axis=1).tolist()  # f29 - f48
    features += MFCCs.var(axis=1).tolist()  # f49 - f68

    return features

In [None]:
# Column layout of the output table: three identifying columns followed by the
# values returned by extractFeatures, in the same order.
# NOTE: 'zero_corssing' is a misspelling of "zero_crossing"; it is kept as-is
# so the schema of the exported table does not change.
feature_columns = (
    ['zero_corssing',
     'spectral_centroid_mean', 'spectral_centroid_var',
     'spectral_rolloff_mean', 'spectral_rolloff_var']
    + ['chroma_%d_mean' % i for i in range(1, 13)]
    + ['chroma_%d_var' % i for i in range(1, 13)]
    + ['mfcc_%d_mean' % i for i in range(1, 21)]
    + ['mfcc_%d_var' % i for i in range(1, 21)]
)
columns = ['name', 'dastgah', 'instrument'] + feature_columns

# Every clip is cut to exactly this many seconds; shorter files are skipped.
CLIP_SECONDS = 40

# Expected layout: parent_path/<dastgah>/<instrument>/<audio file>.
parent_path = 'drive/MyDrive/ML_Data/ML_prj_2023'

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; appending to a DataFrame row by row re-allocates on every insert.
records = []

# Listings are sorted so the row order is reproducible between runs.
for dastgah in sorted(os.listdir(parent_path)):
    dastgah_path = os.path.join(parent_path, dastgah)
    # Skip the metadata CSV and any other non-directory entry, which would
    # otherwise make os.listdir raise.
    if dastgah == 'mlproject.csv' or not os.path.isdir(dastgah_path):
        continue
    print('processing on dastgah: ', dastgah)
    for instrument in sorted(os.listdir(dastgah_path)):
        instrument_path = os.path.join(dastgah_path, instrument)
        if not os.path.isdir(instrument_path):
            continue
        print('\t processing on instrument: ', instrument)
        for music in sorted(os.listdir(instrument_path)):
            try:
                sig, sr = librosa.load(os.path.join(instrument_path, music),
                                       offset=0, duration=CLIP_SECONDS)
                SAMPLES_TO_CONSIDER = sr * CLIP_SECONDS
                # Files shorter than the clip length are silently left out.
                if len(sig) < SAMPLES_TO_CONSIDER:
                    continue
                sig = sig[:SAMPLES_TO_CONSIDER]
                features = extractFeatures(sig, sr)
                row = [music, dastgah, instrument] + features
                # Fail for this file only, not when the table is assembled.
                if len(row) != len(columns):
                    raise ValueError('expected %d values, got %d'
                                     % (len(columns), len(row)))
                records.append(row)
            except Exception as exc:
                # Keep going on a bad file, but report the actual cause;
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                print("An exception occurred while reading %s: %r" % (music, exc))

df = pd.DataFrame(records, columns=columns)

processing on dastgah:  D_2
	 processing on instrument:  I_4
	 processing on instrument:  I_0
	 processing on instrument:  I_5
	 processing on instrument:  I_2
	 processing on instrument:  I_3
	 processing on instrument:  I_1
processing on dastgah:  D_6
	 processing on instrument:  I_4
	 processing on instrument:  I_0
	 processing on instrument:  I_2
	 processing on instrument:  I_1
	 processing on instrument:  I_5
	 processing on instrument:  I_3
processing on dastgah:  D_0
	 processing on instrument:  I_5
	 processing on instrument:  I_0
	 processing on instrument:  I_3
	 processing on instrument:  I_2
	 processing on instrument:  I_4
	 processing on instrument:  I_1
processing on dastgah:  D_4
	 processing on instrument:  I_2
	 processing on instrument:  I_3
	 processing on instrument:  I_5
	 processing on instrument:  I_0
	 processing on instrument:  I_4
	 processing on instrument:  I_1
processing on dastgah:  D_3
	 processing on instrument:  I_2
	 processing on instrument:  I_1
	 

In [None]:
from pathlib import Path

# Write the collected table to dataset.csv (relative to the current working
# directory), making sure the containing directory exists first.
filepath = Path('dataset.csv')
out_dir = filepath.parent
out_dir.mkdir(exist_ok=True, parents=True)
df.to_csv(filepath, index=False)