In [2]:
import os
import librosa
import math
import json

In [3]:
DATASET_PATH = "./Final_Dataset"
JSON_PATH = "data2.json"

SAMPLE_RATE = 22050
DURATION = 0.9230839002267573
SAMPLE_PER_TRACK = SAMPLE_RATE*DURATION

In [4]:
def save_mfcc(dataset_path,json_path,n_mfcc = 30,n_fft = 2048,hop_length = 512):
    data = {
        "mapping" : [],
        "mfcc" : [],
        "labels" : []
    }
    
    expected_num_mfcc_vectors_per_track = math.ceil(int(SAMPLE_PER_TRACK-100) / hop_length) 
    
    for i, (dirpath,dirnames,filenames) in enumerate(os.walk(dataset_path)):
        #i = 0 gives dirpath directly instead of iterating
        if dirpath is not dataset_path:
            
            #save semantic label
            dirpath_components = dirpath.split("/")
            semantic_label = dirpath_components[-1]
            data["mapping"].append(semantic_label)
            
            print("\n Processing{}".format(semantic_label))
            
            #process files for specific case
            for f in filenames:
                
                #load audio file
                file_path = os.path.join(dirpath,f)
                signal, sr = librosa.load(file_path, sr = SAMPLE_RATE)
                
                #process the entire segment
                mfcc = librosa.feature.mfcc(signal[0:int(SAMPLE_PER_TRACK-100)],
                                            sr = sr,
                                            n_fft = n_fft,
                                            n_mfcc = n_mfcc,
                                            hop_length = hop_length
                                           )
                mfcc = mfcc.T
                
                if(len(mfcc)==expected_num_mfcc_vectors_per_track):
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(i-1)
                    print("{}".format(file_path))
                    
    with open(json_path,"w") as fp:
        json.dump(data,fp,indent=4)
            
            
    

In [5]:
save_mfcc(DATASET_PATH,JSON_PATH)


 Processingcovid
./Final_Dataset/covid/covid-13.wav
./Final_Dataset/covid/covid-8.wav
./Final_Dataset/covid/covid9.wav
./Final_Dataset/covid/covid_4.wav
./Final_Dataset/covid/covid2.wav
./Final_Dataset/covid/covid_27.wav
./Final_Dataset/covid/covid_21.wav
./Final_Dataset/covid/covid-29.wav
./Final_Dataset/covid/covid3.wav
./Final_Dataset/covid/covid-20.wav
./Final_Dataset/covid/covid17.wav
./Final_Dataset/covid/covid-25.wav
./Final_Dataset/covid/covid_29.wav
./Final_Dataset/covid/covid7.wav
./Final_Dataset/covid/covid_7.wav
./Final_Dataset/covid/covid_16.wav
./Final_Dataset/covid/covid14.wav
./Final_Dataset/covid/covid_20.wav
./Final_Dataset/covid/covid15.wav
./Final_Dataset/covid/covid-1.wav
./Final_Dataset/covid/covid-18.wav
./Final_Dataset/covid/covid-2.wav
./Final_Dataset/covid/covid16.wav
./Final_Dataset/covid/covid5.wav
./Final_Dataset/covid/covid_15.wav
./Final_Dataset/covid/covid-17.wav
./Final_Dataset/covid/covid-16.wav
./Final_Dataset/covid/covid-14.wav
./Final_Dataset/covid