In [None]:
import glob
import os

import numpy as np
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive; the data paths used in the cells below
# all live under this mount point. force_remount=True re-mounts even if a
# mount is already present.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:
import librosa

# get statistical features in numpy
def stats(matrix):
    """Summarize an array with five scalar statistics.

    Parameters
    ----------
    matrix : array-like
        Values to summarize. All elements are pooled together (no axis is
        used), so the input may have any number of dimensions.

    Returns
    -------
    numpy.ndarray
        1-D array of length 5: ``[mean, std, max, min, median]``.
        For an empty input a vector of five zeros is returned, so callers
        that assemble fixed-length feature vectors keep a constant layout.
    """
    values = np.asarray(matrix)

    # np.amax / np.amin raise ValueError on zero-size input (and mean/std/
    # median would yield NaN); return a neutral, fixed-length vector instead.
    if values.size == 0:
        return np.zeros(5)

    return np.array([np.mean(values),
                     np.std(values),
                     np.amax(values),
                     np.amin(values),
                     np.median(values)])

# featurize with librosa following documentation
# https://librosa.github.io/librosa/feature.html 
def librosa_featurize(filename, categorize):
    """Extract a fixed-length librosa feature vector from one audio file.

    Every frame-level feature is collapsed with ``stats`` into five numbers
    (mean, std, max, min, median). The features are grouped as:

    * onset    (12):  onset count, stats of onset frames, tempo,
                      stats of onset strength
    * rhythm   (65):  stats of the first 13 tempogram rows
    * spectral (100): stats of the first 13 MFCCs, the two poly_features
                      rows, and the first row of spectral centroid,
                      bandwidth, contrast, flatness and rolloff
    * power    (10):  stats of zero-crossing rate and RMS energy

    Parameters
    ----------
    filename : str
        Path of the audio file, passed to ``librosa.load``.
    categorize : bool
        If true, return a dict keyed by ``'onset'``, ``'rhythm'``,
        ``'spectral'`` and ``'power'``; otherwise return one flat array
        with the groups concatenated in that order.

    Returns
    -------
    dict or numpy.ndarray
        The per-group arrays, or a single 1-D array of 187 values.
    """
    print('librosa featurizing: %s'%(filename))

    y, sr = librosa.load(filename)

    # FEATURE EXTRACTION
    ######################################################
    # Audio is passed by keyword (y=..., sr=...): recent librosa releases
    # reject positional audio arguments, and forwarding sr keeps every
    # feature consistent with the rate the file was actually loaded at.
    # Chroma features are intentionally not computed: they never entered the
    # returned feature vector.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    poly_features = librosa.feature.poly_features(y=y, sr=sr)
    tempogram = librosa.feature.tempogram(y=y, sr=sr)

    # Only the first row of each spectral descriptor is kept.
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)[0]
    spectral_flatness = librosa.feature.spectral_flatness(y=y)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]

    onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
    onset_strength = librosa.onset.onset_strength(y=y, sr=sr)
    # NOTE(review): newer librosa exposes this as librosa.feature.rhythm.tempo;
    # confirm against the installed version before upgrading.
    tempo = librosa.beat.tempo(y=y, sr=sr)[0]

    zero_crossings = librosa.feature.zero_crossing_rate(y=y)[0]
    rmse = librosa.feature.rms(y=y)[0]

    # FEATURE CLEANING
    ######################################################

    # onset detection features: count, stats of onset frames, tempo,
    # stats of the onset strength envelope
    onset_features = np.concatenate([[len(onset_frames)],
                                     stats(onset_frames),
                                     [tempo],
                                     stats(onset_strength)])

    # rhythm features (384) - take the first 13
    rhythm_features = np.concatenate([stats(tempogram[row])
                                      for row in range(13)])

    # spectral features (first 13 mfccs, then the remaining descriptors)
    spectral_rows = [mfcc[row] for row in range(13)]
    spectral_rows += [poly_features[0],
                      poly_features[1],
                      spectral_centroid,
                      spectral_bandwidth,
                      spectral_contrast,
                      spectral_flatness,
                      spectral_rolloff]
    spectral_features = np.concatenate([stats(row) for row in spectral_rows])

    # power features
    power_features = np.concatenate([stats(zero_crossings),
                                     stats(rmse)])

    if categorize:
        # output the features grouped by category
        features = {'onset': onset_features,
                    'rhythm': rhythm_features,
                    'spectral': spectral_features,
                    'power': power_features}
    else:
        # The groups have different lengths, so concatenate the list directly;
        # wrapping it in np.array() first would build a ragged array.
        features = np.concatenate([onset_features,
                                   rhythm_features,
                                   spectral_features,
                                   power_features])

    return features

In [None]:
import json
import warnings
# Suppress all Python warnings from here on, so the long featurization runs
# below print only the per-file progress lines.
# NOTE(review): this is a blanket filter and will also hide deprecation
# warnings from numpy/librosa; consider narrowing it.
warnings.filterwarnings('ignore')

def get_librosa_np(wav_file, truth_or_lie):
    """Featurize every ``.wav`` in a data folder and save one ``.npy`` per file.

    Parameters
    ----------
    wav_file : str
        Name of the sub-folder of ``VoiceData/`` that holds the ``.wav``
        files (e.g. ``'trainingData'``).
    truth_or_lie : str
        Name of the sub-folder of ``VoiceData_output/`` to write into; an
        empty string writes directly into ``VoiceData_output/``.

    Returns
    -------
    None
        Results are written to disk as ``<wav name>.npy`` holding the flat
        (uncategorized) feature vector from ``librosa_featurize``.
    """
    filelocation = '/content/drive/Shareddrives/想聊對不隊/Code/VoiceData/'+wav_file +'/'
    output_path = '/content/drive/Shareddrives/想聊對不隊/Code/VoiceData_output/'+ truth_or_lie +'/'

    # np.save does not create missing directories; make sure the target exists.
    os.makedirs(output_path, exist_ok=True)

    # Sort so the processing (and log) order is deterministic across runs.
    for wav_path in sorted(glob.glob(filelocation + '*.wav')):
        # Swap only the trailing extension; a '.wav' appearing elsewhere in
        # the file name is left untouched.
        stem = os.path.splitext(os.path.basename(wav_path))[0]
        features_np = librosa_featurize(wav_path, False)
        np.save(os.path.join(output_path, stem + '.npy'), features_np)

In [None]:
# Featurize every .wav under VoiceData/trainingData/. The empty second
# argument makes the .npy files land directly in VoiceData_output/.
get_librosa_np('trainingData', '')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/WRz1F6DqmRQ.002.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/zMXfMcqpPwQ.005.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/YP7N98ECZaI.005.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/HTF8k56_Oxo.001.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/xe3l76e8yE4.001.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/5PsD_NFMWpQ.000.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/JO44XCaQGVY.003.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/cgp1OzTOq1o.005.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/trainingData/jVGTEDAYWZ8.003.wav
librosa 

In [None]:
# Featurize every .wav under VoiceData/validationData/. Output goes to the
# same VoiceData_output/ folder as the training features (second argument '').
get_librosa_np('validationData', '')

librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/Ahiq4zhmYco.003.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/GwKmjEb3qN0.000.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/f83xK4chMJs.001.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/4XdZDodpzac.001.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/vqLtOsoNPnU.003.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/Zwu5Y5r3Lrs.004.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/vLAHfIjmRMc.003.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/n0acEDmc0mA.001.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/Code/VoiceData/validationData/shtirfEzWFA.005.wav
librosa featurizing: /content/drive/Shareddrives/想聊對不隊/

In [None]:
def get_df(truth_lie, label,
           base_dir='/content/drive/Shareddrives/想聊對不隊/Code/VoiceData_output/',
           n_features=187):
    """Load saved ``.npy`` feature vectors from a folder into one DataFrame.

    Parameters
    ----------
    truth_lie : str
        Sub-folder of ``base_dir`` to read; an empty string reads
        ``base_dir`` itself.
    label : scalar
        Value written to the ``'label'`` column of every row.
    base_dir : str, optional
        Root folder containing the feature files. Defaults to the shared
        Drive output folder.
    n_features : int, optional
        Number of feature columns to create when no ``.npy`` file is found,
        so that an empty result still has the expected width.

    Returns
    -------
    pandas.DataFrame
        One row per ``.npy`` file, ordered by file path. Feature columns are
        named ``0 .. k-1``; ``'index'`` holds the file name and ``'label'``
        holds ``label``.

    Raises
    ------
    ValueError
        If the loaded feature vectors do not all have the same length.
    """
    file_array = sorted(glob.glob(os.path.join(base_dir, truth_lie, '*.npy')))
    file_list = [os.path.basename(path) for path in file_array]

    # Collect the rows first and stack once: concatenating inside the loop
    # re-copies the whole matrix for every file.
    rows = [np.reshape(np.load(path), (1, -1)) for path in file_array]
    if rows:
        all_array = np.concatenate(rows, axis=0).astype(np.float64, copy=False)
    else:
        all_array = np.empty((0, n_features))

    df = pd.DataFrame(all_array)
    df['index'] = file_list
    df['label'] = label

    return df

In [None]:
# Load every saved feature vector from the root of VoiceData_output/ (empty
# sub-folder name) into one frame; every row gets label 0.
all_df = get_df('', 0)
# Bare expression: display the resulting DataFrame as the cell output.
all_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,index,label
0,70.0,299.600000,190.115409,655.0,4.0,286.0,135.999178,1.908365,1.997765,11.704920,0.0,1.135551,1.0,0.0,1.0,1.0,1.0,0.775219,0.019928,0.812975,0.734596,0.773338,0.515539,0.036613,0.573661,0.432881,0.511339,0.441871,0.039698,0.506299,0.339871,0.439026,0.426913,0.020013,0.455072,0.379451,0.427687,0.432453,0.020377,0.469423,...,11.065073,-0.650785,1.893801,3077.179742,2097.215756,8129.788507,750.283839,1827.883234,2370.874489,785.299790,4001.204741,975.697850,2273.302810,18.599344,5.458899,50.197826,6.425083,18.435578,0.068250,0.102812,0.342515,0.000076,0.004718,5236.875000,3316.591473,10066.772461,559.863281,3736.010742,0.227507,0.186812,0.716309,0.020020,0.127197,0.123281,0.094556,0.370559,0.003385,0.122450,--Ymqszjv54.001.npy,0
1,56.0,356.750000,172.386220,658.0,3.0,366.0,103.359375,1.704722,1.851429,11.989050,0.0,0.996852,1.0,0.0,1.0,1.0,1.0,0.783273,0.045235,0.926091,0.716522,0.776311,0.540025,0.092629,0.850525,0.431791,0.503680,0.475343,0.107302,0.841786,0.362733,0.439607,0.468771,0.106470,0.826907,0.352459,0.455900,0.462801,0.094306,0.807206,...,11.268920,-0.842661,1.193815,3225.070676,1967.454537,7975.048548,750.064924,2482.453287,2578.955403,773.489875,3943.815092,1070.088754,2704.532700,18.654294,6.331278,43.538085,4.840851,18.771733,0.083272,0.109398,0.355851,0.000057,0.010594,5758.451594,3210.503279,9980.639648,581.396484,7208.239746,0.230346,0.182558,0.780762,0.012207,0.172363,0.120945,0.109059,0.425536,0.003387,0.105818,--Ymqszjv54.003.npy,0
2,44.0,368.181818,173.694517,657.0,20.0,397.5,103.359375,1.612900,1.848751,13.731165,0.0,0.954434,1.0,0.0,1.0,1.0,1.0,0.787342,0.062246,0.962007,0.715262,0.769998,0.575593,0.120855,0.922194,0.425099,0.557616,0.488559,0.135150,0.914063,0.337346,0.465448,0.458627,0.132640,0.908543,0.342759,0.430899,0.438253,0.122404,0.888744,...,11.174318,-0.279807,0.604253,3282.062992,1833.258310,7793.660925,876.410810,3218.689656,2615.708610,760.337150,4022.023279,981.656619,2958.875117,17.490443,5.750473,49.234673,4.864487,17.348023,0.094706,0.108676,0.321394,0.000101,0.024909,5929.901567,3123.155854,9819.140625,785.961914,7816.552734,0.240638,0.175609,0.705566,0.018066,0.204834,0.097825,0.104719,0.406333,0.003315,0.061828,--Ymqszjv54.004.npy,0
3,50.0,350.320000,170.453212,656.0,4.0,369.5,117.453835,1.680763,1.752604,14.726851,0.0,1.014865,1.0,0.0,1.0,1.0,1.0,0.807657,0.053045,0.879666,0.701267,0.834749,0.593712,0.081511,0.740336,0.442828,0.612894,0.502030,0.083388,0.670648,0.334552,0.497552,0.447168,0.075811,0.596587,0.265390,0.442006,0.414596,0.070158,0.555836,...,10.964190,-0.749950,0.896370,3240.789095,1869.837566,8083.325783,630.409165,3095.388839,2565.434910,790.442451,3928.434006,928.779698,2868.492201,18.424038,6.374528,46.107464,5.775931,18.158202,0.091321,0.110386,0.359568,0.000021,0.021976,5827.537287,3192.247631,9991.406250,602.929688,7735.803223,0.234754,0.173853,0.747559,0.019531,0.184082,0.113174,0.114083,0.458946,0.003402,0.085229,--Ymqszjv54.005.npy,0
4,65.0,307.076923,172.053597,659.0,20.0,289.0,123.046875,1.598641,1.512168,13.232365,0.0,1.130378,1.0,0.0,1.0,1.0,1.0,0.779833,0.037149,0.870994,0.708192,0.782795,0.569508,0.057849,0.748692,0.464202,0.569439,0.534204,0.059135,0.720962,0.405883,0.521930,0.552663,0.048979,0.706770,0.431353,0.554106,0.558569,0.044725,0.676348,...,4.491610,-0.422197,0.562216,3153.821332,1409.114753,8074.849295,1008.373229,2750.530355,2549.193161,478.173866,4076.879054,1362.303576,2574.164398,16.883227,6.701566,41.364237,3.028027,15.700605,0.064234,0.093385,0.405756,0.000124,0.011422,5835.318604,2191.448026,9872.973633,1313.525391,5878.564453,0.185155,0.148102,0.763184,0.017578,0.122070,0.042228,0.033160,0.157942,0.001221,0.035599,-2qsCrkXdWs.001.npy,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7195,56.0,333.535714,200.278464,653.0,3.0,347.5,112.347147,1.733902,2.035767,18.331079,0.0,1.046326,1.0,0.0,1.0,1.0,1.0,0.707980,0.027991,0.798587,0.676931,0.698467,0.451752,0.036925,0.592331,0.418555,0.433222,0.383924,0.037262,0.540392,0.336909,0.374243,0.378517,0.039606,0.518208,0.316256,0.377561,0.396902,0.057538,0.557786,...,6.957117,-0.134092,0.695997,1961.536709,1269.571821,7562.656657,445.939444,1647.481531,2186.373127,561.276902,3637.997172,1082.183842,2167.106776,20.132795,6.170326,52.014975,8.233410,19.791018,0.025196,0.043539,0.278663,0.000024,0.004452,3742.862216,2437.586159,9733.007812,430.664062,2971.582031,0.104698,0.104414,0.723145,0.009766,0.073730,0.040916,0.035830,0.189921,0.001729,0.037826,zxuPCshTSOs.005.npy,0
7196,75.0,320.600000,192.758917,658.0,10.0,324.0,117.453835,2.213463,2.392744,21.820965,0.0,1.491134,1.0,0.0,1.0,1.0,1.0,0.723812,0.027423,0.777699,0.671523,0.722884,0.469929,0.039153,0.585613,0.412201,0.472734,0.400306,0.059397,0.569211,0.328280,0.389711,0.388492,0.082124,0.630834,0.302737,0.369266,0.393855,0.077012,0.631427,...,4.177116,0.009453,0.451308,2182.642931,1243.155445,6383.927858,584.151841,1814.315915,2282.047049,556.609281,3689.759621,1137.637099,2253.554943,21.682741,5.993691,41.100238,6.814862,21.502241,0.033252,0.058165,0.304515,0.000058,0.003007,4370.130948,2287.172736,9237.744141,419.897461,3849.060059,0.104749,0.098080,0.549805,0.015137,0.066162,0.027205,0.022007,0.110740,0.000437,0.023900,zyGz_H1UTnQ.002.npy,0
7197,76.0,338.394737,198.180744,654.0,5.0,353.5,135.999178,2.141203,2.282744,17.269548,0.0,1.356836,1.0,0.0,1.0,1.0,1.0,0.753313,0.040156,0.814178,0.705213,0.753426,0.505691,0.086823,0.644836,0.390437,0.509167,0.435834,0.096293,0.601015,0.297512,0.424442,0.424725,0.108358,0.580522,0.269348,0.404897,0.430435,0.130861,0.680266,...,4.580301,0.006926,0.454181,2474.840088,1590.842064,6720.425368,506.193278,1843.434196,2293.733081,633.358107,3732.041593,990.084690,2233.593918,20.945602,5.991808,39.640159,6.902322,21.085611,0.051223,0.079913,0.339266,0.000059,0.004777,4694.026212,2729.486446,9829.907227,419.897461,3870.593262,0.141411,0.143451,0.616699,0.015625,0.074707,0.030311,0.027495,0.145779,0.000381,0.025110,zyGz_H1UTnQ.003.npy,0
7198,79.0,324.962025,194.288930,642.0,4.0,336.0,117.453835,1.998982,1.938325,12.696980,0.0,1.365684,1.0,0.0,1.0,1.0,1.0,0.745742,0.026084,0.794031,0.700384,0.738207,0.508109,0.035809,0.606230,0.440440,0.496118,0.451634,0.043930,0.591947,0.356199,0.455989,0.472917,0.057189,0.651117,0.373601,0.459948,0.498976,0.050921,0.670149,...,1.895943,-0.003245,0.360884,1887.171943,1356.523333,7401.004847,508.411770,1302.393877,2085.963243,663.815197,3780.221700,1019.384553,1868.432791,21.264649,5.865563,51.505983,8.974303,21.544346,0.031877,0.066001,0.323397,0.000022,0.001032,3677.251199,2557.848127,9722.241211,452.197266,2691.650391,0.095617,0.110774,0.701660,0.010254,0.043945,0.021730,0.017598,0.094061,0.000434,0.019670,zyGz_H1UTnQ.004.npy,0


In [None]:
# Write the combined feature table to the shared Drive as CSV. index=False
# drops the pandas row index; the file names remain in the 'index' column.
all_df.to_csv('/content/drive/Shareddrives/想聊對不隊/Code/CSV/all_audio_result.csv', index=False)