In [2]:
# Import necessary packages
import os
import warnings

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from sklearn import preprocessing
%matplotlib inline

# Import Librosa for processing audio data
import librosa
import librosa.display
import IPython.display as ipd

warnings.filterwarnings('ignore')

## GTZAN data

In [3]:
# Base directory of the music data; the cells below read the feature CSV and the
# genres_original/ audio folders relative to it.
# Set the MUSIC_DATA_DIR environment variable to use a different location; the
# original local path is kept as the fallback so existing setups keep working.
general_path = os.environ.get('MUSIC_DATA_DIR', '/Users/user/Documents/Đồ án tốt nghiệp/Music Data/Data')

In [4]:
# Load the pre-computed GTZAN feature table (30-second clips) and preview it
gtzan_features_csv = f'{general_path}/features_30_sec.csv'
data_red = pd.read_csv(gtzan_features_csv)
data_red.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [5]:
# Drop the length column.
# errors='ignore' keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
data_red = data_red.drop(columns='length', errors='ignore')

# Function for adding FMA data to the GTZAN data
1. Import the FMA data
2. Extract the same features that the GTZAN data has
3. Append the FMA rows to the GTZAN data

In [6]:
def get_features_FMA():
    """Extract GTZAN-style summary features from the FMA audio clips.

    For every genre label found in ``data_red`` the clips numbered 100-149 are
    read from ``<general_path>/genres_original/<genre>/<genre>.00<NNN>.mp3``,
    leading/trailing silence is trimmed, and the mean and variance of each
    librosa feature are collected into one row per clip.

    Relies on the notebook globals ``data_red`` (for the genre list and the
    column layout) and ``general_path`` (data directory).

    Returns
    -------
    pandas.DataFrame
        One row per clip, with the same columns in the same order as
        ``data_red``. Columns of ``data_red`` that are not computed here are
        left as NaN.
    """
    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
    records = []

    # Loop through each genre and each clip number
    for genre in data_red.label.unique():
        for clip_id in range(100, 150):
            filename = '{}.00{}.mp3'.format(genre, clip_id)

            # Load the audio file (librosa's default sampling rate) and
            # trim leading/trailing silence
            y, sr = librosa.load('{}/genres_original/{}/{}'.format(general_path, genre, filename))
            audio_file, _ = librosa.effects.trim(y)

            # The audio is passed as y=...: librosa >= 0.10 no longer accepts
            # it positionally for the feature / beat functions, while the
            # keyword form also works on older releases.
            chromogram = librosa.feature.chroma_stft(y=audio_file, sr=sr)
            mfccs = librosa.feature.mfcc(y=audio_file, sr=sr)
            spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)[0]
            spectral_centroids = librosa.feature.spectral_centroid(y=audio_file, sr=sr)[0]
            tempo, _ = librosa.beat.beat_track(y=audio_file, sr=sr)
            harmony, perceptr = librosa.effects.hpss(audio_file)
            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio_file)
            rms = librosa.feature.rms(y=audio_file)
            spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)

            record = {'filename': filename}

            # Mean / variance (numpy default, ddof=0) of each feature
            summarised = [
                ('chroma_stft', chromogram),
                ('rms', rms),
                ('spectral_centroid', spectral_centroids),
                ('spectral_bandwidth', spectral_bandwidth),
                ('rolloff', spectral_rolloff),
                ('zero_crossing_rate', zero_crossing_rate),
                ('harmony', harmony),
                ('perceptr', perceptr),
            ]
            for feature_name, values in summarised:
                record['{}_mean'.format(feature_name)] = values.mean()
                record['{}_var'.format(feature_name)] = values.var()

            # Newer librosa versions may return the tempo as a 1-element array;
            # store a plain scalar either way.
            record['tempo'] = float(np.atleast_1d(tempo)[0])

            # One mean/var pair per MFCC coefficient (mfcc1 .. mfccN).
            # ddof=1 reproduces the sample variance that the previous
            # pandas-based computation (DataFrame.var) produced.
            # NOTE(review): the other *_var columns use ddof=0 - confirm which
            # convention the GTZAN CSV was built with.
            mfccs_mean = mfccs.mean(axis=1)
            mfccs_var = mfccs.var(axis=1, ddof=1)
            for number, (coef_mean, coef_var) in enumerate(zip(mfccs_mean, mfccs_var), start=1):
                record['mfcc{}_mean'.format(number)] = coef_mean
                record['mfcc{}_var'.format(number)] = coef_var

            record['label'] = genre
            records.append(record)

    # columns=... fixes the column set and order to match data_red
    return pd.DataFrame(records, columns=data_red.columns.tolist())


In [7]:
# Run the FMA feature extraction: one row per FMA clip, same columns as data_red.
# Every clip is loaded and analysed with librosa, so this cell can take a while.
df = get_features_FMA()

In [9]:
# Stack the GTZAN rows and the FMA rows on top of each other in one table
# (each part keeps its own row index)
frames_to_stack = [data_red, df]
combined_df_audio = pd.concat(frames_to_stack, axis=0)
combined_df_audio.head()

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,3805.839606,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,3550.522098,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,3042.260232,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,2184.745799,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,3579.757627,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [10]:
# Function to add more features for combined dataset
def add_more_features():
    """Compute additional summary features for every clip of the combined dataset.

    For each genre label in ``data_red`` the clips numbered 0-149 are read from
    ``<general_path>/genres_original/<genre>/``: numbers 0-99 as
    ``<genre>.000NN.wav`` and numbers 100-149 as ``<genre>.00NNN.mp3``.
    Each clip is trimmed of leading/trailing silence, then the mean and
    variance of chroma CQT, chroma CENS, spectral contrast, spectral flatness
    and tonnetz are recorded.

    Relies on the notebook globals ``data_red`` and ``general_path``.

    Returns
    -------
    pandas.DataFrame
        One row per clip, ordered genre by genre and, within a genre, by clip
        number. The frame has no filename/label column, so callers must rely
        on this row order.
    """
    feature_columns = ['chroma_cqt_mean', 'chroma_cqt_var',
                       'chroma_cens_mean', 'chroma_cens_var',
                       'spectral_contrast_mean', 'spectral_contrast_var',
                       'spectral_flatness_mean', 'spectral_flatness_var',
                       'tonnetz_mean', 'tonnetz_var']

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
    records = []

    for genre in data_red.label.unique():
        for clip_id in range(150):
            # Clip numbers are zero-padded to five digits; only the extension
            # differs between the first 100 clips (.wav) and the rest (.mp3).
            extension = 'wav' if clip_id < 100 else 'mp3'
            audio_path = '{}/genres_original/{}/{}.{:05d}.{}'.format(
                general_path, genre, genre, clip_id, extension)

            y, sr = librosa.load(audio_path)
            audio_file, _ = librosa.effects.trim(y)

            # The audio is passed as y=...: librosa >= 0.10 no longer accepts
            # it positionally for feature functions, while the keyword form
            # also works on older releases.
            extracted = {
                'chroma_cqt': librosa.feature.chroma_cqt(y=audio_file, sr=sr),
                'chroma_cens': librosa.feature.chroma_cens(y=audio_file, sr=sr),
                'spectral_contrast': librosa.feature.spectral_contrast(y=audio_file, sr=sr),
                'spectral_flatness': librosa.feature.spectral_flatness(y=audio_file),
                'tonnetz': librosa.feature.tonnetz(y=audio_file, sr=sr),
            }

            record = {}
            for feature_name, values in extracted.items():
                record['{}_mean'.format(feature_name)] = values.mean()
                record['{}_var'.format(feature_name)] = values.var()
            records.append(record)

    # columns=... fixes the column set and order even when no clip was processed
    return pd.DataFrame(records, columns=feature_columns)
                
    

In [11]:
# Compute the extra features (chroma CQT/CENS, spectral contrast/flatness, tonnetz)
# for every clip and preview the first rows.
# Every clip is loaded and analysed with librosa, so this cell can take a while.
added_more_features = add_more_features()
added_more_features.head()

Unnamed: 0,chroma_cqt_mean,chroma_cqt_var,chroma_cens_mean,chroma_cens_var,spectral_contrast_mean,spectral_contrast_var,spectral_flatness_mean,spectral_flatness_var,tonnetz_mean,tonnetz_var
0,0.535131,0.053737,0.272036,0.00933,20.526699,76.741834,0.004498,3e-05,0.007162,0.004417
1,0.476749,0.057883,0.26098,0.015223,20.676128,67.85152,0.002298,2.4e-05,0.029153,0.004762
2,0.44331,0.058933,0.26263,0.014359,22.197517,76.130717,0.002631,1.1e-05,0.024264,0.010511
3,0.31736,0.071272,0.226383,0.032084,21.426268,63.384626,0.000954,3e-06,0.026563,0.017854
4,0.496918,0.066167,0.25955,0.015967,21.466338,62.986186,0.003238,1.9e-05,-0.020918,0.010124


In [49]:
# Trial merge on two genres only: put the extra features next to the matching
# rows of the combined table.
# Full column layout = base feature columns followed by the extra feature columns
completed_columns = list(combined_df_audio.columns) + list(added_more_features.columns)

# Empty frame that only carries the full column layout
completed_df = pd.DataFrame(columns=completed_columns)
categories = data_red.label.unique().tolist()


# First genre  <->  extra-feature rows 0..149
first_genre_rows = combined_df_audio['label'] == categories[0]
new = combined_df_audio[first_genre_rows].reset_index(drop=True)
add = pd.concat([new, added_more_features.iloc[:150]], axis=1)

# 'classical'  <->  extra-feature rows 150..299, re-indexed from 0 so that the
# side-by-side concat lines up with new_1
classical_rows = combined_df_audio['label'] == 'classical'
new_1 = combined_df_audio[classical_rows].reset_index(drop=True)
extra_classical = added_more_features.iloc[150:300].reset_index(drop=True)
add_1 = pd.concat([new_1, extra_classical], axis=1)

# Stack both genres on top of each other
completed_1 = pd.concat([add, add_1], axis=0)


In [50]:
# Inspect the merged table (base features and extra features side by side)
completed_1

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,chroma_cqt_mean,chroma_cqt_var,chroma_cens_mean,chroma_cens_var,spectral_contrast_mean,spectral_contrast_var,spectral_flatness_mean,spectral_flatness_var,tonnetz_mean,tonnetz_var
0,blues.00000.wav,0.350088,0.088757,0.130228,0.002827,1784.165850,129774.064525,2002.449060,85882.761315,3805.839606,...,0.535131,0.053737,0.272036,0.009330,20.526699,76.741834,0.004498,3.038815e-05,0.007162,0.004417
1,blues.00001.wav,0.340914,0.094980,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,3550.522098,...,0.476749,0.057883,0.260980,0.015223,20.676128,67.851520,0.002298,2.353845e-05,0.029153,0.004762
2,blues.00002.wav,0.363637,0.085275,0.175570,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,3042.260232,...,0.443310,0.058933,0.262630,0.014359,22.197517,76.130717,0.002631,1.119039e-05,0.024264,0.010511
3,blues.00003.wav,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,2184.745799,...,0.317360,0.071272,0.226383,0.032084,21.426268,63.384626,0.000954,2.660916e-06,0.026563,0.017854
4,blues.00004.wav,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,3579.757627,...,0.496918,0.066167,0.259550,0.015967,21.466338,62.986186,0.003238,1.897771e-05,-0.020918,0.010124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,classical.00145.mp3,0.241412,0.082152,0.150335,0.003343,1089.019086,40003.114744,1145.005131,37903.667499,1954.334167,...,0.275055,0.078104,0.192852,0.046141,28.697255,85.058221,0.000276,5.011583e-07,0.032311,0.049729
146,classical.00146.mp3,0.313317,0.085977,0.072114,0.001634,764.085222,13156.713861,834.656802,21144.129570,1436.271310,...,0.312090,0.086091,0.187906,0.048024,26.782440,59.301598,0.000037,9.509690e-08,0.018112,0.049252
147,classical.00147.mp3,0.234915,0.082297,0.079151,0.002413,945.774940,28040.275467,983.094460,31833.394847,1702.480321,...,0.275261,0.101666,0.180947,0.050591,29.666618,48.461453,0.000076,8.078375e-08,-0.006930,0.067977
148,classical.00148.mp3,0.207310,0.086884,0.057287,0.001734,845.188872,18168.723356,827.372702,25861.453828,1352.747178,...,0.286725,0.093099,0.185287,0.049002,27.993377,45.450996,0.000031,1.743117e-08,0.032583,0.047467


In [51]:
# Merge the base audio features with the extra features, genre by genre.
# Full column layout = base feature columns followed by the extra feature columns
completed_columns = combined_df_audio.columns.to_list() + added_more_features.columns.to_list()

# Empty template frame that only carries the full column layout
completed_df = pd.DataFrame(columns=completed_columns)
categories = list(data_red.label.unique())

# added_more_features has no genre column: its rows come in consecutive blocks
# of `rows_per_genre`, one block per genre, in the same order as `categories`.
rows_per_genre = 150

genre_frames = []
for position, i in enumerate(categories):

    # Base-feature rows of this genre, re-indexed from 0
    new_data = combined_df_audio[combined_df_audio['label'] == i].reset_index(drop=True)

    # Matching block of extra-feature rows, re-indexed from 0 as well
    start = position * rows_per_genre
    add_more = added_more_features.iloc[start:start + rows_per_genre].reset_index(drop=True)

    # A length mismatch would otherwise be padded with NaN by the side-by-side
    # concat and shift nothing visibly - fail loudly instead.
    if len(new_data) != len(add_more):
        raise ValueError(
            'Row count mismatch for genre {!r}: {} base rows vs {} extra-feature rows'.format(
                i, len(new_data), len(add_more)))

    # Put both sets of columns side by side
    genre_frames.append(pd.concat([new_data, add_more], axis=1))

# Stack all genres once at the end (DataFrame.append was removed in pandas 2.0,
# and concatenating inside the loop re-copies the frame on every iteration)
completed_1 = pd.concat(genre_frames, axis=0, ignore_index=True)

completed_1

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,chroma_cqt_mean,chroma_cqt_var,chroma_cens_mean,chroma_cens_var,spectral_contrast_mean,spectral_contrast_var,spectral_flatness_mean,spectral_flatness_var,tonnetz_mean,tonnetz_var
0,blues.00000.wav,0.350088,0.088757,0.130228,0.002827,1784.165850,129774.064525,2002.449060,85882.761315,3805.839606,...,0.535131,0.053737,0.272036,0.009330,20.526699,76.741834,0.004498,0.000030,0.007162,0.004417
1,blues.00001.wav,0.340914,0.094980,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,3550.522098,...,0.476749,0.057883,0.260980,0.015223,20.676128,67.851520,0.002298,0.000024,0.029153,0.004762
2,blues.00002.wav,0.363637,0.085275,0.175570,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,3042.260232,...,0.443310,0.058933,0.262630,0.014359,22.197517,76.130717,0.002631,0.000011,0.024264,0.010511
3,blues.00003.wav,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,2184.745799,...,0.317360,0.071272,0.226383,0.032084,21.426268,63.384626,0.000954,0.000003,0.026563,0.017854
4,blues.00004.wav,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,3579.757627,...,0.496918,0.066167,0.259550,0.015967,21.466338,62.986186,0.003238,0.000019,-0.020918,0.010124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,rock.00145.mp3,0.484937,0.078405,0.350307,0.002019,2609.595900,145157.316719,2380.748151,55568.591784,5094.494194,...,0.558072,0.051269,0.273489,0.008537,18.884377,47.704593,0.031787,0.000338,-0.007361,0.003850
1496,rock.00146.mp3,0.305203,0.093703,0.179621,0.001321,1279.402217,105029.857437,1564.775526,45909.869908,2794.262016,...,0.356624,0.074964,0.218830,0.035447,19.914648,56.838348,0.001795,0.000006,-0.016432,0.009719
1497,rock.00147.mp3,0.408584,0.080078,0.152672,0.001840,2048.334865,172782.940192,1702.202945,47236.220997,3803.729414,...,0.470010,0.065966,0.256140,0.017726,20.319775,61.370194,0.009568,0.000036,0.003838,0.010350
1498,rock.00148.mp3,0.430541,0.086723,0.175718,0.001206,1780.930259,226527.887285,2111.910130,152729.605893,3842.801085,...,0.418021,0.065369,0.249422,0.021122,20.912142,61.403757,0.007877,0.000053,0.025950,0.009794


In [53]:
# List the column names of the empty template frame completed_df
# (base feature columns followed by the extra feature columns)
completed_df.columns

Index(['filename', 'chroma_stft_mean', 'chroma_stft_var', 'rms_mean',
       'rms_var', 'spectral_centroid_mean', 'spectral_centroid_var',
       'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean',
       'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var',
       'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var', 'tempo',
       'mfcc1_mean', 'mfcc1_var', 'mfcc2_mean', 'mfcc2_var', 'mfcc3_mean',
       'mfcc3_var', 'mfcc4_mean', 'mfcc4_var', 'mfcc5_mean', 'mfcc5_var',
       'mfcc6_mean', 'mfcc6_var', 'mfcc7_mean', 'mfcc7_var', 'mfcc8_mean',
       'mfcc8_var', 'mfcc9_mean', 'mfcc9_var', 'mfcc10_mean', 'mfcc10_var',
       'mfcc11_mean', 'mfcc11_var', 'mfcc12_mean', 'mfcc12_var', 'mfcc13_mean',
       'mfcc13_var', 'mfcc14_mean', 'mfcc14_var', 'mfcc15_mean', 'mfcc15_var',
       'mfcc16_mean', 'mfcc16_var', 'mfcc17_mean', 'mfcc17_var', 'mfcc18_mean',
       'mfcc18_var', 'mfcc19_mean', 'mfcc19_var', 'mfcc20_mean', 'mfcc20_var',
       'labe

In [55]:
# Final column order: file name, base features, extra features, genre label last
list_name_column = [
    'filename',
    'chroma_stft_mean', 'chroma_stft_var',
    'rms_mean', 'rms_var',
    'spectral_centroid_mean', 'spectral_centroid_var',
    'spectral_bandwidth_mean', 'spectral_bandwidth_var',
    'rolloff_mean', 'rolloff_var',
    'zero_crossing_rate_mean', 'zero_crossing_rate_var',
    'harmony_mean', 'harmony_var',
    'perceptr_mean', 'perceptr_var',
    'tempo',
    'mfcc1_mean', 'mfcc1_var',
    'mfcc2_mean', 'mfcc2_var',
    'mfcc3_mean', 'mfcc3_var',
    'mfcc4_mean', 'mfcc4_var',
    'mfcc5_mean', 'mfcc5_var',
    'mfcc6_mean', 'mfcc6_var',
    'mfcc7_mean', 'mfcc7_var',
    'mfcc8_mean', 'mfcc8_var',
    'mfcc9_mean', 'mfcc9_var',
    'mfcc10_mean', 'mfcc10_var',
    'mfcc11_mean', 'mfcc11_var',
    'mfcc12_mean', 'mfcc12_var',
    'mfcc13_mean', 'mfcc13_var',
    'mfcc14_mean', 'mfcc14_var',
    'mfcc15_mean', 'mfcc15_var',
    'mfcc16_mean', 'mfcc16_var',
    'mfcc17_mean', 'mfcc17_var',
    'mfcc18_mean', 'mfcc18_var',
    'mfcc19_mean', 'mfcc19_var',
    'mfcc20_mean', 'mfcc20_var',
    'chroma_cqt_mean', 'chroma_cqt_var',
    'chroma_cens_mean', 'chroma_cens_var',
    'spectral_contrast_mean', 'spectral_contrast_var',
    'spectral_flatness_mean', 'spectral_flatness_var',
    'tonnetz_mean', 'tonnetz_var',
    'label',
]

# Select the columns in that order (raises KeyError if one is missing)
completed_1 = completed_1.loc[:, list_name_column]
completed_1

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,chroma_cqt_var,chroma_cens_mean,chroma_cens_var,spectral_contrast_mean,spectral_contrast_var,spectral_flatness_mean,spectral_flatness_var,tonnetz_mean,tonnetz_var,label
0,blues.00000.wav,0.350088,0.088757,0.130228,0.002827,1784.165850,129774.064525,2002.449060,85882.761315,3805.839606,...,0.053737,0.272036,0.009330,20.526699,76.741834,0.004498,0.000030,0.007162,0.004417,blues
1,blues.00001.wav,0.340914,0.094980,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,3550.522098,...,0.057883,0.260980,0.015223,20.676128,67.851520,0.002298,0.000024,0.029153,0.004762,blues
2,blues.00002.wav,0.363637,0.085275,0.175570,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,3042.260232,...,0.058933,0.262630,0.014359,22.197517,76.130717,0.002631,0.000011,0.024264,0.010511,blues
3,blues.00003.wav,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,2184.745799,...,0.071272,0.226383,0.032084,21.426268,63.384626,0.000954,0.000003,0.026563,0.017854,blues
4,blues.00004.wav,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,3579.757627,...,0.066167,0.259550,0.015967,21.466338,62.986186,0.003238,0.000019,-0.020918,0.010124,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,rock.00145.mp3,0.484937,0.078405,0.350307,0.002019,2609.595900,145157.316719,2380.748151,55568.591784,5094.494194,...,0.051269,0.273489,0.008537,18.884377,47.704593,0.031787,0.000338,-0.007361,0.003850,rock
1496,rock.00146.mp3,0.305203,0.093703,0.179621,0.001321,1279.402217,105029.857437,1564.775526,45909.869908,2794.262016,...,0.074964,0.218830,0.035447,19.914648,56.838348,0.001795,0.000006,-0.016432,0.009719,rock
1497,rock.00147.mp3,0.408584,0.080078,0.152672,0.001840,2048.334865,172782.940192,1702.202945,47236.220997,3803.729414,...,0.065966,0.256140,0.017726,20.319775,61.370194,0.009568,0.000036,0.003838,0.010350,rock
1498,rock.00148.mp3,0.430541,0.086723,0.175718,0.001206,1780.930259,226527.887285,2111.910130,152729.605893,3842.801085,...,0.065369,0.249422,0.021122,20.912142,61.403757,0.007877,0.000053,0.025950,0.009794,rock


In [61]:
# Rescaling data: standardise every feature column (everything except the
# first column, filename, and the last column, label).
# NOTE(review): the scaler is fitted on the whole table; if a train/test split
# happens later, fit it on the training part only to avoid leakage - confirm.
cols = completed_1.columns[1:-1]
scale = preprocessing.StandardScaler()
np_scaled = scale.fit_transform(completed_1.iloc[:,1:-1])

# New scaled dataframe.
# index=completed_1.index keeps the scaled rows aligned with filename/label in
# the concat below; a fresh 0..n-1 index would misalign them whenever
# completed_1 does not carry a default index.
scaled_feature = pd.DataFrame(data=np_scaled, columns=cols, index=completed_1.index)
scaled_feature = pd.concat([completed_1.iloc[:,0], scaled_feature, completed_1.iloc[:,-1]], axis=1)
scaled_feature.head()

Unnamed: 0,filename,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,chroma_cqt_var,chroma_cens_mean,chroma_cens_var,spectral_contrast_mean,spectral_contrast_var,spectral_flatness_mean,spectral_flatness_var,tonnetz_mean,tonnetz_var,label
0,blues.00000.wav,-0.376549,0.303461,-0.183594,-0.14226,-0.489905,-0.803072,-0.385229,-0.557901,-0.387112,...,-0.95841,0.928919,-0.96787,-0.121318,1.043437,-0.625217,-0.456503,-0.04506,-0.710316,blues
1,blues.00001.wav,-0.489015,1.048257,-0.636506,-0.256053,-0.847389,-0.248847,-0.314757,0.426392,-0.551258,...,-0.628375,0.497337,-0.481392,-0.05392,0.691727,-0.710876,-0.459329,0.806544,-0.686788,blues
2,blues.00002.wav,-0.210458,-0.113159,0.415474,-0.162509,-0.81553,-0.742952,-0.875908,-0.631965,-0.878024,...,-0.54485,0.561751,-0.552718,0.632285,1.019261,-0.697904,-0.464424,0.617224,-0.294641,blues
3,blues.00003.wav,0.293949,0.930834,-0.040044,0.740006,-1.494926,-0.68014,-1.167313,0.061768,-1.429327,...,0.437271,-0.853148,0.910487,0.284422,0.515012,-0.763175,-0.467944,0.706229,0.206259,blues
4,blues.00004.wav,-0.886038,0.193892,-0.694891,-0.273434,-0.418352,-0.321933,-0.875003,-0.53819,-0.532462,...,0.030965,0.441511,-0.419939,0.302495,0.49925,-0.674283,-0.461211,-1.132454,-0.321003,blues


In [62]:
# Save the scaled GTZAN + FMA feature table as CSV.
# The file name is relative, so it is written to the current working directory;
# index=False leaves the row index out of the file.
scaled_feature.to_csv('Combined_data_audio.csv', index=False)