In [None]:
import os
from sys import platform

import numpy as np
import pandas as pd
import soundfile as sf
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from sklearn.utils import shuffle
from tensorflow.keras.utils import to_categorical

In [None]:
# Column names applied, in order, to the rows stored in the .npy file.
cols = [
    'video_id',
    'start_time',
    'mid_ts',
    'label',
    'audio',
    'vggish',
]

# allow_pickle=True lets NumPy unpickle object arrays; only use it on files
# you produced yourself or otherwise trust.
d = np.load(
    f'./resources/working_data/vocal_only_data_with_vggish.npy',
    allow_pickle=True,
)
df = pd.DataFrame(data=d, columns=cols)

# Lookup table; later cells read its 'video_id' and 'band_name' columns.
lut = pd.read_csv(f'../dataset/lookup.csv')

In [None]:
# Attach each row's band name from the lookup table (inner join on video_id).
# Guarded so that re-running this cell does not merge a second time: a repeat
# merge would produce band_name_x / band_name_y columns and break the later
# selections of 'band_name'.
# NOTE(review): rows whose video_id is missing from the lookup are dropped by
# the inner join, and duplicate video_ids in the lookup would duplicate rows.
if 'band_name' not in df.columns:
    df = df.merge(lut[['video_id', 'band_name']], on='video_id')
df

In [None]:
# Collapse the six annotation labels into three integer classes:
#   'clean' -> 0; 'highfry' / 'layered' / 'lowfry' / 'midfry' -> 1; 'no_vocals' -> 2.
LABEL_TO_CLASS = {
    'clean': 0,
    'highfry': 1,
    'layered': 1,
    'lowfry': 1,
    'midfry': 1,
    'no_vocals': 2,
}

# .copy() makes feature_df an independent frame, so adding a column below
# never touches (or warns about) the parent df.
feature_df = df[['label', 'audio', 'band_name']].copy()

# Vectorised lookup instead of a row-by-row if-chain. Labels outside the table
# become NaN here; fail loudly and name them rather than letting a shorter
# list surface later as an unexplained length mismatch in insert().
mapped = feature_df['label'].map(LABEL_TO_CLASS)
if mapped.isna().any():
    unknown = feature_df.loc[mapped.isna(), 'label'].unique().tolist()
    raise ValueError(f'Unmapped label values: {unknown}')

mapping = mapped.astype(int).tolist()
feature_df.insert(3, 'label_mapped', mapping)

## Undersampling the master data

In [None]:
# Randomly drop rows until each mapped class has the requested number of rows.
# RandomUnderSampler is imported with the other dependencies at the top of the
# notebook.
undersample = RandomUnderSampler(
    sampling_strategy={0: 2462, 1: 3000, 2: 3000},
    random_state=0,
)

# band_name rides along as a second column so that it is resampled together
# with the audio and stays aligned with the selected rows.
X = feature_df[['audio', 'band_name']].to_numpy()
y = feature_df['label_mapped'].to_numpy()
X_under, y_under = undersample.fit_resample(X, y)

# Split the two columns apart again: groups for the band-wise split, and the
# audio payload as the model input.
band_names = X_under[:, 1]
X_under = X_under[:, 0]

In [None]:
# Sanity check: number of rows per class after undersampling.
# Note that this rebinds `d`, which previously held the loaded array.
d = pd.DataFrame({'y_under': y_under, 'blah': 1})

per_class = d.groupby('y_under')['blah'].count()
print(per_class)

## Creating train-test-validation split

In [None]:
# Split by band: the band names are passed as groups, so a band's rows all
# land on the same side of the split. Five splits are configured but only the
# first one is consumed.
gss = GroupShuffleSplit(n_splits=5, train_size=.7, random_state=42)
train, test = next(
    gss.split(X=X_under, y=y_under, groups=band_names)
)

In [None]:
# Materialise the grouped split: `train` / `test` are index arrays into the
# undersampled data.
X_train, y_train = X_under[train], y_under[train]
X_test1, y_test1 = X_under[test], y_under[test]

# Halve the held-out part into test and validation sets.
# NOTE(review): this second split is not grouped, so the same band can appear
# in both test and validation - confirm that is acceptable.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_test1,
    y_test1,
    test_size=0.5,
    random_state=42,
)

In [None]:
# label_mapped takes the values 0, 1 and 2. Fixing the one-hot width keeps all
# three label arrays the same shape even if a split happens to lack the
# highest class (to_categorical otherwise infers the width from max(y) + 1).
NUM_CLASSES = 3
# Seed the shuffles so that the saved files are reproducible across runs.
SHUFFLE_SEED = 42

y_train_hot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_hot = to_categorical(y_test, num_classes=NUM_CLASSES)
y_valid_hot = to_categorical(y_valid, num_classes=NUM_CLASSES)

# NOTE: y_train / y_test / y_valid are rebound to the one-hot arrays here, so
# re-run the split cell before re-running this one.
X_train, y_train = shuffle(X_train, y_train_hot, random_state=SHUFFLE_SEED)
X_test, y_test = shuffle(X_test, y_test_hot, random_state=SHUFFLE_SEED)
X_valid, y_valid = shuffle(X_valid, y_valid_hot, random_state=SHUFFLE_SEED)

# Persist inputs ...
np.save(f'./resources/working_data/x_train-rawaudio.npy', X_train)
np.save(f'./resources/working_data/x_test-rawaudio.npy', X_test)
np.save(f'./resources/working_data/x_valid-rawaudio.npy', X_valid)

# ... and the matching one-hot labels.
np.save(f'./resources/working_data/y_train-rawaudio.npy', y_train)
np.save(f'./resources/working_data/y_test-rawaudio.npy', y_test)
np.save(f'./resources/working_data/y_valid-rawaudio.npy', y_valid)

## VGGish

In [None]:
# Reload the stored array into `d` (the name was reused for a DataFrame above).
# allow_pickle=True unpickles object arrays - only use it on trusted files.
d=np.load(f'./resources/working_data/vocal_only_data_with_vggish.npy',allow_pickle=True)

In [None]:
# Shape of column 5 of the loaded array, i.e. one entry per stored row.
# Index 5 is the position of 'vggish' in the column list used for this file.
d[:,5].shape

In [None]:
# Stack the per-row entries of column 5 into a (rows, 128) matrix. The row
# count comes from the loaded array instead of a hard-coded 33820, so the cell
# keeps working when the dataset size changes, while still failing if the
# entries do not amount to exactly one 128-value vector per row.
np.concatenate(d[:, 5]).reshape(d.shape[0], 128)

In [None]:
# Collapse the six annotation labels into three integer classes:
#   'clean' -> 0; 'highfry' / 'layered' / 'lowfry' / 'midfry' -> 1; 'no_vocals' -> 2.
LABEL_TO_CLASS = {
    'clean': 0,
    'highfry': 1,
    'layered': 1,
    'lowfry': 1,
    'midfry': 1,
    'no_vocals': 2,
}

# .copy() makes feature_df an independent frame, so adding a column below
# never touches (or warns about) the parent df.
feature_df = df[['label', 'vggish', 'band_name']].copy()

# Vectorised lookup instead of a row-by-row if-chain. Labels outside the table
# become NaN here; fail loudly and name them rather than letting a shorter
# list surface later as an unexplained length mismatch in insert().
mapped = feature_df['label'].map(LABEL_TO_CLASS)
if mapped.isna().any():
    unknown = feature_df.loc[mapped.isna(), 'label'].unique().tolist()
    raise ValueError(f'Unmapped label values: {unknown}')

mapping = mapped.astype(int).tolist()
feature_df.insert(3, 'label_mapped', mapping)


# Randomly drop rows until each mapped class has the requested number of rows.
# RandomUnderSampler is imported with the other dependencies at the top of the
# notebook.
undersample = RandomUnderSampler(
    sampling_strategy={0: 2462, 1: 3000, 2: 3000},
    random_state=0,
)

# band_name rides along as a second column so that it is resampled together
# with the embeddings and stays aligned with the selected rows.
X = feature_df[['vggish', 'band_name']].to_numpy()
y = feature_df['label_mapped'].to_numpy()
X_under, y_under = undersample.fit_resample(X, y)

band_names = X_under[:, 1]

# Column 0 is an object array holding one embedding per row; stack it into a
# dense (rows, 128) matrix. Reshaping to the row count fails if the entries do
# not amount to exactly 128 values per row.
vggish_cells = X_under[:, 0]
X_under = np.concatenate(vggish_cells).reshape(vggish_cells.shape[0], 128)

# Split by band: the band names are passed as groups, so a band's rows all
# land on the same side of the split. Five splits are configured but only the
# first one is consumed.
gss = GroupShuffleSplit(n_splits=5, train_size=.7, random_state=42)
train, test = next(
    gss.split(X=X_under, y=y_under, groups=band_names)
)

# Materialise the grouped split from the index arrays.
X_train, y_train = X_under[train], y_under[train]
X_test1, y_test1 = X_under[test], y_under[test]

# Halve the held-out part into test and validation sets.
# NOTE(review): this second split is not grouped, so the same band can appear
# in both test and validation - confirm that is acceptable.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_test1,
    y_test1,
    test_size=0.5,
    random_state=42,
)

# label_mapped takes the values 0, 1 and 2. Fixing the one-hot width keeps all
# three label arrays the same shape even if a split happens to lack the
# highest class (to_categorical otherwise infers the width from max(y) + 1).
NUM_CLASSES = 3
# Seed the shuffles so that the saved files are reproducible across runs.
SHUFFLE_SEED = 42

y_train_hot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_hot = to_categorical(y_test, num_classes=NUM_CLASSES)
y_valid_hot = to_categorical(y_valid, num_classes=NUM_CLASSES)

# Each shuffle permutes inputs and labels together, so pairs stay aligned.
X_train, y_train = shuffle(X_train, y_train_hot, random_state=SHUFFLE_SEED)
X_test, y_test = shuffle(X_test, y_test_hot, random_state=SHUFFLE_SEED)
X_valid, y_valid = shuffle(X_valid, y_valid_hot, random_state=SHUFFLE_SEED)



In [None]:
# Write the VGGish splits to disk, one input/label pair per split.
np.save(file=f'./resources/working_data/x_train-vggish.npy', arr=X_train)
np.save(file=f'./resources/working_data/y_train-vggish.npy', arr=y_train)

np.save(file=f'./resources/working_data/x_test-vggish.npy', arr=X_test)
np.save(file=f'./resources/working_data/y_test-vggish.npy', arr=y_test)

np.save(file=f'./resources/working_data/x_valid-vggish.npy', arr=X_valid)
np.save(file=f'./resources/working_data/y_valid-vggish.npy', arr=y_valid)

## Features

In [None]:
# Column names applied, in order, to the rows stored in the features file:
# four metadata columns, the hand-crafted feature columns, then 'vggish'.
cols = [
    'video_id', 'start_time', 'mid_ts', 'label',
    'average_zcr', 'zcr_stddev',
    'mfcc1_mean', 'mfcc2_mean', 'mfcc3_mean', 'mfcc4_mean', 'mfcc5_mean',
    'mfcc6_mean', 'mfcc7_mean', 'mfcc8_mean', 'mfcc9_mean', 'mfcc10_mean',
    'mfcc11_mean', 'mfcc12_mean', 'mfcc13_mean',
    'mfcc1_std', 'mfcc2_std', 'mfcc3_std', 'mfcc4_std', 'mfcc5_std',
    'mfcc6_std', 'mfcc7_std', 'mfcc8_std', 'mfcc9_std', 'mfcc10_std',
    'mfcc11_std', 'mfcc12_std', 'mfcc13_std',
    'delta_mfcc1_mean', 'delta_mfcc2_mean', 'delta_mfcc3_mean',
    'delta_mfcc4_mean', 'delta_mfcc5_mean', 'delta_mfcc6_mean',
    'delta_mfcc7_mean', 'delta_mfcc8_mean', 'delta_mfcc9_mean',
    'delta_mfcc10_mean', 'delta_mfcc11_mean', 'delta_mfcc12_mean',
    'delta_mfcc13_mean',
    'delta_mfcc1_std', 'delta_mfcc2_std', 'delta_mfcc3_std',
    'delta_mfcc4_std', 'delta_mfcc5_std', 'delta_mfcc6_std',
    'delta_mfcc7_std', 'delta_mfcc8_std', 'delta_mfcc9_std',
    'delta_mfcc10_std', 'delta_mfcc11_std', 'delta_mfcc12_std',
    'delta_mfcc13_std',
    'centroid_mean', 'centroid_std',
    'contrast_mean', 'contrast_std',
    'flatness_mean', 'flatness_std',
    'rolloff_mean', 'rolloff_std',
    'rms_mean', 'rms_std',
    'vggish',
]

# allow_pickle=True unpickles object arrays - only use it on trusted files.
d = np.load(f'./resources/working_data/vocal_only_features.npy', allow_pickle=True)
df = pd.DataFrame(d, columns=cols)

lut = pd.read_csv(f'../dataset/lookup.csv')

# Attach each row's band name (inner join on video_id). df is rebuilt from the
# file just above, so this merge is safe to re-run with the rest of the cell.
# The bare `df` expression that used to follow was removed: in the middle of a
# cell it is never rendered and did nothing.
df = df.merge(lut[['video_id', 'band_name']], on='video_id')

# The hand-crafted feature columns are everything in `cols` except the
# metadata columns and the embedding; deriving them avoids keeping a second
# hand-written copy of the 64 names in sync.
NON_FEATURE_COLS = {'video_id', 'start_time', 'mid_ts', 'label', 'vggish'}
feature_cols = [c for c in cols if c not in NON_FEATURE_COLS]

# .copy() makes feature_df an independent frame, so adding a column below
# never touches (or warns about) the parent df.
feature_df = df[['label', 'band_name'] + feature_cols].copy()

# Collapse the six annotation labels into three integer classes:
#   'clean' -> 0; 'highfry' / 'layered' / 'lowfry' / 'midfry' -> 1; 'no_vocals' -> 2.
LABEL_TO_CLASS = {
    'clean': 0,
    'highfry': 1,
    'layered': 1,
    'lowfry': 1,
    'midfry': 1,
    'no_vocals': 2,
}

# Vectorised lookup instead of a row-by-row if-chain. Labels outside the table
# become NaN here; fail loudly and name them rather than letting a shorter
# list surface later as an unexplained length mismatch in insert().
mapped = feature_df['label'].map(LABEL_TO_CLASS)
if mapped.isna().any():
    unknown = feature_df.loc[mapped.isna(), 'label'].unique().tolist()
    raise ValueError(f'Unmapped label values: {unknown}')

mapping = mapped.astype(int).tolist()
feature_df.insert(3, 'label_mapped', mapping)


# Randomly drop rows until each mapped class has the requested number of rows.
# RandomUnderSampler is imported with the other dependencies at the top of the
# notebook.
undersample = RandomUnderSampler(
    sampling_strategy={0: 2462, 1: 3000, 2: 3000},
    random_state=0,
)

# Feature columns = every name in `cols` that is neither metadata nor the
# embedding (same order as in `cols`), instead of a third hand-written copy.
feature_cols = [
    c for c in cols
    if c not in {'video_id', 'start_time', 'mid_ts', 'label', 'vggish'}
]

# band_name is appended as the last column so that it is resampled together
# with the features and stays aligned with the selected rows.
# NOTE(review): mixing band-name strings with the numeric columns presumably
# makes X an object-dtype array, which np.save stores via pickle - confirm the
# downstream loader expects that.
X = feature_df[feature_cols + ['band_name']].to_numpy()
y = feature_df['label_mapped'].to_numpy()
X_under, y_under = undersample.fit_resample(X, y)

# Peel the group column back off; the remaining columns are the model input.
band_names = X_under[:, -1]
X_under = X_under[:, :-1]

# Split by band: the band names are passed as groups, so a band's rows all
# land on the same side of the split. Five splits are configured but only the
# first one is consumed.
gss = GroupShuffleSplit(n_splits=5, train_size=.7, random_state=42)
train, test = next(
    gss.split(X=X_under, y=y_under, groups=band_names)
)

# Materialise the grouped split from the index arrays.
X_train, y_train = X_under[train], y_under[train]
X_test1, y_test1 = X_under[test], y_under[test]

# Halve the held-out part into test and validation sets.
# NOTE(review): this second split is not grouped, so the same band can appear
# in both test and validation - confirm that is acceptable.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_test1,
    y_test1,
    test_size=0.5,
    random_state=42,
)

# label_mapped takes the values 0, 1 and 2. Fixing the one-hot width keeps all
# three label arrays the same shape even if a split happens to lack the
# highest class (to_categorical otherwise infers the width from max(y) + 1).
NUM_CLASSES = 3
# Seed the shuffles so that the saved files are reproducible across runs.
SHUFFLE_SEED = 42

y_train_hot = to_categorical(y_train, num_classes=NUM_CLASSES)
y_test_hot = to_categorical(y_test, num_classes=NUM_CLASSES)
y_valid_hot = to_categorical(y_valid, num_classes=NUM_CLASSES)

# Each shuffle permutes inputs and labels together, so pairs stay aligned.
X_train, y_train = shuffle(X_train, y_train_hot, random_state=SHUFFLE_SEED)
X_test, y_test = shuffle(X_test, y_test_hot, random_state=SHUFFLE_SEED)
X_valid, y_valid = shuffle(X_valid, y_valid_hot, random_state=SHUFFLE_SEED)

# Persist the (unnormalized) feature inputs ...
np.save(f'./resources/working_data/x_train-features_unnormalized.npy', X_train)
np.save(f'./resources/working_data/x_test-features_unnormalized.npy', X_test)
np.save(f'./resources/working_data/x_valid-features_unnormalized.npy', X_valid)

# ... and the matching one-hot labels.
np.save(f'./resources/working_data/y_train-features_unnormalized.npy', y_train)
np.save(f'./resources/working_data/y_test-features_unnormalized.npy', y_test)
np.save(f'./resources/working_data/y_valid-features_unnormalized.npy', y_valid)