In [18]:
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.utils import to_categorical
import plotly.graph_objs as go
from plotly.subplots import make_subplots

import pandas as pd
from sklearn.utils import shuffle

from sys import platform
import os

# Root directory that contains the `ScreamDetection` project folder.
# Resolution order:
#   1. the SCREAM_DETECTION_ROOT environment variable, if set (lets the
#      notebook run on any machine without editing this cell);
#   2. the per-platform defaults that were hard-coded originally.
# Previously `path` was simply left undefined on any other platform, which
# only surfaced later as a NameError in the data-loading cell.
path = os.environ.get('SCREAM_DETECTION_ROOT')
if not path:
    if platform.startswith("linux"):
        # linux (also covers the legacy "linux2" value)
        path = '/home/vedant/projects/'
    elif platform == "darwin":
        # OS X
        path = '/Users/vedant/Desktop/Programming/'
    else:
        raise RuntimeError(
            f"No default data root for platform {platform!r}; "
            "set the SCREAM_DETECTION_ROOT environment variable."
        )

# Later cells concatenate `path` directly with a relative path, so make sure
# it always ends with a path separator.
path = os.path.join(path, '')


In [5]:
import os
import pandas as pd
import soundfile as sf
import numpy as np

# Column layout of the pickled working-data array.
cols = ['video_id', 'start_time', 'mid_ts', 'label', 'audio', 'vggish']

# Build both file locations with os.path.join so they are formed the same way
# (the original mixed `{path}ScreamDetection` and `{path}/ScreamDetection`).
resources_dir = os.path.join(path, 'ScreamDetection', 'resources')

# SECURITY NOTE: allow_pickle=True unpickles Python objects and can execute
# arbitrary code embedded in the file. Only load .npy files you produced
# yourself or otherwise trust.
d = np.load(
    os.path.join(resources_dir, 'working_data', 'vocal_only_data_with_vggish.npy'),
    allow_pickle=True,
)
df = pd.DataFrame(d, columns=cols)

# Lookup table; a later cell reads its `video_id` and `band_name` columns.
lut = pd.read_csv(os.path.join(resources_dir, 'dataset', 'lookup_new.csv'))

In [6]:
# Collapse the six annotated labels into three integer classes:
#   0 = clean, 1 = highfry / layered / lowfry / midfry, 2 = no_vocals.
LABEL_TO_CLASS = {
    'clean': 0,
    'highfry': 1,
    'layered': 1,
    'lowfry': 1,
    'midfry': 1,
    'no_vocals': 2,
}

# Work on an explicit copy so inserting a column never touches `df`.
feature_df = df[cols].copy()

# Fail loudly on labels the mapping does not know about. The original
# if-chain silently skipped such rows, leaving `mapping` shorter than the
# frame and producing an unhelpful length-mismatch error at insert time.
unknown_labels = set(feature_df['label'].unique()) - set(LABEL_TO_CLASS)
if unknown_labels:
    raise ValueError(
        f"Unmapped labels in dataset: {sorted(map(str, unknown_labels))}"
    )

# Vectorised lookup instead of a Python-level iterrows() loop.
mapping = feature_df['label'].map(LABEL_TO_CLASS).astype(int).tolist()

feature_df.insert(4, 'label_mapped', mapping)

In [7]:
# Preview the frame with the new `label_mapped` column inserted at position 4.
feature_df

Unnamed: 0,video_id,start_time,mid_ts,label,label_mapped,audio,vggish
0,4600fGWcn9o,0.0,0.5,no_vocals,2,"[0.0, 0.0, 0.0, -3.0517578125e-05, -1.52587890...","[166.0, 8.0, 149.0, 128.0, 199.0, 57.0, 96.0, ..."
1,4600fGWcn9o,0.5,1.0,no_vocals,2,"[0.0004730224609375, 0.0001983642578125, -6.10...","[175.0, 10.0, 147.0, 103.0, 210.0, 74.0, 81.0,..."
2,4600fGWcn9o,1.0,1.5,no_vocals,2,"[-4.57763671875e-05, -3.0517578125e-05, 1.5258...","[173.0, 10.0, 148.0, 131.0, 191.0, 76.0, 71.0,..."
3,4600fGWcn9o,1.5,2.0,no_vocals,2,"[3.0517578125e-05, 0.0, -3.0517578125e-05, 0.0...","[164.0, 7.0, 154.0, 128.0, 190.0, 80.0, 29.0, ..."
4,4600fGWcn9o,2.0,2.5,no_vocals,2,"[-0.0008087158203125, -0.000885009765625, -0.0...","[169.0, 11.0, 146.0, 115.0, 191.0, 75.0, 108.0..."
...,...,...,...,...,...,...,...
33815,0m5fIHHfJTw,217.5,218.0,no_vocals,2,"[1.52587890625e-05, 6.103515625e-05, 0.0001220...","[174.0, 13.0, 142.0, 130.0, 194.0, 76.0, 84.0,..."
33816,0m5fIHHfJTw,218.0,218.5,no_vocals,2,"[6.103515625e-05, 9.1552734375e-05, 0.00012207...","[174.0, 10.0, 148.0, 124.0, 190.0, 77.0, 86.0,..."
33817,0m5fIHHfJTw,218.5,219.0,no_vocals,2,"[0.0, -1.52587890625e-05, -1.52587890625e-05, ...","[171.0, 9.0, 140.0, 110.0, 201.0, 56.0, 68.0, ..."
33818,0m5fIHHfJTw,219.0,219.5,no_vocals,2,"[-1.52587890625e-05, -1.52587890625e-05, -1.52...","[172.0, 10.0, 141.0, 109.0, 203.0, 56.0, 71.0,..."


In [8]:
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter
from sklearn.model_selection import GroupShuffleSplit

# --- 1. Balance the classes -------------------------------------------------
# 'not minority' resamples every class except the smallest one.
undersample = RandomUnderSampler(sampling_strategy='not minority', random_state=0)

X = feature_df[cols].to_numpy()
# Pass the target as a 1-D vector (the original passed an (n, 1) column,
# which relies on the libraries tolerating a column-shaped target).
y = feature_df['label_mapped'].to_numpy()

X_under, y_under = undersample.fit_resample(X, y)

undersampled_data = pd.DataFrame(X_under, columns=cols)
undersampled_data['label_mapped'] = y_under
#print(undersampled_data)

# --- 2. Split video ids into train / test, grouped by band ------------------
# Grouping on `band_name` keeps all videos of one band on the same side of
# the split. Only the first split is consumed, so one split is requested.
splitter = GroupShuffleSplit(test_size=.2, n_splits=1, random_state=0)
train_inds, test_inds = next(splitter.split(lut, groups=lut['band_name']))

# Separate names for the lookup-table subsets: the original reused `train` /
# `test` for these and then overwrote them with the sample frames below.
train_videos = lut.iloc[train_inds]
test_videos = lut.iloc[test_inds]

train_ids = train_videos['video_id'].to_numpy()
test_ids = test_videos['video_id'].to_numpy()

# --- 3. Select the balanced samples belonging to each side -------------------
#df_final = df
df_final = undersampled_data
train = df_final[df_final.video_id.isin(train_ids)]
test = df_final[df_final.video_id.isin(test_ids)]

In [9]:
# Preview the training partition (balanced rows whose video_id is in train_ids).
train

Unnamed: 0,video_id,start_time,mid_ts,label,audio,vggish,label_mapped
0,4600fGWcn9o,56.0,56.5,clean,"[0.393218994140625, 0.3555908203125, 0.3164520...","[172.0, 25.0, 179.0, 154.0, 204.0, 79.0, 104.0...",0
1,4600fGWcn9o,56.5,57.0,clean,"[0.0284576416015625, 0.06842041015625, 0.10578...","[182.0, 36.0, 195.0, 88.0, 196.0, 60.0, 115.0,...",0
2,4600fGWcn9o,57.0,57.5,clean,"[-0.2148284912109375, -0.27899169921875, -0.30...","[174.0, 26.0, 167.0, 139.0, 215.0, 80.0, 89.0,...",0
3,4600fGWcn9o,57.5,58.0,clean,"[-0.1547698974609375, -0.1190185546875, -0.075...","[180.0, 30.0, 185.0, 99.0, 217.0, 115.0, 137.0...",0
4,4600fGWcn9o,58.0,58.5,clean,"[-0.080902099609375, -0.1483001708984375, -0.1...","[174.0, 22.0, 177.0, 122.0, 197.0, 96.0, 105.0...",0
...,...,...,...,...,...,...,...
7379,JuRRnVqv2Vc,112.0,112.5,no_vocals,"[-0.0001068115234375, 3.0517578125e-05, 0.0001...","[173.0, 12.0, 156.0, 104.0, 214.0, 81.0, 99.0,...",2
7380,hwxTEcHnC1o,113.0,113.5,no_vocals,"[0.004638671875, 0.0041656494140625, 0.0032806...","[169.0, 14.0, 141.0, 127.0, 199.0, 59.0, 79.0,...",2
7382,FukeNR1ydOA,133.5,134.0,no_vocals,"[0.01397705078125, 0.012451171875, 0.010940551...","[171.0, 15.0, 163.0, 113.0, 197.0, 88.0, 107.0...",2
7383,B7iIS91fMAc,10.0,10.5,no_vocals,"[-3.0517578125e-05, -1.52587890625e-05, 0.0, -...","[161.0, 0.0, 154.0, 132.0, 180.0, 66.0, 46.0, ...",2


In [10]:
# Number of mapped classes (label_mapped takes the values 0, 1 and 2).
NUM_CLASSES = 3

# Raw audio clips; converted to spectrograms in a later cell.
x_train = train['audio'].to_numpy()
x_test = test['audio'].to_numpy()

# Pin the one-hot width explicitly. Without num_classes, to_categorical
# infers it from the largest label present, so a partition that happens to
# lack class 2 would yield 2-column targets that no longer match the
# 3-unit softmax output or the other partition.
y_train_hot = to_categorical(train['label_mapped'].to_numpy(), num_classes=NUM_CLASSES)
y_test_hot = to_categorical(test['label_mapped'].to_numpy(), num_classes=NUM_CLASSES)

In [11]:
import librosa

In [12]:
# Sample rate handed to librosa for every clip.
# NOTE(review): assumes the stored audio really is 44.1 kHz - confirm against
# the extraction step that produced the .npy file.
SAMPLE_RATE = 44100


def audio_to_log_mel(clips, sr=SAMPLE_RATE):
    """Convert each audio clip to a mel spectrogram expressed in decibels.

    Parameters
    ----------
    clips : iterable of 1-D audio arrays.
    sr : sample rate passed to ``librosa.feature.melspectrogram``.

    Returns
    -------
    list with one ``librosa.power_to_db`` result per input clip, in order.
    """
    # The loop variable is deliberately not called `y`: the original loops
    # used `y`, which overwrote the notebook-level label array of that name.
    return [
        librosa.power_to_db(librosa.feature.melspectrogram(y=clip, sr=sr))
        for clip in clips
    ]


X_train = audio_to_log_mel(x_train)
X_test = audio_to_log_mel(x_test)

In [47]:
# Inspect the shape of the first spectrogram; the reshape in the next cell
# hard-codes these two dimensions.
X_train[0].shape

(128, 87)

In [13]:
X_train = np.array(X_train)
X_test = np.array(X_test)

# Add the trailing channel axis expected by Conv2D: (samples, 128, 87, 1).
# reshape (rather than appending a new axis) keeps this cell safe to re-run.
X_train = X_train.reshape(X_train.shape[0], 128, 87, 1)
X_test = X_test.reshape(X_test.shape[0], 128, 87, 1)

# Shuffle features and labels TOGETHER and rebind the one-hot arrays to the
# shuffled result. The original stored the shuffled labels only in
# `y_train` / `y_test`, while the training cells pass `y_train_hot` /
# `y_test_hot` to fit() - i.e. shuffled spectrograms were paired with
# unshuffled labels. A fixed seed makes the order reproducible.
X_train, y_train_hot = shuffle(X_train, y_train_hot, random_state=0)
X_test, y_test_hot = shuffle(X_test, y_test_hot, random_state=0)

# Keep the original output names available as aliases of the aligned labels.
y_train = y_train_hot
y_test = y_test_hot

In [14]:
# Cheap NaN sanity check: a single NaN anywhere in the tensor makes the grand
# total NaN, so testing the sum avoids building a full boolean mask.
array = X_train
array_sum = array.sum()
np.isnan(array_sum)

False

In [24]:
# Baseline CNN on the spectrogram tensors:
# one wide 2x2 convolution -> 2x2 max-pool -> flatten -> 3-way softmax.
model = Sequential()

input_shape = (128, 87, 1)
model.add(Conv2D(512, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
# One output unit per mapped class (0, 1, 2).
model.add(Dense(3, activation='softmax'))

epochs = 50
batch_size = 256
lr = 0.00001
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(learning_rate=lr),
              metrics=['accuracy'])

model.summary()

# Fit against `y_train` / `y_test`: these are the label arrays returned by
# shuffle() together with X_train / X_test, so row i of the labels belongs to
# row i of the features. The original passed `y_train_hot` / `y_test_hot`,
# which were left in pre-shuffle order and therefore no longer lined up with
# the shuffled spectrograms.
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(X_test, y_test))

import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd

# Per-epoch metrics recorded by model.fit().
training_loss = history.history['loss']
validation_loss = history.history['val_loss']
training_acc = history.history['accuracy']
validation_acc = history.history['val_accuracy']

# Keep the metrics in their own frame. The original rebound the name `df`,
# which clobbered the raw dataset frame loaded at the top of the notebook and
# broke any re-run of the earlier cells without a kernel restart.
history_df = pd.DataFrame({
    'training_loss': training_loss,
    'validation_loss': validation_loss,
    'training_acc': training_acc,
    'validation_acc': validation_acc,
})

# (column, legend label, draw on the secondary y-axis?)
curves = [
    ('training_loss', 'Training Loss', False),
    ('validation_loss', 'Validation Loss', False),
    ('training_acc', 'Training Accuracy', True),
    ('validation_acc', 'Validation Accuracy', True),
]

# Loss on the primary axis, accuracy on the secondary axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
for column, label, on_secondary in curves:
    fig.add_trace(
        go.Scatter(x=history_df.index, y=history_df[column], name=label),
        secondary_y=on_secondary,
    )
fig.update_xaxes(title_text='Epoch')
fig.update_yaxes(title_text='Loss', secondary_y=False)
fig.update_yaxes(title_text='Accuracy', secondary_y=True)
fig.show()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 127, 86, 512)      2560      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 63, 43, 512)       0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 1387008)           0         
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 4161027   
Total params: 4,163,587
Trainable params: 4,163,587
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50


In [20]:
import plotly.express as px
from plotly.subplots import make_subplots
import pandas as pd

# Per-epoch metrics recorded by the most recent model.fit() call.
training_loss = history.history['loss']
validation_loss = history.history['val_loss']
training_acc = history.history['accuracy']
validation_acc = history.history['val_accuracy']

# Keep the metrics in their own frame. The original rebound the name `df`,
# which clobbered the raw dataset frame loaded at the top of the notebook and
# broke any re-run of the earlier cells without a kernel restart.
history_df = pd.DataFrame({
    'training_loss': training_loss,
    'validation_loss': validation_loss,
    'training_acc': training_acc,
    'validation_acc': validation_acc,
})

# (column, legend label, draw on the secondary y-axis?)
curves = [
    ('training_loss', 'Training Loss', False),
    ('validation_loss', 'Validation Loss', False),
    ('training_acc', 'Training Accuracy', True),
    ('validation_acc', 'Validation Accuracy', True),
]

# Loss on the primary axis, accuracy on the secondary axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
for column, label, on_secondary in curves:
    fig.add_trace(
        go.Scatter(x=history_df.index, y=history_df[column], name=label),
        secondary_y=on_secondary,
    )
fig.update_xaxes(title_text='Epoch')
fig.update_yaxes(title_text='Loss', secondary_y=False)
fig.update_yaxes(title_text='Accuracy', secondary_y=True)
fig.show()

# Mel spectrogram

In [None]:
# Smaller variant of the CNN: 16 filters instead of 512, Adadelta optimizer.
model = Sequential()

input_shape = (128, 87, 1)
model.add(Conv2D(16, kernel_size=(2, 2), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())

# The softmax width must equal the one-hot width of the targets. The original
# used Dense(6) - one unit per raw label - but the targets are built from the
# three mapped classes, so categorical_crossentropy would be given
# mismatching shapes.
num_classes = y_train.shape[1]
model.add(Dense(num_classes, activation='softmax'))

# Compile once (the original compiled twice with identical arguments).
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model.summary()

epochs = 10
batch_size = 128

# Fit against `y_train` / `y_test`, the label arrays that were shuffled
# together with X_train / X_test. The original passed `y_train_hot` /
# `y_test_hot`, which were still in pre-shuffle order. X_train is already an
# ndarray, so the extra np.array() wrapper was dropped.
model.fit(X_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, y_test))