In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model

In [None]:
# Function for rounding the labels
def RoundFun(labels):
  """Threshold scores at 0.5 and return hard 0/1 labels.

  Parameters
  ----------
  labels : array-like
      One score per sample, either 1-D of shape (n,) or a single column of
      shape (n, 1). Anything holding more than one value per sample raises
      ``ValueError``.

  Returns
  -------
  numpy.ndarray
      1-D float64 array of length ``len(labels)`` holding 1.0 where the
      score is >= 0.5 and 0.0 otherwise.
  """
  # Flatten a possible (n, 1) column to (n,) so the output is always 1-D
  scores = np.asarray(labels).reshape(len(labels))
  # Vectorised comparison replaces the per-element Python loop
  return np.where(scores >= 0.5, 1.0, 0.0)

In [None]:
# Loading VGG16, Custom CNN and MLP (ANN) model, which are trained on FreeField1010 Dataset
# (the three .h5 files are read from Google Drive, so the drive must already be mounted)
model_vgg16 = load_model("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/DCNN Models/FF DCNN CV/ff_vis_VGG16_CV1.h5")
model_cust = load_model("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/DCNN Models/FF DCNN CV/ff_vis_Cust_CV1.h5")
model_stat = load_model("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/DCNN Models/FF DCNN CV/ff_stat_CV1.h5")

# Applying Standard Scaling on the statistical features
# NOTE(review): the scaler is fit on this dataset itself, not on the data the
# MLP was trained with - confirm this matches the preprocessing used in training.
scaler = StandardScaler()
stat_data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures.csv")
# Label-based slice: keeps every column from 'ZeroCrossingsRate' through 'MFCC19' (both inclusive)
ch_stat_data = stat_data.loc[:, 'ZeroCrossingsRate':'MFCC19']
ch_stat_data = scaler.fit_transform(ch_stat_data)

# Only the scaled feature matrix is needed from here on; drop the full frame
del stat_data

In [None]:
# Predicting labels of CBC2020 Dataset for Nocall Filtering
# Rounded outputs of every file are collected in lists and joined once after
# the loop, so the result does not depend on the loop starting at file 0.
vgg_cust_stat_parts = []
vgg_cust_parts = []
cust_stat_parts = []

for i in range(0, 77):
  data_filename = "vis_feat_"+str(i)+".npy"

  # Loading Visual Features
  vis_data = np.load("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data/"+data_filename)

  # Predicting labels using trained MLP(ANN), VGG16, and Custom CNN model
  # Statistical rows are taken in chunks of 5000, aligned with the visual file index
  stat_pred = model_stat.predict(ch_stat_data[i*5000:(i+1)*5000])
  vgg_pred = model_vgg16.predict(vis_data)
  cust_pred = model_cust.predict(vis_data)

  # Only the first 3135 predictions of file 76 are used; the next cell resumes
  # the statistical rows at 383135 (= 76*5000 + 3135).
  # NOTE(review): presumably file 76 holds 3135 valid samples - confirm.
  if(i == 76):
    vgg_pred = vgg_pred[0:3135]
    cust_pred = cust_pred[0:3135]
    stat_pred = stat_pred[0:3135]

  # Ensembling VGG16 + Custom CNN + MLP(ANN - Statistical Features)
  pred_labels = (vgg_pred + cust_pred + stat_pred)/3
  pred_labels_round = RoundFun(pred_labels)

  # Ensembling VGG16 + Custom CNN
  pred_labels2 = (vgg_pred + cust_pred)/2
  pred_labels_round2 = RoundFun(pred_labels2)

  # Ensembling Custom CNN + MLP(ANN - Statistical Features)
  # Both inputs are already 0/1, so the mean is 0, 0.5 or 1 and rounding at
  # 0.5 marks a sample as 1 when either model predicts 1.
  pred_labels3 = (RoundFun(cust_pred) + RoundFun(stat_pred))/2
  pred_labels_round3 = RoundFun(pred_labels3)

  # Storing ensembled outputs
  vgg_cust_stat_parts.append(pred_labels_round)
  vgg_cust_parts.append(pred_labels_round2)
  cust_stat_parts.append(pred_labels_round3)

  print("Done : "+data_filename)

# One concatenation per ensemble instead of re-copying the array on every file
VGGCustStat = np.concatenate(vgg_cust_stat_parts)
VGGCust = np.concatenate(vgg_cust_parts)
CustStat = np.concatenate(cust_stat_parts)

Done : vis_feat_44.npy
Done : vis_feat_45.npy
Done : vis_feat_46.npy
Done : vis_feat_47.npy
Done : vis_feat_48.npy
Done : vis_feat_49.npy
Done : vis_feat_50.npy
Done : vis_feat_51.npy
Done : vis_feat_52.npy
Done : vis_feat_53.npy
Done : vis_feat_54.npy


In [None]:
# File 77 is handled on its own: the loop above stops at file 76.
i = 77
data_filename = "vis_feat_"+str(i)+".npy"

# Loading Visual Features
vis_data = np.load("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data/"+data_filename)

# Predicting labels using trained MLP(ANN), VGG16, and Custom CNN model
# Statistical rows start at 383135 (= 76*5000 + 3135, where file 76 ended) and run to the end
stat_pred = model_stat.predict(ch_stat_data[383135:])
vgg_pred = model_vgg16.predict(vis_data)
cust_pred = model_cust.predict(vis_data)

# Ensembling VGG16 + Custom CNN + MLP(ANN - Statistical Features)
pred_labels = (vgg_pred + cust_pred + stat_pred)/3
pred_labels_round = RoundFun(pred_labels)

# Ensembling VGG16 + Custom CNN
pred_labels2 = (vgg_pred + cust_pred)/2
pred_labels_round2 = RoundFun(pred_labels2)

# Ensembling Custom CNN + MLP(ANN - Statistical Features)
# Inputs are already rounded to 0/1, so rounding their mean at 0.5 gives 1 when either model predicts 1
pred_labels3 = (RoundFun(cust_pred) + RoundFun(stat_pred))/2
pred_labels_round3 = RoundFun(pred_labels3)

# Storing ensembled outputs
# NOTE: this REPLACES the arrays built by the loop above with the file-77
# results only - save the loop results before running this cell.
VGGCustStat = pred_labels_round
VGGCust = pred_labels_round2
CustStat = pred_labels_round3

print("Done : "+data_filename)

Done : vis_feat_77.npy


In [None]:
# Saving Ensemble outputs into numpy file
# Writes whatever VGGCustStat / VGGCust / CustStat currently hold as part "8";
# parts 1-8 are concatenated again in the label-conversion section below.
# NOTE(review): the part number in the file names looks hand-edited per run - confirm before re-running.
np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/VGGCustStat8.npy", VGGCustStat)
np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/VGGCust8.npy", VGGCust)
np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/CustStat8.npy", CustStat)

In [None]:
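# Label counts per saved batch of files, for each ensemble.
# The two numbers appear to be <samples labelled 0 / Nocall> | <samples labelled 1 / call>:
# the last VGGCustStat row's right-hand value (5267) equals the number of clips kept for file 77 after Nocall filtering.
# Batch 0-11: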
# VGGCustStat: 8127 | 46873
# VGGCust:     5211 | 49789
# CustStat:    5861 | 49139

# Batch 11-22:
# VGGCustStat: 9876 | 45124
# VGGCust:     6751 | 48249
# CustStat:    7422 | 47578

# Batch 22-33:
# VGGCustStat: 9238 | 45762
# VGGCust:     6061 | 48939
# CustStat:    6790 | 48210

# Batch 33-44:
# VGGCustStat: 9635 | 45365
# VGGCust:     6977 | 48023
# CustStat:    7239 | 47761

# Batch 44-55:
# VGGCustStat: 9791 | 45209
# VGGCust:     6625 | 48375
# CustStat:    7036 | 47964
 
# Batch 55-66:
# VGGCustStat: 8628 | 46372
# VGGCust:     5860 | 49140
# CustStat:    6006 | 48994

# Batch 66-77:
# VGGCustStat: 7143 | 45992
# VGGCust:     4951 | 48184
# CustStat:    5003 | 48132

# Batch 78:
# VGGCustStat: 1376 | 5267
# VGGCust:     879  | 5764
# CustStat:    1114 | 5529

Converting binary labels to string

In [None]:
# Loading all ensembled labels files of three different ensemble approaches
ENSEMBLE_DIR = '/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/'

def _load_ensemble_parts(name, n_parts=8):
  """Load ``<name>1.npy`` .. ``<name><n_parts>.npy`` from ENSEMBLE_DIR and join them in order.

  Parameters
  ----------
  name : str
      File-name stem of the ensemble, e.g. ``'VGGCustStat'``.
  n_parts : int, optional
      Number of consecutively numbered part files to read (default 8).

  Returns
  -------
  numpy.ndarray
      The parts concatenated along the first axis, part 1 first.
  """
  parts = [np.load(ENSEMBLE_DIR + name + str(k) + '.npy') for k in range(1, n_parts + 1)]
  return np.concatenate(parts)

VGGCustStat = _load_ensemble_parts('VGGCustStat')
VGGCust = _load_ensemble_parts('VGGCust')
CustStat = _load_ensemble_parts('CustStat')

# Loading CSV file with audio path, original labels and statistical features
stat_data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures.csv")

# The predicted labels are matched to the CSV rows purely by position, so
# every array must have exactly one entry per CSV row.
assert len(VGGCustStat) == len(VGGCust) == len(CustStat) == len(stat_data), "ensemble outputs and CSV rows are not aligned"

print(VGGCustStat.shape)
print(VGGCust.shape)
print(CustStat.shape)
print(stat_data.shape)

(389778,)
(389778,)
(389778,)
(389778, 76)


In [None]:
# Original label of every row (first CSV column) as fixed-width strings.
# 'U32' keeps at most 32 characters per label.
orig_labels = stat_data.iloc[:, 0].to_numpy().astype('U32')

# Labeling class 0 to Nocall; every other row keeps its original label.
# np.where does this for the whole array at once instead of one .iloc lookup
# per row; each prediction array must have one entry per CSV row.
VGGCustStatL = np.where(VGGCustStat == 0, "Nocall", orig_labels)
VGGCustL = np.where(VGGCust == 0, "Nocall", orig_labels)
CustStatL = np.where(CustStat == 0, "Nocall", orig_labels)

In [None]:
# Append the three predicted-label arrays to the feature table as new columns.
# DataFrame.assign returns a new frame, so stat_data itself is left unchanged.
predicted_columns = {
  'VGGCustStat': VGGCustStatL,
  'VGGCust': VGGCustL,
  'CustStat': CustStatL,
}
new_data = stat_data.assign(**predicted_columns)
print(new_data.loc[:, 'VGGCustStat':'CustStat'])

       VGGCustStat VGGCust CustStat
0           aldfly  aldfly   aldfly
1           aldfly  aldfly   aldfly
2           aldfly  aldfly   aldfly
3           aldfly  aldfly   aldfly
4           aldfly  aldfly   aldfly
...            ...     ...      ...
389773      yetvir  yetvir   yetvir
389774      yetvir  yetvir   yetvir
389775      Nocall  Nocall   Nocall
389776      yetvir  yetvir   yetvir
389777      yetvir  yetvir   yetvir

[389778 rows x 3 columns]


In [None]:
# Saving new CSV file with audio path, original labels, statistical features and predicted labels
# index=False: the row index is not written, so the three predicted-label columns are the last columns of the file
new_data.to_csv('/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures_Nocall_Labels.csv', index=False)

**Code to separate Nocall Data**

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder 

# Reload the table that was saved together with the predicted-label columns
stat_data = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Final_OverlapData_StatFeatures_Nocall_Labels.csv")

# Positional column 76 is the first column appended after the 76 original
# ones, i.e. the VGGCustStat labels (original label or "Nocall").
class_labels = stat_data.iloc[:, 76]

# Onehot Encoding of labels
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(class_labels)

# Later cells drop one-hot column 0 to remove the "Nocall" class, which is
# only correct if "Nocall" sorts first among the classes.
assert label_encoder.classes_[0] == "Nocall", "expected 'Nocall' to be class 0"

# binary encode
# scikit-learn renamed `sparse` to `sparse_output` (1.2) and removed the old
# name (1.4); try the new keyword first and fall back for older versions.
try:
  onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
  onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_labels = onehot_encoder.fit_transform(integer_encoded)

In [None]:
# NOTE(review): this loop covers files 0-74 and a later cell handles file 77,
# while the batching section reads files 0-77 - files 75 and 76 must be
# produced separately (file 76 presumably needs rows 380000:383135).
for j in range(0, 75):
  # Loading visual features
  data = np.load("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data/vis_feat_"+str(j)+".npy")

  # Rows of the label table that belong to this file (5000 per file)
  labels = stat_data.iloc[j*5000:(j+1)*5000, 76]
  # Column 0 of the one-hot matrix is dropped so that no "Nocall" class remains
  labels_onehot = onehot_labels[j*5000:(j+1)*5000, 1:]

  # Filtering out data which has "Nocall" label
  # Selecting by index avoids value_counts()["Nocall"], which raises KeyError
  # for a file that contains no "Nocall" rows at all.
  arr_labels = np.asarray(labels)
  kept_idx = np.flatnonzero(arr_labels != "Nocall")
  new_data = data[kept_idx].astype('uint8', copy=False)
  new_labels = labels_onehot[kept_idx].astype('uint8')

  print("File {} : {}".format(j, new_data.shape))

  # Saving filtered data and labels into files
  np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Filtered Data/vis_feat_"+str(j)+".npy",new_data)
  np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Filtered Labels/vis_labels_"+str(j)+".npy",new_labels)

  # Release the large arrays before the next file is loaded
  del data
  del new_data

File 0 : (4495, 128, 431, 3)
File 1 : (4445, 128, 431, 3)
File 2 : (4404, 128, 431, 3)
File 3 : (3809, 128, 431, 3)
File 4 : (4387, 128, 431, 3)
File 5 : (3738, 128, 431, 3)
File 6 : (4144, 128, 431, 3)
File 7 : (4209, 128, 431, 3)
File 8 : (4186, 128, 431, 3)
File 9 : (4618, 128, 431, 3)
File 10 : (4438, 128, 431, 3)
File 11 : (2816, 128, 431, 3)
File 12 : (4258, 128, 431, 3)
File 13 : (3967, 128, 431, 3)
File 14 : (3861, 128, 431, 3)
File 15 : (4781, 128, 431, 3)
File 16 : (4590, 128, 431, 3)
File 17 : (3969, 128, 431, 3)
File 18 : (3835, 128, 431, 3)
File 19 : (4349, 128, 431, 3)
File 20 : (4555, 128, 431, 3)
File 21 : (4143, 128, 431, 3)
File 22 : (3822, 128, 431, 3)
File 23 : (3691, 128, 431, 3)
File 24 : (3975, 128, 431, 3)
File 25 : (4045, 128, 431, 3)
File 26 : (4149, 128, 431, 3)
File 27 : (4449, 128, 431, 3)
File 28 : (4572, 128, 431, 3)
File 29 : (4362, 128, 431, 3)
File 30 : (4301, 128, 431, 3)
File 31 : (4213, 128, 431, 3)
File 32 : (4183, 128, 431, 3)
File 33 : (3713, 128

In [None]:
# File 77 is filtered on its own: its rows start at 383135 and run to the end of the table
j = 77
# Loading visual features
data = np.load("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Data/vis_feat_"+str(j)+".npy")

labels = stat_data.iloc[383135:, 76]
# Column 0 of the one-hot matrix is dropped so that no "Nocall" class remains
labels_onehot = onehot_labels[383135:, 1:]

# Filtering out data which has "Nocall" label
# Selecting by index avoids value_counts()["Nocall"], which raises KeyError
# when the slice contains no "Nocall" rows at all.
arr_labels = np.asarray(labels)
kept_idx = np.flatnonzero(arr_labels != "Nocall")
new_data = data[kept_idx].astype('uint8', copy=False)
new_labels = labels_onehot[kept_idx].astype('uint8')

print("File {} : {}".format(j, new_data.shape))

# Saving filtered data and labels into files
np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Filtered Data/vis_feat_"+str(j)+".npy",new_data)
np.save("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Filtered Labels/vis_labels_"+str(j)+".npy",new_labels)

# Release the large arrays
del data
del new_data

File 77 : (5267, 128, 431, 3)


**Make Batches of 64**

In [None]:
#Function for saving data and labels into batches of 64
def saveData(temp_data, temp_labels, cnt):
  """Write every complete 64-sample batch of the inputs to numbered .npy files.

  Batch numbers continue from ``cnt``: the first batch written here is
  ``cnt + 1``. Samples beyond the last complete batch are not written.

  Parameters
  ----------
  temp_data : array whose first axis indexes samples
  temp_labels : array of labels, row-aligned with ``temp_data``
  cnt : number of the last batch that has already been saved

  Returns
  -------
  The number of the last batch saved by this call (``cnt`` unchanged when
  fewer than 64 samples were passed).
  """
  full_batches = len(temp_data)//64

  for start in range(0, full_batches*64, 64):
    stop = start + 64
    cnt += 1
    print("Batch saved : " ,cnt)

    # Data and labels of one batch share the same batch number in their file names
    data_path = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Batch Data 64/Data/vis_feat_"+str(cnt)+".npy"
    labels_path = "/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Batch Data 64/Labels/vis_labels_"+str(cnt)+".npy"
    np.save(data_path, temp_data[start:stop,])
    np.save(labels_path, temp_labels[start:stop,])

  return cnt

In [None]:
# Number of the last batch written so far
cnt = 0
# Samples left over from the previous file (fewer than 64); starts empty
left_data = np.zeros((0, 128, 431, 3), dtype = 'uint8')
left_labels = np.zeros((0, 264), dtype = 'uint8')

for i in range(78):
  # Loading visual features and labels
  data = np.load("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Filtered Data/vis_feat_"+str(i)+".npy")
  labels = np.load("/content/drive/MyDrive/Colab Notebooks/Fall 2021/Final Project/Visual Features_abs_Overlap_RGB/Filtered Labels/vis_labels_"+str(i)+".npy")

  # Put the leftover of the previous file in front of this file's samples
  if(len(left_data) != 0):
    new_data = np.concatenate((left_data, data), axis = 0)
    new_labels = np.concatenate((left_labels, labels), axis = 0)
  else:
    # Nothing carried over: use the file as is and skip the copy
    new_data = data
    new_labels = labels

  # Making batches of 64
  cnt  = saveData(new_data, new_labels, cnt)

  # Keep whatever did not fill a complete batch for the next file.
  # Slicing from `len - n_left` also works when n_left is 0 (empty result);
  # a negative-index slice `[-0:]` would select the WHOLE array and cause
  # every sample to be saved a second time.
  n_left = len(new_data)%64
  # .copy() so the few leftover rows do not keep the whole file array alive
  left_data = new_data[len(new_data) - n_left:].copy()
  left_labels = new_labels[len(new_labels) - n_left:].copy()
  print("\nleft_data : ", left_data.shape)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Batch saved :  244
Batch saved :  245
Batch saved :  246
Batch saved :  247
Batch saved :  248
Batch saved :  249
Batch saved :  250
Batch saved :  251
Batch saved :  252
Batch saved :  253
Batch saved :  254
Batch saved :  255
Batch saved :  256
Batch saved :  257
Batch saved :  258
Batch saved :  259
Batch saved :  260
Batch saved :  261
Batch saved :  262
Batch saved :  263
Batch saved :  264
Batch saved :  265
Batch saved :  266
Batch saved :  267
Batch saved :  268

left_data :  (1, 128, 431, 3)
Batch saved :  269
Batch saved :  270
Batch saved :  271
Batch saved :  272
Batch saved :  273
Batch saved :  274
Batch saved :  275
Batch saved :  276
Batch saved :  277
Batch saved :  278
Batch saved :  279
Batch saved :  280
Batch saved :  281
Batch saved :  282
Batch saved :  283
Batch saved :  284
Batch saved :  285
Batch saved :  286
Batch saved :  287
Batch saved :  288
Batch saved :  289
Batch saved :  290
Batch saved

In [None]:
# Last Batch of 12 is not taken