In [None]:
import numpy as np
import os
import cv2
import pandas as pd
import librosa
import matplotlib.pyplot as plt
from scipy import signal
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import keras
import tensorflow as tf
import keras.backend as K
from collections import Counter
from tensorflow.python.client import device_lib
from keras.optimizers import Adam, Adamax, SGD
import imutils
import tensorflow_addons as tfa
from keras.models import load_model

In [None]:

# --- Runtime / device configuration -----------------------------------------
# Expose only the first CUDA device and select TensorFlow's async CUDA allocator.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"
# The original cell only bound a Python variable named TF_ENABLE_ONEDNN_OPTS,
# which nothing reads; export it as an environment variable instead.
# NOTE(review): TensorFlow is imported before this cell runs, so this may need
# to move ahead of the import to take effect — confirm.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
TF_ENABLE_ONEDNN_OPTS = 0  # kept so the module-level name still exists

gpu = tf.config.list_physical_devices('GPU')
if gpu:
    # Grow GPU memory on demand instead of reserving it all up front.
    tf.config.experimental.set_memory_growth(gpu[0], True)
else:
    # Indexing gpu[0] on an empty list raised IndexError in the original cell.
    print("[WARN] TensorFlow sees no GPU; continuing without memory-growth setup")
print(gpu)
tf.keras.backend.clear_session()

# Device configuration (PyTorch side)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The original cell ended with `print(tf.device('/gpu:0'))` and a bare
# `tf.device('/gpu:0')`. tf.device returns a context manager that only affects
# ops created inside a `with` block, so both calls were no-ops and are dropped;
# the training cell wraps `model.fit` in `with tf.device(...)` itself.

In [None]:
# --- Build the annotation table from the per-clip label files ---------------
# A reference clip fixes the expected duration; clips of any other length are
# skipped so every spectrogram later has the same shape.
# (The original bound the reference audio to `_`, which shadows IPython's
# "last output" variable; use an explicit name instead.)
reference_audio, sample_rate = librosa.load("/home/neel/Acoustic/Acoustics/dataset/datachunks/channel1_normal_train107_23.wav", sr=None)
duration_GT = len(reference_audio) / sample_rate
label_file_path = "/home/neel/Acoustic/Acoustics/dataset/labels/"

# Keep only "*.txt" entries: the original sliced off the last four characters
# of every directory entry, so any stray file or sub-directory produced a
# bogus label name and a FileNotFoundError below.
labels = [
    os.path.splitext(entry)[0]
    for entry in os.listdir(label_file_path)
    if entry.endswith(".txt")
]

dataset = []
for label in labels:
    # Context manager closes the handle; the original leaked one per label.
    with open(label_file_path + label + ".txt", "r") as label_file:
        label_s = label_file.read()
    label_list = label_s.replace('\n', '').split(" ")

    channel1 = "channel1_" + label + ".wav"
    channel2 = "channel2_" + label + ".wav"
    # NOTE(review): this path is relative to the working directory while the
    # reference clip above uses an absolute path — confirm the notebook is
    # always run from the project root.
    audio, sample_rate = librosa.load("dataset/datachunks/" + channel1, sr=None)
    duration = len(audio) / sample_rate
    if duration != duration_GT:
        continue

    # First token of the label is skipped; the rest are the box coordinates.
    row = [channel1, channel2, *label_list[1:]]
    if len(row) < 6:
        raise ValueError(
            "label file %s.txt does not contain four coordinates: %r"
            % (label, label_s)
        )
    X1 = float(row[2])
    Y1 = float(row[3])
    X2 = float(row[4])
    Y2 = float(row[5])
    # Normalise by the 1440x1080 frame size used throughout the notebook.
    row[2] = X1 / 1440
    row[3] = Y1 / 1080
    row[4] = X2 / 1440
    row[5] = Y2 / 1080
    dataset.append(row)

In [None]:
# One row per usable clip: [channel1 file, channel2 file, x1, y1, x2, y2].
datasets = pd.DataFrame(data=dataset)

In [None]:
# Persist the annotation table without header or index column, then release
# the in-memory intermediates (deleted in the same order as before).
datasets.to_csv(
    'dataset/train.csv',
    index=False,
    header=False,
)
del dataset
del datasets
del labels

In [None]:
# --- Input locations --------------------------------------------------------
base_path = "/home/neel/Acoustic/Acoustics/dataset"
audio_path = os.path.join(base_path, "datachunks")   # per-channel .wav chunks
annots_path = os.path.join(base_path, "train.csv")   # annotation table

# --- Output locations -------------------------------------------------------
base_output = "/home/neel/Acoustic/Acoustics/output2.0"
model_path = os.path.join(base_output, "detector.h5")  # trained model
plot_path = os.path.join(base_output, "plot.png")      # training curve figure
test_file = os.path.join(base_output, "test.txt")      # held-out file names

In [None]:
# --- Load every annotated clip as a 2-channel magnitude spectrogram ---------
print("[INFO] loading dataset...")
# Read through a context manager (the original never closed the file) and
# drop blank lines, which would otherwise fail the 6-way unpack below.
with open(annots_path) as annots_file:
    rows = [line for line in annots_file.read().strip().split("\n") if line.strip()]

# Spectrogram parameters: Hamming window of `window_size` samples advanced by
# `slide_size` samples, i.e. all but `slide_size` samples overlap.
window_size = int(1048)
wd = signal.windows.hamming(window_size)
slide_size = int(1)
overlap = window_size - slide_size

# Preallocated (clips, 525, 553, 2) buffer; each clip must match this shape.
spectrogram = np.empty(((len(rows)), 525, 553, 2), dtype="float32")
bounding_box_cords = []
filenames = []
cnt = 0
for row in rows:
    # Each CSV row: channel-1 file, channel-2 file, then four box coordinates.
    row = row.split(",")
    (filename1, filename2, X, Y, W, H) = row
    channel1_Path = os.path.sep.join([audio_path, filename1])
    channel2_path = os.path.sep.join([audio_path, filename2])

    channel1, sample_rate = librosa.load(channel1_Path, sr=None)
    channel2, sample_rate = librosa.load(channel2_path, sr=None)

    frequency, time, spectrum1 = signal.spectrogram(channel1, nfft=window_size, fs=sample_rate, window=wd, noverlap=overlap, mode='magnitude')
    frequency, time, spectrum2 = signal.spectrogram(channel2, nfft=window_size, fs=sample_rate, window=wd, noverlap=overlap, mode='magnitude')

    # Stack the two channels on a trailing axis.
    data = np.stack((spectrum1, spectrum2), axis=-1)
    # Fail loudly on a size mismatch: plain assignment would either raise an
    # opaque broadcasting error or silently broadcast a smaller array.
    if data.shape != spectrogram.shape[1:]:
        raise ValueError(
            "spectrogram for %s/%s has shape %s, expected %s"
            % (filename1, filename2, data.shape, spectrogram.shape[1:])
        )
    spectrogram[cnt] = data
    del data, frequency, time
    cnt += 1
    bounding_box_cords.append((X, Y, W, H))
    filenames.append([filename1, filename2])
print("Done.")

In [None]:
# Convert the collected coordinate tuples into a float32 array, then free the
# Python-side lists that are no longer needed.
targets = np.asarray(bounding_box_cords, dtype=np.float32)

del bounding_box_cords
del rows

In [None]:
# Sanity check: report the target shape first, then the spectrogram shape.
for _array in (targets, spectrogram):
    print(_array.shape)

In [None]:
# --- Train / test split ------------------------------------------------------
# 90/10 split with a fixed seed. train_test_split returns a train/test pair
# for each input, in input order.
(trainData, testData,
 trainTargets, testTargets,
 trainFilenames, testFilenames) = train_test_split(
    spectrogram, targets, filenames, test_size=0.10, random_state=42)
# Release the un-split copies.
del spectrogram, targets, filenames

print("[INFO] saving testing filenames...")
# One "channel1,channel2" pair per line. The context manager closes the file
# even if a write fails; the original used a bare open()/close() pair.
with open(test_file, "w") as f:
    for i in testFilenames:
        f.write(i[0] + "," + i[1])
        f.write("\n")

In [None]:
from keras.layers import LeakyReLU

# --- Network definition ------------------------------------------------------
# One strided conv block on the (525, 553, 2) spectrogram pair, followed by a
# 128 -> 64 -> 32 dense stack (each with batch-norm and 50% dropout) and a
# 4-unit output for the box coordinates.
tfkl = tf.keras.layers
model = tf.keras.Sequential([
    # Convolutional feature extractor
    tfkl.Conv2D(32, (3, 3), padding='valid', strides=2,
                input_shape=(525, 553, 2), activation='relu'),
    tfkl.BatchNormalization(),
    tfkl.MaxPool2D(pool_size=(2, 2), padding='valid'),
    tfkl.Flatten(),
    # Dense block 1
    tfkl.Dense(128, activation='relu'),
    # NOTE(review): LeakyReLU right after a ReLU-activated layer only ever
    # sees non-negative inputs, so it appears to act as an identity here —
    # confirm whether the Dense layer was meant to be linear.
    LeakyReLU(alpha=0.03),
    tfkl.BatchNormalization(),
    tfkl.Dropout(0.5),
    # Dense block 2
    tfkl.Dense(64, activation='relu'),
    tfkl.BatchNormalization(),
    tfkl.Dropout(0.5),
    # Dense block 3
    tfkl.Dense(32, activation='relu'),
    tfkl.BatchNormalization(),
    tfkl.Dropout(0.5),
    # Regression head.
    # NOTE(review): a ReLU output passes no gradient once a unit's
    # pre-activation is negative; a bounded activation may suit the
    # normalised targets better — confirm before changing.
    tfkl.Dense(4, activation='relu'),
])

In [None]:
# --- Optimiser and compilation -----------------------------------------------
init_lr = 1e-4

epoch = 60
batch_size = 32

# ExponentialDecay counts optimizer steps (batches), not epochs. The original
# `decay_steps=20` divided the learning rate by 10 every 20 batches, which
# drives it to effectively zero within the first few epochs of a 60-epoch run.
# Scale by the number of batches per epoch so the drop happens every 20 epochs.
# NOTE(review): assumes "every 20 epochs" was the intent — confirm.
steps_per_epoch = max(1, int(np.ceil(len(trainData) / batch_size)))
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    init_lr,
    decay_steps=20 * steps_per_epoch,
    decay_rate=0.1,
    staircase=True)

opt = SGD(learning_rate=lr_schedule, momentum=0.9)
# NOTE(review): 'accuracy' is not an informative metric for MSE regression; it
# is kept because the plotting cell reads H.history["accuracy"].
model.compile(
    optimizer=opt,
    loss='mse',
    metrics=['accuracy'],
    run_eagerly=True)
model.summary()

In [None]:
# --- Training ----------------------------------------------------------------
# Fit inside an explicit GPU device scope; the held-out split doubles as the
# validation set. `H` is the Keras History object returned by fit().
with tf.device("/gpu:0"):
    H = model.fit(
        x=trainData,
        y=trainTargets,
        validation_data=(testData, testTargets),
        batch_size=batch_size,
        epochs=epoch,
        verbose=1,
    )
    print(H)

In [None]:
# --- Save the trained model and plot the training curves ---------------------
print("[INFO] saving object detector model...")
model.save(model_path, save_format="h5")

# The original plotted the "accuracy" series under "train_loss"/"val_loss"
# legend labels and an "MSE" title. Plot the recorded MSE loss so the curves,
# legend, title and axis label all describe the same quantity.
train_loss = H.history["loss"]
val_loss = H.history["val_loss"]
# Size the x-axis from the recorded history rather than the configured epoch
# count, so a run that stopped early still plots.
N = len(train_loss)

plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), train_loss, label="train_loss")
plt.plot(np.arange(0, N), val_loss, label="val_loss")
plt.title("Bounding Box Regression MSE (Training vs. Validation)")
plt.xlabel("Epoch #")
plt.ylabel("MSE Loss")
plt.legend(loc="lower left")
plt.savefig(plot_path)

In [None]:
# Free the large training-time objects before reloading the model for
# inference (names are removed in the same order as before).
del spectrum1, spectrum2
del trainData, testData
del model
del trainTargets

In [None]:
# --- Inference on the first ten held-out clips -------------------------------
# Reload the saved detector from the configured path (same file the original
# referenced through a duplicated literal).
model = load_model(model_path)
for img in testFilenames[:10]:
    # `img` is a [channel1_file, channel2_file] pair.
    imagePath1 = os.path.sep.join([audio_path, img[0]])
    imagePath2 = os.path.sep.join([audio_path, img[1]])
    channel1, sample_rate = librosa.load(imagePath1, sr=None)
    channel2, sample_rate = librosa.load(imagePath2, sr=None)
    # Same spectrogram settings as used when building the training set.
    frequency, time, spectrum1 = signal.spectrogram(channel1, nfft=window_size, fs=sample_rate, window=wd, noverlap=overlap, mode='magnitude')
    frequency, time, spectrum2 = signal.spectrogram(channel2, nfft=window_size, fs=sample_rate, window=wd, noverlap=overlap, mode='magnitude')
    # Add a leading batch axis, then stack the channels on the last axis.
    spectrum1 = np.expand_dims(spectrum1, axis=0)
    spectrum2 = np.expand_dims(spectrum2, axis=0)
    image = np.stack((spectrum1, spectrum2), axis=-1)

    preds = model.predict(image)[0]
    (startX, startY, endX, endY) = preds
    print(imagePath1)

    # Label file name = channel-1 file name without its "channel1_" prefix
    # (9 characters) and ".wav" suffix.
    label_path = os.path.sep.join([base_path, "labels", img[0][9:-4] + ".txt"])
    # Context manager closes the handle; the original leaked one per sample.
    with open(label_path) as f:
        ground_truth = f.read()
    # Skip the first character of the label text when printing.
    print("Ground truth: ", ground_truth[1:])
    print("Predicted: ", startX, startY, endX, endY)
    
    

In [None]:

from matplotlib import patches
import matplotlib.pyplot as plt
import cv2

# --- Draw the predicted box for one hand-picked clip on its video frame ------
fig = plt.figure()

# Load both microphone channels of the clip at their native sample rate.
channel1, sample_rate = librosa.load("/home/neel/Acoustic/Acoustics/dataset/datachunks/channel1_3m_train2_5.wav", sr=None)
channel2, sample_rate = librosa.load("/home/neel/Acoustic/Acoustics/dataset/datachunks/channel2_3m_train2_5.wav", sr=None)

# Spectrogram settings reused from the dataset-loading cell.
_spec_kwargs = dict(nfft=window_size, fs=sample_rate, window=wd,
                    noverlap=overlap, mode='magnitude')
frequency, time, spectrum1 = signal.spectrogram(channel1, **_spec_kwargs)
frequency, time, spectrum2 = signal.spectrogram(channel2, **_spec_kwargs)

# Leading batch axis, then channels stacked last.
spectrum1 = spectrum1[np.newaxis, ...]
spectrum2 = spectrum2[np.newaxis, ...]
image = np.stack((spectrum1, spectrum2), axis=-1)

# Predict the normalised corners and scale them back to the 1440x1080 frame.
preds = model.predict(image)[0]
startX, startY, endX, endY = preds
startX, endX = startX * 1440, endX * 1440
startY, endY = startY * 1080, endY * 1080
width = endX - startX
height = endY - startY

ax = fig.add_axes([0, 0, 1, 1])
print(startX, startY, width, height)

# Show the matching video frame and overlay the predicted rectangle.
image = plt.imread('/home/neel/Acoustic/yolov5_training/img_data/frames/3m_train2_5.jpg')
plt.imshow(image)
rect = patches.Rectangle(
    (startX, startY),
    width,
    height,
    edgecolor='r',
    facecolor='none',
)
ax.add_patch(rect)
